Source code for langchain_community.document_loaders.baiducloud_bos_file

import logging
import os
import tempfile
from typing import Any, Iterator

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader

logger = logging.getLogger(__name__)

class BaiduBOSFileLoader(BaseLoader):
    """Load from `Baidu Cloud BOS` file.

    A single object is downloaded from a BOS bucket into a temporary
    directory and parsed with ``UnstructuredFileLoader``.
    """

    def __init__(self, conf: Any, bucket: str, key: str):
        """Initialize with BOS config, bucket and key name.

        :param conf(BceClientConfiguration): BOS config.
        :param bucket(str): BOS bucket.
        :param key(str): BOS file key.
        """
        self.conf = conf
        self.bucket = bucket
        self.key = key

    def lazy_load(self) -> Iterator[Document]:
        """Load documents.

        Downloads ``self.key`` from ``self.bucket`` to a temporary file and
        parses it with ``UnstructuredFileLoader``.

        :return: An iterator over the parsed documents. If parsing the
            downloaded file fails, the error is logged and an iterator over
            a single ``Document`` with empty ``page_content`` is returned.
        :raises ImportError: If the ``bce-python-sdk`` package is missing.
        """
        # The SDK is imported lazily so that it is only required when this
        # loader is actually used.
        try:
            from baidubce.services.bos.bos_client import BosClient
        except ImportError as exc:
            raise ImportError(
                "Please using `pip install bce-python-sdk`"
                " before import bos related package."
            ) from exc

        # Initialize BOS Client
        client = BosClient(self.conf)
        with tempfile.TemporaryDirectory() as temp_dir:
            # Plain string formatting (not os.path.join) is kept on purpose:
            # a key starting with "/" must stay inside the temp directory.
            file_path = f"{temp_dir}/{self.bucket}/{self.key}"
            # Keys may contain "/" separators, so create intermediate dirs.
            os.makedirs(os.path.dirname(file_path), exist_ok=True)

            # Download the file to a destination
            logger.debug("get object key %s to file %s", self.key, file_path)
            client.get_object_to_file(self.bucket, self.key, file_path)

            # Parsing is best-effort: failures are logged, not raised.
            # load() runs to completion inside the ``with`` block, so the
            # temporary file is no longer needed once we return.
            try:
                loader = UnstructuredFileLoader(file_path)
                documents = loader.load()
                return iter(documents)
            except Exception as ex:
                logger.error("load document error = %s", ex)
                return iter([Document(page_content="")])