from pathlib import Path
from typing import List, Union

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader

class SRTLoader(BaseLoader):
    """Load `.srt` (subtitle) files.

    The text of every subtitle entry in the file is joined with single
    spaces into one ``Document`` whose metadata records the source path.
    """

    def __init__(self, file_path: Union[str, Path]):
        """Initialize with a file path.

        Args:
            file_path: Path of the `.srt` file to load. It is stored on the
                instance as a string.

        Raises:
            ImportError: If the `pysrt` package cannot be imported.
        """
        # Fail early at construction time instead of on the first `load()`.
        try:
            import pysrt  # noqa:F401
        except ImportError as e:
            raise ImportError(
                "package `pysrt` not found, please install it with `pip install pysrt`"
            ) from e
        self.file_path = str(file_path)

    def load(self) -> List[Document]:
        """Load using pysrt file.

        Returns:
            A single-element list holding one ``Document``. Its
            ``page_content`` is the text of all subtitle entries joined by a
            single space, and its metadata is ``{"source": <file path>}``.
        """
        # Imported here so the module itself imports without `pysrt` present.
        import pysrt

        # Parse the subtitle file; iterating the result yields entries that
        # expose their caption through the `.text` attribute.
        parsed_info = pysrt.open(self.file_path)
        text = " ".join(t.text for t in parsed_info)
        metadata = {"source": self.file_path}
        return [Document(page_content=text, metadata=metadata)]