# Source code for langchain_core.documents.transformers

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Sequence

from langchain_core.runnables.config import run_in_executor

if TYPE_CHECKING:
    from langchain_core.documents import Document


class BaseDocumentTransformer(ABC):
    """Abstract base class for document transformation systems.

    A document transformation system takes a sequence of Documents and
    returns a sequence of transformed Documents.

    Example:
        .. code-block:: python

            class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
                embeddings: Embeddings
                similarity_fn: Callable = cosine_similarity
                similarity_threshold: float = 0.95

                class Config:
                    arbitrary_types_allowed = True

                def transform_documents(
                    self, documents: Sequence[Document], **kwargs: Any
                ) -> Sequence[Document]:
                    stateful_documents = get_stateful_documents(documents)
                    embedded_documents = _get_embeddings_from_stateful_docs(
                        self.embeddings, stateful_documents
                    )
                    included_idxs = _filter_similar_embeddings(
                        embedded_documents, self.similarity_fn, self.similarity_threshold
                    )
                    return [stateful_documents[i] for i in sorted(included_idxs)]

                async def atransform_documents(
                    self, documents: Sequence[Document], **kwargs: Any
                ) -> Sequence[Document]:
                    raise NotImplementedError

    """  # noqa: E501

    @abstractmethod
    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Transform a sequence of documents.

        Concrete subclasses must implement this method; the base class
        cannot be instantiated until they do.

        Args:
            documents: The sequence of Documents to transform.
            **kwargs: Extra keyword arguments; their meaning is defined by
                the concrete subclass.

        Returns:
            A sequence of transformed Documents.
        """

    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Asynchronously transform a sequence of documents.

        The default implementation delegates to the synchronous
        ``transform_documents`` through ``run_in_executor``, passing ``None``
        as the executor argument. Subclasses may override it with a native
        async implementation.

        Args:
            documents: The sequence of Documents to transform.
            **kwargs: Extra keyword arguments, forwarded unchanged to
                ``transform_documents``.

        Returns:
            A sequence of transformed Documents.
        """
        # NOTE(review): presumably a None executor means "use the default
        # executor" so the event loop is not blocked -- confirm against
        # run_in_executor's own documentation.
        return await run_in_executor(
            None, self.transform_documents, documents, **kwargs
        )