Chroma

ChromaReader #

Bases: BaseReader

Chroma阅读器。

从现有的持久化Chroma集合中检索文档。

Parameters:

Name	Type	Description	Default
`collection_name`	`str`	持久化集合的名称。	required
`persist_directory`	`Optional[str]`	集合持久化的目录。	`None`

Source code in llama_index/readers/chroma/base.py

class ChromaReader(BaseReader):
    """Reader that retrieves documents from an existing Chroma collection.

    Connects either to an on-disk (persistent) Chroma store or to a Chroma
    server over HTTP, then looks up the named collection.

    Args:
        collection_name: Name of the existing collection to read from.
        persist_directory: Directory of the persisted Chroma store. When
            given, a local persistent client is used and ``host``/``port``
            are ignored.
        chroma_api_impl: Not used by this constructor; kept so existing
            callers that pass it keep working.
        chroma_db_impl: Not used by this constructor; kept so existing
            callers that pass it keep working.
        host: Host of the Chroma server, used when ``persist_directory``
            is ``None``.
        port: Port of the Chroma server, used when ``persist_directory``
            is ``None``.

    Raises:
        ImportError: If the ``chromadb`` package is not installed.
        ValueError: If ``collection_name`` is ``None``, or if no persist
            directory and no host/port are available to build a client.
    """

    def __init__(
        self,
        collection_name: str,
        persist_directory: Optional[str] = None,
        chroma_api_impl: str = "rest",
        chroma_db_impl: Optional[str] = None,
        host: str = "localhost",
        port: int = 8000,
    ) -> None:
        """Create the Chroma client and fetch the target collection."""
        # chromadb is an optional dependency, so it is imported lazily.
        try:
            import chromadb
        except ImportError as err:
            raise ImportError(
                "`chromadb` package not found, please run `pip install chromadb`"
            ) from err

        if collection_name is None:
            raise ValueError("Please provide a collection name.")

        if persist_directory is not None:
            # An empty string falls back to the default local directory.
            self._client = chromadb.PersistentClient(
                path=persist_directory or "./chroma",
            )
        elif host is not None or port is not None:
            self._client = chromadb.HttpClient(host=host, port=port)
        else:
            # Without this branch ``self._client`` would be unset and the
            # lookup below would fail with an opaque AttributeError.
            raise ValueError(
                "Please provide either a persist directory or a host/port."
            )

        self._collection = self._client.get_collection(collection_name)

    def create_documents(self, results: Any) -> List[Document]:
        """Convert a Chroma query result into a list of documents.

        Only the first entry of each result field is read (index ``0`` of
        ``ids``, ``documents``, ``embeddings`` and ``metadatas``).

        Args:
            results: Query result mapping with the keys ``"ids"``,
                ``"documents"``, ``"embeddings"`` and ``"metadatas"``.

        Returns:
            One ``Document`` per returned item.
        """
        return [
            Document(id_=doc_id, text=text, embedding=embedding, metadata=metadata)
            for doc_id, text, embedding, metadata in zip(
                results["ids"][0],
                results["documents"][0],
                results["embeddings"][0],
                results["metadatas"][0],
            )
        ]

    def load_data(
        self,
        query_embedding: Optional[List[float]] = None,
        limit: int = 10,
        where: Optional[dict] = None,
        where_document: Optional[dict] = None,
        query: Optional[Union[str, List[str]]] = None,
    ) -> Any:
        """Load documents from the collection by embedding or text query.

        ``query_embedding`` takes precedence over ``query`` when both are
        given.

        Args:
            query_embedding: Embedding vector to search with.
            limit: Number of results to return.
            where: Metadata filter, e.g.
                ``{"metadata_field": "is_equal_to_this"}``.
            where_document: Document-content filter, e.g.
                ``{"$contains": "search_string"}``.
            query: Query text, or a list of query texts. Note that
                ``create_documents`` only converts the first result set.

        Returns:
            A list of documents.

        Raises:
            ValueError: If neither ``query_embedding`` nor ``query`` is given.
        """
        if query_embedding is not None:
            # Chroma's ``query`` takes a batch of embeddings, so the single
            # vector is wrapped in a list.
            query_kwargs = {"query_embeddings": [query_embedding]}
        elif query is not None:
            query_kwargs = {
                "query_texts": query if isinstance(query, list) else [query]
            }
        else:
            raise ValueError("Please provide either query embedding or query.")

        results = self._collection.query(
            n_results=limit,
            # Empty filters are passed as None, i.e. "no filter".
            where=where or None,
            where_document=where_document or None,
            include=["metadatas", "documents", "distances", "embeddings"],
            **query_kwargs,
        )
        return self.create_documents(results)

create_documents #

create_documents(results: Any) -> List[Document]

根据结果创建文档。

Parameters:

Name	Type	Description	Default
`results`	`Any`	查询结果。	required

Returns:

Type	Description
`List[Document]`	文档列表。

Source code in llama_index/readers/chroma/base.py

    def create_documents(self, results: Any) -> List[Document]:
        """Convert a Chroma query result into a list of documents.

        Only the first entry of each result field is read (index ``0`` of
        ``ids``, ``documents``, ``embeddings`` and ``metadatas``).

        Args:
            results: Query result mapping with the keys ``"ids"``,
                ``"documents"``, ``"embeddings"`` and ``"metadatas"``.

        Returns:
            One ``Document`` per returned item.
        """
        # Walk the four parallel result lists in lockstep.
        rows = zip(
            results["ids"][0],
            results["documents"][0],
            results["embeddings"][0],
            results["metadatas"][0],
        )
        return [
            Document(id_=doc_id, text=text, embedding=embedding, metadata=metadata)
            for doc_id, text, embedding, metadata in rows
        ]

load_data #

load_data(
    query_embedding: Optional[List[float]] = None,
    limit: int = 10,
    where: Optional[dict] = None,
    where_document: Optional[dict] = None,
    query: Optional[Union[str, List[str]]] = None,
) -> Any

从集合中加载数据。

Parameters:

Name	Type	Description	Default
`limit`	`int`	要返回的结果数量。	`10`
`where`	`Optional[dict]`	按元数据过滤结果，例如 `{"metadata_field": "is_equal_to_this"}`。	`None`
`where_document`	`Optional[dict]`	按文档内容过滤结果，例如 `{"$contains": "search_string"}`。	`None`

Returns:

Type	Description
`Any`	文档列表。

Source code in llama_index/readers/chroma/base.py

    def load_data(
        self,
        query_embedding: Optional[List[float]] = None,
        limit: int = 10,
        where: Optional[dict] = None,
        where_document: Optional[dict] = None,
        query: Optional[Union[str, List[str]]] = None,
    ) -> Any:
        """Load documents from the collection by embedding or text query.

        ``query_embedding`` takes precedence over ``query`` when both are
        given.

        Args:
            query_embedding: Embedding vector to search with.
            limit: Number of results to return.
            where: Metadata filter, e.g.
                ``{"metadata_field": "is_equal_to_this"}``.
            where_document: Document-content filter, e.g.
                ``{"$contains": "search_string"}``.
            query: Query text, or a list of query texts. Note that
                ``create_documents`` only converts the first result set.

        Returns:
            A list of documents.

        Raises:
            ValueError: If neither ``query_embedding`` nor ``query`` is given.
        """
        if query_embedding is not None:
            # Chroma's ``query`` takes a batch of embeddings, so the single
            # vector is wrapped in a list.
            query_kwargs = {"query_embeddings": [query_embedding]}
        elif query is not None:
            query_kwargs = {
                "query_texts": query if isinstance(query, list) else [query]
            }
        else:
            raise ValueError("Please provide either query embedding or query.")

        results = self._collection.query(
            n_results=limit,
            # Empty filters are passed as None, i.e. "no filter".
            where=where or None,
            where_document=where_document or None,
            include=["metadatas", "documents", "distances", "embeddings"],
            **query_kwargs,
        )
        return self.create_documents(results)