Skip to content

Chroma

ChromaReader #

Bases: BaseReader

Chroma阅读器。

从现有的持久化Chroma集合中检索文档。

Parameters:

Name Type Description Default
collection_name str

持久化集合的名称。

required
persist_directory Optional[str]

集合持久化的目录。

None
Source code in llama_index/readers/chroma/base.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
class ChromaReader(BaseReader):
    """Chroma reader.

    Retrieves documents from an existing persisted Chroma collection.

    Args:
        collection_name: Name of the persisted collection.
        persist_directory: Directory where the collection is persisted. When
            given, a local ``chromadb.PersistentClient`` is used; otherwise
            the reader connects to a Chroma server at ``host``/``port``.
        chroma_api_impl: Not read by this class; kept so existing callers
            that pass it keep working.
        chroma_db_impl: Not read by this class; kept so existing callers
            that pass it keep working.
        host: Host of the Chroma server (used when ``persist_directory`` is
            ``None``).
        port: Port of the Chroma server (used when ``persist_directory`` is
            ``None``).
    """

    def __init__(
        self,
        collection_name: str,
        persist_directory: Optional[str] = None,
        chroma_api_impl: str = "rest",
        chroma_db_impl: Optional[str] = None,
        host: str = "localhost",
        port: int = 8000,
    ) -> None:
        """Create the Chroma client and look up the collection.

        Raises:
            ImportError: If the ``chromadb`` package is not installed.
            ValueError: If ``collection_name`` is ``None``, or if no
                persist directory, host or port is available to build a
                client from.
        """
        import_err_msg = (
            "`chromadb` package not found, please run `pip install chromadb`"
        )
        try:
            import chromadb
        except ImportError as err:
            raise ImportError(import_err_msg) from err

        if collection_name is None:
            raise ValueError("Please provide a collection name.")

        if persist_directory is not None:
            # An empty string falls back to the "./chroma" directory.
            self._client = chromadb.PersistentClient(
                path=persist_directory or "./chroma",
            )
        elif host is not None or port is not None:
            self._client = chromadb.HttpClient(
                host=host,
                port=port,
            )
        else:
            # Without this branch ``self._client`` would stay unset and the
            # lookup below would fail with an unhelpful AttributeError.
            raise ValueError(
                "Please provide either a persist directory or a host/port."
            )

        self._collection = self._client.get_collection(collection_name)

    def create_documents(self, results: Any) -> List[Document]:
        """Build documents from a query result.

        Only the first result group (index 0 of each field) is read, i.e.
        the matches for the first query of the request.

        Args:
            results: Query result with ``ids``, ``documents``,
                ``embeddings`` and ``metadatas`` entries.

        Returns:
            A list of documents, one per match in the first result group.
        """
        matches = zip(
            results["ids"][0],
            results["documents"][0],
            results["embeddings"][0],
            results["metadatas"][0],
        )
        # Guard against None text/metadata entries (e.g. records stored
        # without a document body or metadata) so Document gets valid values.
        return [
            Document(
                id_=doc_id,
                text=text or "",
                embedding=embedding,
                metadata=metadata or {},
            )
            for doc_id, text, embedding, metadata in matches
        ]

    def load_data(
        self,
        query_embedding: Optional[List[float]] = None,
        limit: int = 10,
        where: Optional[dict] = None,
        where_document: Optional[dict] = None,
        query: Optional[Union[str, List[str]]] = None,
    ) -> Any:
        """Load data from the collection.

        ``query_embedding`` takes precedence over ``query`` when both are
        given.

        Args:
            query_embedding: Embedding vector to search with.
            limit: Number of results to return.
            where: Filter results by metadata.
                {"metadata_field": "is_equal_to_this"}
            where_document: Filter results by document content.
                {"$contains": "search_string"}
            query: Query text, or list of query texts, to search with.

        Returns:
            A list of documents.

        Raises:
            ValueError: If neither ``query_embedding`` nor ``query`` is given.
        """
        if query_embedding is not None:
            # Nearest-neighbour lookup goes through ``Collection.query`` with
            # ``query_embeddings``; the single vector is wrapped in a list.
            query_kwargs = {"query_embeddings": [query_embedding]}
        elif query is not None:
            query_kwargs = {
                "query_texts": query if isinstance(query, list) else [query]
            }
        else:
            raise ValueError("Please provide either query embedding or query.")

        results = self._collection.query(
            n_results=limit,
            where=where or {},
            where_document=where_document or {},
            include=["metadatas", "documents", "distances", "embeddings"],
            **query_kwargs,
        )
        return self.create_documents(results)

create_documents #

create_documents(results: Any) -> List[Document]

根据结果创建文档。

Parameters:

Name Type Description Default
results Any

查询结果。

required

Returns:

Type Description
List[Document]

文档列表。

Source code in llama_index/readers/chroma/base.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
    def create_documents(self, results: Any) -> List[Document]:
        """Build documents from a query result.

        Only the first result group (index 0 of each field) is read, i.e.
        the matches for the first query of the request.

        Args:
            results: Query result with ``ids``, ``documents``,
                ``embeddings`` and ``metadatas`` entries.

        Returns:
            A list of documents, one per match in the first result group.
        """
        matches = zip(
            results["ids"][0],
            results["documents"][0],
            results["embeddings"][0],
            results["metadatas"][0],
        )
        # Guard against None text/metadata entries (e.g. records stored
        # without a document body or metadata) so Document gets valid values.
        return [
            Document(
                id_=doc_id,
                text=text or "",
                embedding=embedding,
                metadata=metadata or {},
            )
            for doc_id, text, embedding, metadata in matches
        ]

load_data #

load_data(
    query_embedding: Optional[List[float]] = None,
    limit: int = 10,
    where: Optional[dict] = None,
    where_document: Optional[dict] = None,
    query: Optional[Union[str, List[str]]] = None,
) -> Any

从集合中加载数据。

Parameters:

Name Type Description Default
limit int

要返回的结果数量。

10
where Optional[dict]

按元数据过滤结果。{"metadata_field": "is_equal_to_this"}

None
where_document Optional[dict]

按文档过滤结果。{"$contains": "search_string"}

None

Returns:

Type Description
Any

文档列表。

Source code in llama_index/readers/chroma/base.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
    def load_data(
        self,
        query_embedding: Optional[List[float]] = None,
        limit: int = 10,
        where: Optional[dict] = None,
        where_document: Optional[dict] = None,
        query: Optional[Union[str, List[str]]] = None,
    ) -> Any:
        """Load data from the collection.

        ``query_embedding`` takes precedence over ``query`` when both are
        given.

        Args:
            query_embedding: Embedding vector to search with.
            limit: Number of results to return.
            where: Filter results by metadata.
                {"metadata_field": "is_equal_to_this"}
            where_document: Filter results by document content.
                {"$contains": "search_string"}
            query: Query text, or list of query texts, to search with.

        Returns:
            A list of documents.

        Raises:
            ValueError: If neither ``query_embedding`` nor ``query`` is given.
        """
        if query_embedding is not None:
            # Nearest-neighbour lookup goes through ``Collection.query`` with
            # ``query_embeddings``; the single vector is wrapped in a list.
            query_kwargs = {"query_embeddings": [query_embedding]}
        elif query is not None:
            query_kwargs = {
                "query_texts": query if isinstance(query, list) else [query]
            }
        else:
            raise ValueError("Please provide either query embedding or query.")

        results = self._collection.query(
            n_results=limit,
            where=where or {},
            where_document=where_document or {},
            include=["metadatas", "documents", "distances", "embeddings"],
            **query_kwargs,
        )
        return self.create_documents(results)