# Source code for langchain_community.vectorstores.tigris

from __future__ import annotations

import itertools
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

if TYPE_CHECKING:
    from tigrisdb import TigrisClient
    from tigrisdb import VectorStore as TigrisVectorStore
    from tigrisdb.types.filters import Filter as TigrisFilter
    from tigrisdb.types.vector import Document as TigrisDocument


class Tigris(VectorStore):
    """`Tigris` vector store.

    Thin adapter that embeds texts with the supplied ``Embeddings`` object and
    stores/searches them through a ``tigrisdb`` vector store bound to one
    search index.
    """

    def __init__(self, client: TigrisClient, embeddings: Embeddings, index_name: str):
        """Initialize Tigris vector store.

        Args:
            client: Tigris client; the result of ``client.get_search()`` is
                handed to the underlying ``tigrisdb.VectorStore``.
            embeddings: Embedding model used for both documents and queries.
            index_name: Name of the Tigris search index to operate on.

        Raises:
            ImportError: If the ``tigrisdb`` package is not installed.
        """
        try:
            # The module-level import of this name is guarded by
            # TYPE_CHECKING, so it does not exist at runtime; bind it here.
            from tigrisdb import VectorStore as TigrisVectorStore
        except ImportError as err:
            raise ImportError(
                "Could not import tigrisdb python package. "
                "Please install it with `pip install tigrisdb`"
            ) from err

        self._embed_fn = embeddings
        self._vector_store = TigrisVectorStore(client.get_search(), index_name)

    @property
    def embeddings(self) -> Embeddings:
        """Embedding model this store was constructed with."""
        return self._embed_fn

    @property
    def search_index(self) -> TigrisVectorStore:
        """Underlying ``tigrisdb`` vector store object."""
        return self._vector_store

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids for documents. When an id is missing no
                ``id`` field is sent, leaving id assignment to the index.
            kwargs: Vectorstore specific parameters (currently unused).

        Returns:
            List of ids reported by the index for the added documents.

        Raises:
            ValueError: If ``metadatas`` or ``ids`` has more entries than
                ``texts``.
        """
        docs = self._prep_docs(texts, metadatas, ids)
        if not docs:
            # Nothing to upload; avoid a pointless round trip to the index.
            return []
        result = self.search_index.add_documents(docs)
        return [r.id for r in result]

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        filter: Optional[TigrisFilter] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return the documents most similar to the query text.

        The ``VectorStore`` base class declares this method abstract, so it
        must be implemented for the class to be instantiable. It delegates to
        ``similarity_search_with_score`` and drops the scores.

        Args:
            query: Query text to search for.
            k: Number of results to return. Defaults to 4.
            filter: Filter by metadata. Defaults to None.
            kwargs: Accepted for interface compatibility (currently unused).

        Returns:
            List of matching documents, in the order returned by the index.
        """
        docs_with_scores = self.similarity_search_with_score(query, k, filter)
        return [doc for doc, _ in docs_with_scores]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        filter: Optional[TigrisFilter] = None,
    ) -> List[Tuple[Document, float]]:
        """Run similarity search with Tigris and return scores.

        Args:
            query (str): Query text to search for.
            k (int): Number of results to return. Defaults to 4.
            filter (Optional[TigrisFilter]): Filter by metadata.
                Defaults to None.

        Returns:
            List[Tuple[Document, float]]: Documents most similar to the query
            text, each paired with the score reported by the index.
        """
        vector = self._embed_fn.embed_query(query)
        result = self.search_index.similarity_search(
            vector=vector, k=k, filter_by=filter
        )
        return [
            (
                Document(
                    page_content=r.doc["text"],
                    # A stored document may lack metadata; Document expects a
                    # dict, so fall back to an empty one instead of None.
                    metadata=r.doc.get("metadata") or {},
                ),
                r.score,
            )
            for r in result
        ]

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        client: Optional[TigrisClient] = None,
        index_name: Optional[str] = None,
        **kwargs: Any,
    ) -> Tigris:
        """Return VectorStore initialized from texts and embeddings.

        Args:
            texts: Texts to embed and store.
            embedding: Embedding model to use.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids for the documents.
            client: Tigris client. When omitted, a default ``TigrisClient()``
                is constructed.
            index_name: Name of the search index. Required.
            kwargs: Accepted for interface compatibility (currently unused).

        Returns:
            A ``Tigris`` store with the given texts already added.

        Raises:
            ValueError: If ``index_name`` is missing or empty.
            ImportError: If ``tigrisdb`` is not installed.
        """
        if not index_name:
            raise ValueError("`index_name` is required")

        if client is None:
            try:
                # Only imported under TYPE_CHECKING at module level, so the
                # name must be bound here before it can be called.
                from tigrisdb import TigrisClient
            except ImportError as err:
                raise ImportError(
                    "Could not import tigrisdb python package. "
                    "Please install it with `pip install tigrisdb`"
                ) from err
            client = TigrisClient()

        store = cls(client, embedding, index_name)
        store.add_texts(texts=texts, metadatas=metadatas, ids=ids)
        return store

    def _prep_docs(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]],
        ids: Optional[List[str]],
    ) -> List[TigrisDocument]:
        """Embed the texts and build the document payloads for the index.

        Args:
            texts: Strings to embed. Consumed exactly once, so one-shot
                iterables such as generators are supported.
            metadatas: Optional per-text metadata. Missing or falsy entries
                become ``{}``.
            ids: Optional per-text ids. Missing or falsy entries leave the
                ``id`` field out of the document.

        Returns:
            One dict per text with ``text``, ``embeddings``, ``metadata`` and,
            when supplied, ``id``.

        Raises:
            ValueError: If ``metadatas`` or ``ids`` has more entries than
                ``texts``; the surplus entries would otherwise become
                documents without any text.
        """
        # Materialise once: the texts are needed both for embedding and for
        # building the payloads, and an iterator can only be walked once.
        text_list = list(texts)
        metadata_list = metadatas or []
        id_list = ids or []

        if len(metadata_list) > len(text_list):
            raise ValueError(
                f"Got {len(metadata_list)} metadatas for {len(text_list)} texts"
            )
        if len(id_list) > len(text_list):
            raise ValueError(f"Got {len(id_list)} ids for {len(text_list)} texts")
        if not text_list:
            return []

        embeddings: List[List[float]] = self._embed_fn.embed_documents(text_list)

        docs: List[TigrisDocument] = []
        # zip_longest pads shorter metadata/id lists with None.
        for text, metadata, embedding, doc_id in itertools.zip_longest(
            text_list, metadata_list, embeddings or [], id_list
        ):
            doc: TigrisDocument = {
                "text": text,
                "embeddings": embedding or [],
                "metadata": metadata or {},
            }
            if doc_id:
                doc["id"] = doc_id
            docs.append(doc)
        return docs