Source code for langchain_community.vectorstores.usearch

from __future__ import annotations

from typing import Any, Dict, Iterable, List, Optional, Tuple

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore

from langchain_community.docstore.base import AddableMixin, Docstore
from langchain_community.docstore.in_memory import InMemoryDocstore


[docs]def dependable_usearch_import() -> Any: """ 导入usearch(如果可用),否则引发错误。 """ return guard_import("usearch.index")
[docs]class USearch(VectorStore): """`USearch` 向量存储。 要使用,您应该安装``usearch`` python包。"""
[docs] def __init__( self, embedding: Embeddings, index: Any, docstore: Docstore, ids: List[str], ): """使用必要的组件进行初始化。""" self.embedding = embedding self.index = index self.docstore = docstore self.ids = ids
[docs] def add_texts( self, texts: Iterable[str], metadatas: Optional[List[Dict]] = None, ids: Optional[np.ndarray] = None, **kwargs: Any, ) -> List[str]: """运行更多文本通过嵌入并添加到向量存储。 参数: texts:要添加到向量存储的字符串的可迭代对象。 metadatas:与文本相关的元数据的可选列表。 ids:唯一ID的可选列表。 返回: 将文本添加到向量存储中的ID列表。 """ if not isinstance(self.docstore, AddableMixin): raise ValueError( "If trying to add texts, the underlying docstore should support " f"adding items, which {self.docstore} does not" ) embeddings = self.embedding.embed_documents(list(texts)) documents = [] for i, text in enumerate(texts): metadata = metadatas[i] if metadatas else {} documents.append(Document(page_content=text, metadata=metadata)) last_id = int(self.ids[-1]) + 1 if ids is None: ids = np.array([str(last_id + id) for id, _ in enumerate(texts)]) self.index.add(np.array(ids), np.array(embeddings)) self.docstore.add(dict(zip(ids, documents))) self.ids.extend(ids) return ids.tolist()
[docs] def similarity_search_with_score( self, query: str, k: int = 4, ) -> List[Tuple[Document, float]]: """返回与查询最相似的文档。 参数: query:要查找与之相似文档的文本。 k:要返回的文档数量。默认为4。 返回: 与查询最相似的文档列表,带有距离信息。 """ query_embedding = self.embedding.embed_query(query) matches = self.index.search(np.array(query_embedding), k) docs_with_scores: List[Tuple[Document, float]] = [] for id, score in zip(matches.keys, matches.distances): doc = self.docstore.search(str(id)) if not isinstance(doc, Document): raise ValueError(f"Could not find document for id {id}, got {doc}") docs_with_scores.append((doc, score)) return docs_with_scores
[docs] @classmethod def from_texts( cls, texts: List[str], embedding: Embeddings, metadatas: Optional[List[Dict]] = None, ids: Optional[np.ndarray] = None, metric: str = "cos", **kwargs: Any, ) -> USearch: """从原始文档构建USearch包装器。 这是一个用户友好的接口,可以: 1. 嵌入文档。 2. 创建一个内存中的文档存储。 3. 初始化USearch数据库。 这旨在是一个快速开始的方式。 示例: .. code-block:: python from langchain_community.vectorstores import USearch from langchain_community.embeddings import OpenAIEmbeddings embeddings = OpenAIEmbeddings() usearch = USearch.from_texts(texts, embeddings) """ embeddings = embedding.embed_documents(texts) documents: List[Document] = [] if ids is None: ids = np.array([str(id) for id, _ in enumerate(texts)]) for i, text in enumerate(texts): metadata = metadatas[i] if metadatas else {} documents.append(Document(page_content=text, metadata=metadata)) docstore = InMemoryDocstore(dict(zip(ids, documents))) usearch = guard_import("usearch.index") index = usearch.Index(ndim=len(embeddings[0]), metric=metric) index.add(np.array(ids), np.array(embeddings)) return cls(embedding, index, docstore, ids.tolist())