DashVector

DashVectorReader #

Bases: BaseReader

DashVector 读取器。

Parameters:

Name	Type	Description	Default
`api_key`	`str`	DashVector API 密钥。	required
`endpoint`	`str`	DashVector 集群端点。	required

Source code in llama_index/readers/dashvector/base.py

class DashVectorReader(BaseReader):
    """Reader that loads documents from a DashVector collection.

    Args:
        api_key (str): DashVector API key.
        endpoint (str): DashVector cluster endpoint.
    """

    def __init__(self, api_key: str, endpoint: str):
        """Create the DashVector client from the given credentials.

        Args:
            api_key (str): DashVector API key.
            endpoint (str): DashVector cluster endpoint.

        Raises:
            ImportError: If the `dashvector` package is not installed.
        """
        # Imported lazily so the dependency is only required when this
        # reader is actually instantiated.
        try:
            import dashvector
        except ImportError as err:
            raise ImportError(
                "`dashvector` package not found, please run `pip install dashvector`"
            ) from err

        self._client = dashvector.Client(api_key=api_key, endpoint=endpoint)

    def load_data(
        self,
        collection_name: str,
        id_to_text_map: Dict[str, str],
        vector: Optional[List[float]],
        top_k: int,
        separate_documents: bool = True,
        filter: Optional[str] = None,
        include_vector: bool = True,
    ) -> List[Document]:
        """Query a DashVector collection and return the hits as documents.

        Args:
            collection_name (str): Name of the collection to query.
            id_to_text_map (Dict[str, str]): Mapping from document ID to the
                text to use for that document.
            vector (Optional[List[float]]): Query vector.
            top_k (int): Number of results to return.
            separate_documents (bool): Whether to return one document per
                retrieved entry. When False, all texts are joined with blank
                lines into a single document. Defaults to True.
            filter (Optional[str]): Filter on document fields, written as a
                SQL ``where`` clause.
            include_vector (bool): Whether to include embeddings in the
                response. Defaults to True.

        Returns:
            List[Document]: The list of documents.

        Raises:
            ValueError: If the collection lookup returns a falsy result, or a
                returned ID is missing from ``id_to_text_map``.
            Exception: If the query returns a falsy response.
        """
        collection = self._client.get(collection_name)
        # A falsy result from the client is treated as a failed lookup.
        if not collection:
            raise ValueError(
                f"Failed to get collection: {collection_name}," f"Error: {collection}"
            )

        resp = collection.query(
            vector=vector,
            topk=top_k,
            filter=filter,
            include_vector=include_vector,
        )
        # A falsy response is treated as a failed query.
        if not resp:
            raise Exception(f"Failed to query document," f"Error: {resp}")

        documents = []
        for doc in resp:
            if doc.id not in id_to_text_map:
                raise ValueError("ID not found in id_to_text_map.")
            text = id_to_text_map[doc.id]
            embedding = doc.vector
            # Normalise "no vector" to None. The None check comes first so a
            # missing vector does not crash on len(None).
            if embedding is None or len(embedding) == 0:
                embedding = None
            documents.append(Document(text=text, embedding=embedding))

        if not separate_documents:
            # Collapse all hits into one document; embeddings are dropped
            # because a single vector cannot represent the merged text.
            text_list = [doc.get_content() for doc in documents]
            text = "\n\n".join(text_list)
            documents = [Document(text=text)]

        return documents

load_data #

load_data(
    collection_name: str,
    id_to_text_map: Dict[str, str],
    vector: Optional[List[float]],
    top_k: int,
    separate_documents: bool = True,
    filter: Optional[str] = None,
    include_vector: bool = True,
) -> List[Document]

从DashVector加载数据。

Parameters:

Name	Type	Description	Default
`collection_name`	`str`	集合的名称。	required
`id_to_text_map`	`Dict[str, str]`	从ID到文本的映射。	required
`separate_documents`	`bool`	是否为每个检索到的条目返回单独的文档。默认为True。	`True`
`vector`	`Optional[List[float]]`	查询向量。	required
`top_k`	`int`	要返回的结果数量。	required
`filter`	`Optional[str]`	符合SQL where子句规范的文档字段过滤条件。	`None`
`include_vector`	`bool`	是否在响应中包含嵌入。默认为True。	`True`

Returns:

Type	Description
`List[Document]`	文档列表。

Source code in llama_index/readers/dashvector/base.py

    def load_data(
        self,
        collection_name: str,
        id_to_text_map: Dict[str, str],
        vector: Optional[List[float]],
        top_k: int,
        separate_documents: bool = True,
        filter: Optional[str] = None,
        include_vector: bool = True,
    ) -> List[Document]:
        """Query a DashVector collection and return the hits as documents.

        Args:
            collection_name (str): Name of the collection to query.
            id_to_text_map (Dict[str, str]): Mapping from document ID to the
                text to use for that document.
            vector (Optional[List[float]]): Query vector.
            top_k (int): Number of results to return.
            separate_documents (bool): Whether to return one document per
                retrieved entry. When False, all texts are joined with blank
                lines into a single document. Defaults to True.
            filter (Optional[str]): Filter on document fields, written as a
                SQL ``where`` clause.
            include_vector (bool): Whether to include embeddings in the
                response. Defaults to True.

        Returns:
            List[Document]: The list of documents.

        Raises:
            ValueError: If the collection lookup returns a falsy result, or a
                returned ID is missing from ``id_to_text_map``.
            Exception: If the query returns a falsy response.
        """
        collection = self._client.get(collection_name)
        # A falsy result from the client is treated as a failed lookup.
        if not collection:
            raise ValueError(
                f"Failed to get collection: {collection_name}," f"Error: {collection}"
            )

        resp = collection.query(
            vector=vector,
            topk=top_k,
            filter=filter,
            include_vector=include_vector,
        )
        # A falsy response is treated as a failed query.
        if not resp:
            raise Exception(f"Failed to query document," f"Error: {resp}")

        documents = []
        for doc in resp:
            if doc.id not in id_to_text_map:
                raise ValueError("ID not found in id_to_text_map.")
            text = id_to_text_map[doc.id]
            embedding = doc.vector
            # Normalise "no vector" to None. The None check comes first so a
            # missing vector does not crash on len(None).
            if embedding is None or len(embedding) == 0:
                embedding = None
            documents.append(Document(text=text, embedding=embedding))

        if not separate_documents:
            # Collapse all hits into one document; embeddings are dropped
            # because a single vector cannot represent the merged text.
            text_list = [doc.get_content() for doc in documents]
            text = "\n\n".join(text_list)
            documents = [Document(text=text)]

        return documents