Qdrant

QdrantReader #

Bases: BaseReader

Qdrant阅读器。

从现有的Qdrant集合中检索文档。

Parameters:

Name	Type	Description	Default
`location`	`Optional[str]`	如果为 `:memory:` - 使用内存中的Qdrant实例。如果为 `str` - 将其用作 `url` 参数。如果为 `None` - 使用 `host` 和 `port` 的默认值。	`None`
`url`	`Optional[str]`	可以是主机或字符串 "Optional[scheme], host, Optional[port], Optional[prefix]"。默认值: `None`	`None`
`port`	`Optional[int]`	REST API接口的端口。默认值: 6333	`6333`
`grpc_port`	`int`	gRPC接口的端口。默认值: 6334	`6334`
`prefer_grpc`	`bool`	如果为 `true` - 在自定义方法中尽可能使用gRPC接口。	`False`
`https`	`Optional[bool]`	如果为 `true` - 使用HTTPS(SSL)协议。默认值: `false`	`None`
`api_key`	`Optional[str]`	用于在Qdrant Cloud中进行身份验证的API密钥。默认值: `None`	`None`
`prefix`	`Optional[str]`	如果不为 `None` - 将 `prefix` 添加到REST URL路径中。示例: `service/v1` 将导致REST API的URL路径为 `http://localhost:6333/service/v1/{qdrant-endpoint}`。默认值: `None`	`None`
`timeout`	`Optional[float]`	REST和gRPC API请求的超时时间。默认值: REST为5.0秒，gRPC为无限制	`None`
`host`	`Optional[str]`	Qdrant服务的主机名。如果url和host都为None，则设置为'localhost'。默认值: `None`	`None`

Source code in llama_index/readers/qdrant/base.py

class QdrantReader(BaseReader):
    """Qdrant reader.

    Retrieve documents from an existing Qdrant collection.

    Args:
        location:
            If `:memory:` - use an in-memory Qdrant instance.
            If `str` - use it as the `url` parameter.
            If `None` - use the default values for `host` and `port`.
        url:
            Either a host or a string of the form
            "Optional[scheme], host, Optional[port], Optional[prefix]".
            Default: `None`
        port: Port of the REST API interface. Default: 6333
        grpc_port: Port of the gRPC interface. Default: 6334
        prefer_grpc:
            If `true` - use the gRPC interface whenever possible in
            custom methods. Default: `False`
        https: If `true` - use the HTTPS (SSL) protocol. Default: `None`
        api_key:
            API key for authentication in Qdrant Cloud. Default: `None`
        prefix:
            If not `None` - add `prefix` to the REST URL path.
            Example: `service/v1` results in the REST API URL path
            `http://localhost:6333/service/v1/{qdrant-endpoint}`.
            Default: `None`
        timeout:
            Timeout for REST and gRPC API requests.
            Default: 5.0 seconds for REST and unlimited for gRPC
        host:
            Host name of the Qdrant service. If both url and host are
            None, it is set to 'localhost'. Default: `None`
        path:
            Forwarded unchanged to `qdrant_client.QdrantClient` as its
            `path` argument (presumably the on-disk location for local
            mode - confirm against the qdrant-client docs).
            Default: `None`
    """

    def __init__(
        self,
        location: Optional[str] = None,
        url: Optional[str] = None,
        port: Optional[int] = 6333,
        grpc_port: int = 6334,
        prefer_grpc: bool = False,
        https: Optional[bool] = None,
        api_key: Optional[str] = None,
        prefix: Optional[str] = None,
        timeout: Optional[float] = None,
        host: Optional[str] = None,
        path: Optional[str] = None,
    ):
        """Create the underlying `QdrantClient` from the given parameters.

        Raises:
            ImportError: If the `qdrant-client` package is not installed.
        """
        import_err_msg = (
            "`qdrant-client` package not found, please run `pip install qdrant-client`"
        )
        try:
            import qdrant_client
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(import_err_msg) from err

        # Every constructor argument is handed to the client untouched.
        self._client = qdrant_client.QdrantClient(
            location=location,
            url=url,
            port=port,
            grpc_port=grpc_port,
            prefer_grpc=prefer_grpc,
            https=https,
            api_key=api_key,
            prefix=prefix,
            timeout=timeout,
            host=host,
            path=path,
        )

    def load_data(
        self,
        collection_name: str,
        query_vector: List[float],
        should_search_mapping: Optional[Dict[str, str]] = None,
        must_search_mapping: Optional[Dict[str, str]] = None,
        must_not_search_mapping: Optional[Dict[str, str]] = None,
        rang_search_mapping: Optional[Dict[str, Dict[str, float]]] = None,
        limit: int = 10,
    ) -> List[Document]:
        """Load data from Qdrant.

        Runs a vector search against `collection_name` and converts every
        returned point into a `Document`.

        Args:
            collection_name (str): Name of the Qdrant collection.
            query_vector (List[float]): Query vector.
            should_search_mapping (Optional[Dict[str, str]]): Mapping from
                field name to query string; each entry becomes a
                `MatchText` condition in the filter's `should` clause.
            must_search_mapping (Optional[Dict[str, str]]): Mapping from
                field name to query string; each entry becomes a
                `MatchValue` condition in the filter's `must` clause.
            must_not_search_mapping (Optional[Dict[str, str]]): Mapping from
                field name to query string; each entry becomes a
                `MatchValue` condition in the filter's `must_not` clause.
            rang_search_mapping (Optional[Dict[str, Dict[str, float]]]):
                Mapping from field name to a range query. The keys `gte`,
                `lte`, `gt` and `lt` are read; missing keys are passed as
                `None`. Range conditions are appended to the `should`
                clause.
            limit (int): Number of results to return.

        Example:
            reader = QdrantReader()
            reader.load_data(
                 collection_name="test_collection",
                 query_vector=[0.1, 0.2, 0.3],
                 should_search_mapping={"text_field": "text"},
                 must_search_mapping={"text_field": "text"},
                 must_not_search_mapping={"text_field": "text"},
                 # gte, lte, gt, lt supported
                 rang_search_mapping={"text_field": {"gte": 0.1, "lte": 0.2}},
                 limit=10
            )

        Returns:
            List[Document]: One document per search hit, built from the
            payload keys `doc_id`, `text` and `metadata`, with the point's
            vector stored as the embedding.
        """
        from qdrant_client.http.models import (
            FieldCondition,
            Filter,
            MatchText,
            MatchValue,
            Range,
        )
        from qdrant_client.http.models.models import Payload

        # Treat an omitted mapping as "no conditions of this kind".
        should_search_mapping = should_search_mapping or {}
        must_search_mapping = must_search_mapping or {}
        must_not_search_mapping = must_not_search_mapping or {}
        rang_search_mapping = rang_search_mapping or {}

        should_search_conditions = [
            FieldCondition(key=field, match=MatchText(text=query))
            for field, query in should_search_mapping.items()
        ]
        must_search_conditions = [
            FieldCondition(key=field, match=MatchValue(value=query))
            for field, query in must_search_mapping.items()
        ]
        must_not_search_conditions = [
            FieldCondition(key=field, match=MatchValue(value=query))
            for field, query in must_not_search_mapping.items()
        ]
        rang_search_conditions = [
            FieldCondition(
                key=field,
                range=Range(
                    gte=bounds.get("gte"),
                    lte=bounds.get("lte"),
                    gt=bounds.get("gt"),
                    lt=bounds.get("lt"),
                ),
            )
            for field, bounds in rang_search_mapping.items()
        ]
        # Range conditions share the `should` clause with the text matches.
        should_search_conditions.extend(rang_search_conditions)

        response = self._client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            query_filter=Filter(
                must=must_search_conditions,
                must_not=must_not_search_conditions,
                should=should_search_conditions,
            ),
            with_vectors=True,
            with_payload=True,
            limit=limit,
        )

        documents = []
        for point in response:
            # `cast` only informs the type checker; it performs no runtime
            # conversion and cannot raise, so no error handling is needed.
            payload = cast(Payload, point.payload)
            vector = cast(List[float], point.vector)
            documents.append(
                Document(
                    id_=payload.get("doc_id"),
                    text=payload.get("text"),
                    metadata=payload.get("metadata"),
                    embedding=vector,
                )
            )

        return documents

load_data #

load_data(
    collection_name: str,
    query_vector: List[float],
    should_search_mapping: Optional[Dict[str, str]] = None,
    must_search_mapping: Optional[Dict[str, str]] = None,
    must_not_search_mapping: Optional[
        Dict[str, str]
    ] = None,
    rang_search_mapping: Optional[
        Dict[str, Dict[str, float]]
    ] = None,
    limit: int = 10,
) -> List[Document]

从Qdrant加载数据。

Parameters:

Name	Type	Description	Default
`collection_name`	`str`	Qdrant集合的名称。	required
`query_vector`	`List[float]`	查询向量。	required
`should_search_mapping`	`Optional[Dict[str, str]]`	字段名称到查询字符串的映射。	`None`
`must_search_mapping`	`Optional[Dict[str, str]]`	字段名称到查询字符串的映射。	`None`
`must_not_search_mapping`	`Optional[Dict[str, str]]`	字段名称到查询字符串的映射。	`None`
`rang_search_mapping`	`Optional[Dict[str, Dict[str, float]]]`	字段名称到范围查询的映射。	`None`
`limit`	`int`	要返回的结果数量。	`10`

示例

    reader = QdrantReader()
    reader.load_data(
        collection_name="test_collection",
        query_vector=[0.1, 0.2, 0.3],
        should_search_mapping={"text_field": "text"},
        must_search_mapping={"text_field": "text"},
        must_not_search_mapping={"text_field": "text"},
        # gte, lte, gt, lt supported
        rang_search_mapping={"text_field": {"gte": 0.1, "lte": 0.2}},
        limit=10
    )

Returns:

Type	Description
`List[Document]`	文档的列表。

Source code in llama_index/readers/qdrant/base.py

    def load_data(
        self,
        collection_name: str,
        query_vector: List[float],
        should_search_mapping: Optional[Dict[str, str]] = None,
        must_search_mapping: Optional[Dict[str, str]] = None,
        must_not_search_mapping: Optional[Dict[str, str]] = None,
        rang_search_mapping: Optional[Dict[str, Dict[str, float]]] = None,
        limit: int = 10,
    ) -> List[Document]:
        """Load data from Qdrant.

        Runs a vector search against `collection_name` and converts every
        returned point into a `Document`.

        Args:
            collection_name (str): Name of the Qdrant collection.
            query_vector (List[float]): Query vector.
            should_search_mapping (Optional[Dict[str, str]]): Mapping from
                field name to query string; each entry becomes a
                `MatchText` condition in the filter's `should` clause.
            must_search_mapping (Optional[Dict[str, str]]): Mapping from
                field name to query string; each entry becomes a
                `MatchValue` condition in the filter's `must` clause.
            must_not_search_mapping (Optional[Dict[str, str]]): Mapping from
                field name to query string; each entry becomes a
                `MatchValue` condition in the filter's `must_not` clause.
            rang_search_mapping (Optional[Dict[str, Dict[str, float]]]):
                Mapping from field name to a range query. The keys `gte`,
                `lte`, `gt` and `lt` are read; missing keys are passed as
                `None`. Range conditions are appended to the `should`
                clause.
            limit (int): Number of results to return.

        Example:
            reader = QdrantReader()
            reader.load_data(
                 collection_name="test_collection",
                 query_vector=[0.1, 0.2, 0.3],
                 should_search_mapping={"text_field": "text"},
                 must_search_mapping={"text_field": "text"},
                 must_not_search_mapping={"text_field": "text"},
                 # gte, lte, gt, lt supported
                 rang_search_mapping={"text_field": {"gte": 0.1, "lte": 0.2}},
                 limit=10
            )

        Returns:
            List[Document]: One document per search hit, built from the
            payload keys `doc_id`, `text` and `metadata`, with the point's
            vector stored as the embedding.
        """
        from qdrant_client.http.models import (
            FieldCondition,
            Filter,
            MatchText,
            MatchValue,
            Range,
        )
        from qdrant_client.http.models.models import Payload

        # Treat an omitted mapping as "no conditions of this kind".
        should_search_mapping = should_search_mapping or {}
        must_search_mapping = must_search_mapping or {}
        must_not_search_mapping = must_not_search_mapping or {}
        rang_search_mapping = rang_search_mapping or {}

        should_search_conditions = [
            FieldCondition(key=field, match=MatchText(text=query))
            for field, query in should_search_mapping.items()
        ]
        must_search_conditions = [
            FieldCondition(key=field, match=MatchValue(value=query))
            for field, query in must_search_mapping.items()
        ]
        must_not_search_conditions = [
            FieldCondition(key=field, match=MatchValue(value=query))
            for field, query in must_not_search_mapping.items()
        ]
        rang_search_conditions = [
            FieldCondition(
                key=field,
                range=Range(
                    gte=bounds.get("gte"),
                    lte=bounds.get("lte"),
                    gt=bounds.get("gt"),
                    lt=bounds.get("lt"),
                ),
            )
            for field, bounds in rang_search_mapping.items()
        ]
        # Range conditions share the `should` clause with the text matches.
        should_search_conditions.extend(rang_search_conditions)

        response = self._client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            query_filter=Filter(
                must=must_search_conditions,
                must_not=must_not_search_conditions,
                should=should_search_conditions,
            ),
            with_vectors=True,
            with_payload=True,
            limit=limit,
        )

        documents = []
        for point in response:
            # `cast` only informs the type checker; it performs no runtime
            # conversion and cannot raise, so no error handling is needed.
            payload = cast(Payload, point.payload)
            vector = cast(List[float], point.vector)
            documents.append(
                Document(
                    id_=payload.get("doc_id"),
                    text=payload.get("text"),
                    metadata=payload.get("metadata"),
                    embedding=vector,
                )
            )

        return documents