Weaviate

WeaviateReader #

Bases: BaseReader

Weaviate 读取器。

通过向量查找从 Weaviate 中检索文档。可以选择将检索到的所有文档合并为一个文档，或者为每条检索结果分别返回一个文档对象。

Parameters:

Name	Type	Description	Default
`host`	`str`	主机。	required
`auth_client_secret`	`Optional[AuthCredentials]`	传递给 Weaviate 客户端的身份验证凭据。	`None`

Source code in llama_index/readers/weaviate/base.py

class WeaviateReader(BaseReader):
    """Reader that loads documents from a Weaviate instance.

    Runs a GraphQL ``Get`` query against Weaviate and converts every returned
    entry into a ``Document``. The results can be returned as one ``Document``
    per entry or merged into a single ``Document``.

    Args:
        host (str): Address of the Weaviate instance, passed to
            ``weaviate.Client``.
        auth_client_secret (Optional[weaviate.auth.AuthCredentials]):
            Credentials forwarded to ``weaviate.Client`` as
            ``auth_client_secret``. Defaults to ``None``.
    """

    def __init__(
        self,
        host: str,
        auth_client_secret: Optional[Any] = None,
    ) -> None:
        """Create the Weaviate client used by this reader.

        Raises:
            ImportError: If the ``weaviate`` package cannot be imported.
        """
        try:
            import weaviate  # noqa
            from weaviate import Client
            from weaviate.auth import AuthCredentials  # noqa
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "`weaviate` package not found, please run `pip install weaviate-client`"
            ) from err

        self.client: Client = Client(host, auth_client_secret=auth_client_secret)

    def load_data(
        self,
        class_name: Optional[str] = None,
        properties: Optional[List[str]] = None,
        graphql_query: Optional[str] = None,
        separate_documents: Optional[bool] = True,
    ) -> List[Document]:
        """Load data from Weaviate.

        When both ``class_name`` and ``properties`` are given, a ``Get`` query
        is built from them and any ``graphql_query`` passed alongside is
        ignored. Otherwise ``graphql_query`` is sent as-is.

        Args:
            class_name (Optional[str]): Class to retrieve documents from. If
                omitted, it is inferred from the first class in the response.
            properties (Optional[List[str]]): Properties to retrieve for each
                entry.
            graphql_query (Optional[str]): Raw GraphQL query. It is assumed
                to be a ``Get`` query.
            separate_documents (Optional[bool]): Whether to return one
                document per entry. When falsy, all entries are joined into
                a single document. Defaults to True.

        Returns:
            List[Document]: The loaded documents.

        Raises:
            ValueError: If neither ``class_name`` + ``properties`` nor
                ``graphql_query`` is given, if the response contains errors,
                if it is not a ``Get`` response, or if the class name has to
                be inferred but the ``Get`` result contains no class.
        """
        if class_name is not None and properties is not None:
            props_txt = "\n".join(properties)
            graphql_query = f"""
            {{
                Get {{
                    {class_name} {{
                        {props_txt}
                    }}
                }}
            }}
            """
        elif graphql_query is None:
            raise ValueError(
                "Either `class_name` and `properties` must be specified, "
                "or `graphql_query` must be specified."
            )

        response = self.client.query.raw(graphql_query)
        if "errors" in response:
            raise ValueError("Invalid query, got errors: {}".format(response["errors"]))

        data_response = response["data"]
        if "Get" not in data_response:
            raise ValueError("Invalid query response, must be a Get query.")

        get_payload = data_response["Get"]
        if class_name is None:
            # Only a raw query was given: take the first class in the response.
            # A default is passed to `next` so that an empty result raises a
            # descriptive ValueError instead of leaking StopIteration.
            class_name = next(iter(get_payload), None)
            if class_name is None:
                raise ValueError(
                    "Invalid query response, `Get` result contains no classes."
                )
        entries = get_payload[class_name]

        documents = []
        for entry in entries:
            embedding: Optional[List[float]] = None
            # Render each property as "<property>: <value>", one per line.
            text_list = []
            for k, v in entry.items():
                if k == "_additional":
                    # `_additional` carries metadata, not text; only the
                    # vector (when requested) is kept, as the embedding.
                    if "vector" in v:
                        embedding = v["vector"]
                    continue
                text_list.append(f"{k}: {v}")

            text = "\n".join(text_list)
            documents.append(Document(text=text, embedding=embedding))

        if not separate_documents:
            # Merge everything into a single document; the per-entry
            # embeddings are not carried over to the merged document.
            text_list = [doc.get_content() for doc in documents]
            text = "\n\n".join(text_list)
            documents = [Document(text=text)]

        return documents

load_data #

load_data(
    class_name: Optional[str] = None,
    properties: Optional[List[str]] = None,
    graphql_query: Optional[str] = None,
    separate_documents: Optional[bool] = True,
) -> List[Document]

从Weaviate加载数据。

如果同时提供了“class_name”和“properties”，则由它们构建 Get 查询；否则必须提供“graphql_query”。

Returns:

Type	Description
`List[Document]`	文档的列表。

Source code in llama_index/readers/weaviate/base.py

    def load_data(
        self,
        class_name: Optional[str] = None,
        properties: Optional[List[str]] = None,
        graphql_query: Optional[str] = None,
        separate_documents: Optional[bool] = True,
    ) -> List[Document]:
        """Load data from Weaviate.

        When both ``class_name`` and ``properties`` are given, a ``Get`` query
        is built from them and any ``graphql_query`` passed alongside is
        ignored. Otherwise ``graphql_query`` is sent as-is.

        Args:
            class_name (Optional[str]): Class to retrieve documents from. If
                omitted, it is inferred from the first class in the response.
            properties (Optional[List[str]]): Properties to retrieve for each
                entry.
            graphql_query (Optional[str]): Raw GraphQL query. It is assumed
                to be a ``Get`` query.
            separate_documents (Optional[bool]): Whether to return one
                document per entry. When falsy, all entries are joined into
                a single document. Defaults to True.

        Returns:
            List[Document]: The loaded documents.

        Raises:
            ValueError: If neither ``class_name`` + ``properties`` nor
                ``graphql_query`` is given, if the response contains errors,
                if it is not a ``Get`` response, or if the class name has to
                be inferred but the ``Get`` result contains no class.
        """
        if class_name is not None and properties is not None:
            props_txt = "\n".join(properties)
            graphql_query = f"""
            {{
                Get {{
                    {class_name} {{
                        {props_txt}
                    }}
                }}
            }}
            """
        elif graphql_query is None:
            raise ValueError(
                "Either `class_name` and `properties` must be specified, "
                "or `graphql_query` must be specified."
            )

        response = self.client.query.raw(graphql_query)
        if "errors" in response:
            raise ValueError("Invalid query, got errors: {}".format(response["errors"]))

        data_response = response["data"]
        if "Get" not in data_response:
            raise ValueError("Invalid query response, must be a Get query.")

        get_payload = data_response["Get"]
        if class_name is None:
            # Only a raw query was given: take the first class in the response.
            # A default is passed to `next` so that an empty result raises a
            # descriptive ValueError instead of leaking StopIteration.
            class_name = next(iter(get_payload), None)
            if class_name is None:
                raise ValueError(
                    "Invalid query response, `Get` result contains no classes."
                )
        entries = get_payload[class_name]

        documents = []
        for entry in entries:
            embedding: Optional[List[float]] = None
            # Render each property as "<property>: <value>", one per line.
            text_list = []
            for k, v in entry.items():
                if k == "_additional":
                    # `_additional` carries metadata, not text; only the
                    # vector (when requested) is kept, as the embedding.
                    if "vector" in v:
                        embedding = v["vector"]
                    continue
                text_list.append(f"{k}: {v}")

            text = "\n".join(text_list)
            documents.append(Document(text=text, embedding=embedding))

        if not separate_documents:
            # Merge everything into a single document; the per-entry
            # embeddings are not carried over to the merged document.
            text_list = [doc.get_content() for doc in documents]
            text = "\n\n".join(text_list)
            documents = [Document(text=text)]

        return documents