Skip to content

Weaviate

WeaviateReader #

Bases: BaseReader

Weaviate阅读器。

通过向量查找从Weaviate中检索文档。可以选择将检索到的所有文档连接成一个文档,或者为每个检索结果分别返回一个Document对象。

Parameters:

Name Type Description Default
host str

主机。

required
auth_client_secret Optional[AuthCredentials]

传递给Weaviate客户端的认证凭据(auth_client_secret)。

None
Source code in llama_index/readers/weaviate/base.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
class WeaviateReader(BaseReader):
    """Reader that loads documents from a Weaviate instance.

    Runs a GraphQL ``Get`` query against Weaviate and converts every returned
    entry into a ``Document``. The caller can either receive one ``Document``
    per entry or have all entries concatenated into a single ``Document``.

    Args:
        host (str): Host passed to ``weaviate.Client``.
        auth_client_secret (Optional[weaviate.auth.AuthCredentials]):
            Credentials forwarded to ``weaviate.Client`` as
            ``auth_client_secret``. Defaults to None.
    """

    def __init__(
        self,
        host: str,
        auth_client_secret: Optional[Any] = None,
    ) -> None:
        """Create the Weaviate client used by ``load_data``.

        Raises:
            ImportError: If the ``weaviate`` package cannot be imported.
        """
        try:
            import weaviate  # noqa
            from weaviate import Client
            from weaviate.auth import AuthCredentials  # noqa
        except ImportError as err:
            # Chain the original error so the underlying import failure
            # (e.g. a broken transitive dependency) stays visible.
            raise ImportError(
                "`weaviate` package not found, please run `pip install weaviate-client`"
            ) from err

        self.client: Client = Client(host, auth_client_secret=auth_client_secret)

    def load_data(
        self,
        class_name: Optional[str] = None,
        properties: Optional[List[str]] = None,
        graphql_query: Optional[str] = None,
        separate_documents: Optional[bool] = True,
    ) -> List[Document]:
        """Load data from Weaviate.

        When both ``class_name`` and ``properties`` are given, a ``Get`` query
        is built from them and any ``graphql_query`` argument is ignored.
        Otherwise ``graphql_query`` is sent unchanged.

        Args:
            class_name (Optional[str]): Class to retrieve documents from. When
                omitted, the first class found in the response is used.
            properties (Optional[List[str]]): Properties to retrieve for each
                document.
            graphql_query (Optional[str]): Raw GraphQL query; it must be a
                ``Get`` query.
            separate_documents (Optional[bool]): If true, return one
                ``Document`` per entry. Otherwise the entries' texts are
                joined with blank lines into a single ``Document`` (without
                an embedding). Defaults to True.

        Returns:
            List[Document]: The loaded documents.

        Raises:
            ValueError: If neither (``class_name`` and ``properties``) nor
                ``graphql_query`` is given, if the response reports errors,
                if the response is not a ``Get`` response, or if the class
                name has to be inferred but ``Get`` contains no classes.
        """
        if class_name is not None and properties is not None:
            props_txt = "\n".join(properties)
            graphql_query = f"""
            {{
                Get {{
                    {class_name} {{
                        {props_txt}
                    }}
                }}
            }}
            """
        elif graphql_query is None:
            raise ValueError(
                "Either `class_name` and `properties` must be specified, "
                "or `graphql_query` must be specified."
            )

        response = self.client.query.raw(graphql_query)
        if "errors" in response:
            raise ValueError(f"Invalid query, got errors: {response['errors']}")

        data_response = response["data"]
        if "Get" not in data_response:
            raise ValueError("Invalid query response, must be a Get query.")
        get_response = data_response["Get"]

        if class_name is None:
            # Only a raw query was supplied: use the first class in the
            # response. An empty `Get` must surface as ValueError rather than
            # leaking a bare StopIteration from `next`.
            class_name = next(iter(get_response), None)
            if class_name is None:
                raise ValueError("Invalid query response, `Get` contains no classes.")

        documents = []
        for entry in get_response[class_name]:
            embedding: Optional[List[float]] = None
            # Render each property as "<property>: <value>", one per line.
            text_list = []
            for k, v in entry.items():
                if k == "_additional":
                    # `_additional` is metadata, not text; only its vector
                    # (when present) is kept, as the document embedding.
                    if "vector" in v:
                        embedding = v["vector"]
                    continue
                text_list.append(f"{k}: {v}")

            documents.append(Document(text="\n".join(text_list), embedding=embedding))

        if not separate_documents:
            # Collapse all documents into one, separated by blank lines.
            text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=text)]

        return documents

load_data #

load_data(
    class_name: Optional[str] = None,
    properties: Optional[List[str]] = None,
    graphql_query: Optional[str] = None,
    separate_documents: Optional[bool] = True,
) -> List[Document]

从Weaviate加载数据。

如果未提供“graphql_query”,则必须同时提供“class_name”和“properties”。

Returns:

Type Description
List[Document]

List[Document]:文档的列表。

Source code in llama_index/readers/weaviate/base.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
    def load_data(
        self,
        class_name: Optional[str] = None,
        properties: Optional[List[str]] = None,
        graphql_query: Optional[str] = None,
        separate_documents: Optional[bool] = True,
    ) -> List[Document]:
        """Load data from Weaviate.

        When both ``class_name`` and ``properties`` are given, a ``Get`` query
        is built from them and any ``graphql_query`` argument is ignored.
        Otherwise ``graphql_query`` is sent unchanged.

        Args:
            class_name (Optional[str]): Class to retrieve documents from. When
                omitted, the first class found in the response is used.
            properties (Optional[List[str]]): Properties to retrieve for each
                document.
            graphql_query (Optional[str]): Raw GraphQL query; it must be a
                ``Get`` query.
            separate_documents (Optional[bool]): If true, return one
                ``Document`` per entry. Otherwise the entries' texts are
                joined with blank lines into a single ``Document`` (without
                an embedding). Defaults to True.

        Returns:
            List[Document]: The loaded documents.

        Raises:
            ValueError: If neither (``class_name`` and ``properties``) nor
                ``graphql_query`` is given, if the response reports errors,
                if the response is not a ``Get`` response, or if the class
                name has to be inferred but ``Get`` contains no classes.
        """
        if class_name is not None and properties is not None:
            props_txt = "\n".join(properties)
            graphql_query = f"""
            {{
                Get {{
                    {class_name} {{
                        {props_txt}
                    }}
                }}
            }}
            """
        elif graphql_query is None:
            raise ValueError(
                "Either `class_name` and `properties` must be specified, "
                "or `graphql_query` must be specified."
            )

        response = self.client.query.raw(graphql_query)
        if "errors" in response:
            raise ValueError(f"Invalid query, got errors: {response['errors']}")

        data_response = response["data"]
        if "Get" not in data_response:
            raise ValueError("Invalid query response, must be a Get query.")
        get_response = data_response["Get"]

        if class_name is None:
            # Only a raw query was supplied: use the first class in the
            # response. An empty `Get` must surface as ValueError rather than
            # leaking a bare StopIteration from `next`.
            class_name = next(iter(get_response), None)
            if class_name is None:
                raise ValueError("Invalid query response, `Get` contains no classes.")

        documents = []
        for entry in get_response[class_name]:
            embedding: Optional[List[float]] = None
            # Render each property as "<property>: <value>", one per line.
            text_list = []
            for k, v in entry.items():
                if k == "_additional":
                    # `_additional` is metadata, not text; only its vector
                    # (when present) is kept, as the document embedding.
                    if "vector" in v:
                        embedding = v["vector"]
                    continue
                text_list.append(f"{k}: {v}")

            documents.append(Document(text="\n".join(text_list), embedding=embedding))

        if not separate_documents:
            # Collapse all documents into one, separated by blank lines.
            text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=text)]

        return documents