Source code for langchain_community.document_loaders.bigquery
from __future__ import annotations
from typing import TYPE_CHECKING, List, Optional
from langchain_core._api.deprecation import deprecated
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.utilities.vertexai import get_client_info
if TYPE_CHECKING:
from google.auth.credentials import Credentials
@deprecated(
    since="0.0.32",
    removal="0.3.0",
    alternative_import="langchain_google_community.BigQueryLoader",
)
class BigQueryLoader(BaseLoader):
    """Load from the Google Cloud Platform `BigQuery`.

    Each document represents one row of the query result. The columns named
    in `page_content_columns` are written into the document's `page_content`;
    the columns named in `metadata_columns` are written into the document's
    `metadata`. By default, all columns are written into `page_content` and
    none into `metadata`.
    """

    def __init__(
        self,
        query: str,
        project: Optional[str] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
        credentials: Optional[Credentials] = None,
    ):
        """Initialize the BigQuery document loader.

        Args:
            query: The query to run in BigQuery.
            project: Optional. The project to run the query in.
            page_content_columns: Optional. The columns to write into the
                `page_content` of the document.
            metadata_columns: Optional. The columns to write into the
                `metadata` of the document.
            credentials: google.auth.credentials.Credentials, optional.
                Credentials for accessing Google APIs. Use this parameter to
                override default credentials, for example to use Compute
                Engine (`google.auth.compute_engine.Credentials`) or Service
                Account (`google.oauth2.service_account.Credentials`)
                credentials.
        """
        # Plain storage only; nothing is contacted until `load()` is called.
        self.credentials = credentials
        self.metadata_columns = metadata_columns
        self.page_content_columns = page_content_columns
        self.project = project
        self.query = query

    def load(self) -> List[Document]:
        """Run the stored query and return one `Document` per result row.

        Returns:
            A list of documents, in the order the rows are iterated. Each
            document's `page_content` is the selected columns rendered as
            newline-separated ``"<column>: <value>"`` lines, and its
            `metadata` maps the selected metadata columns to their values.

        Raises:
            ImportError: If `google.cloud.bigquery` cannot be imported.
            ValueError: If the created BigQuery client has no project set.
        """
        # Imported lazily so the dependency is only required when loading.
        try:
            from google.cloud import bigquery
        except ImportError as import_error:
            raise ImportError(
                "Could not import google-cloud-bigquery python package. "
                "Please install it with `pip install google-cloud-bigquery`."
            ) from import_error

        client = bigquery.Client(
            credentials=self.credentials,
            project=self.project,
            client_info=get_client_info(module="bigquery"),
        )
        if not client.project:
            raise ValueError(
                "GCP project for Big Query is not set! Either provide a "
                "`project` argument during BigQueryLoader instantiation, "
                "or set a default project with `gcloud config set project` "
                "command."
            )

        rows = client.query(self.query).result()

        # With no explicit selection, every column in the result schema goes
        # into the page content and nothing goes into the metadata.
        content_columns = (
            self.page_content_columns
            if self.page_content_columns is not None
            else [field.name for field in rows.schema]
        )
        meta_columns = (
            self.metadata_columns if self.metadata_columns is not None else []
        )

        documents: List[Document] = []
        for record in rows:
            # Materialize the (column, value) pairs once; both the text and
            # the metadata below are derived from the same pairs.
            pairs = list(record.items())
            text_lines = [
                f"{name}: {value}"
                for name, value in pairs
                if name in content_columns
            ]
            extra = {
                name: value for name, value in pairs if name in meta_columns
            }
            documents.append(
                Document(page_content="\n".join(text_lines), metadata=extra)
            )
        return documents