Source code for langchain_community.document_loaders.bigquery

from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

from langchain_core._api.deprecation import deprecated
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader
from langchain_community.utilities.vertexai import get_client_info

if TYPE_CHECKING:
    from google.auth.credentials import Credentials


@deprecated(
    since="0.0.32",
    removal="0.3.0",
    alternative_import="langchain_google_community.BigQueryLoader",
)
class BigQueryLoader(BaseLoader):
    """Load documents from Google Cloud Platform `BigQuery`.

    Each document represents one row of the query result. Columns listed in
    `page_content_columns` are written into the document's `page_content`;
    columns listed in `metadata_columns` are written into the document's
    `metadata`. By default, every column is written into `page_content` and
    none into `metadata`.
    """

    def __init__(
        self,
        query: str,
        project: Optional[str] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
        credentials: Optional[Credentials] = None,
    ):
        """Initialize the BigQuery document loader.

        Args:
            query: The query to run in BigQuery.
            project: Optional. The project to run the query in.
            page_content_columns: Optional. The columns to write into the
                `page_content` of the document.
            metadata_columns: Optional. The columns to write into the
                `metadata` of the document.
            credentials: Optional. Credentials used to access Google APIs.
                Use this parameter to override the default credentials, for
                example Compute Engine
                (`google.auth.compute_engine.Credentials`) or Service Account
                (`google.oauth2.service_account.Credentials`) credentials.
        """
        self.query = query
        self.project = project
        self.page_content_columns = page_content_columns
        self.metadata_columns = metadata_columns
        self.credentials = credentials

    def load(self) -> List[Document]:
        """Run the query and convert every result row into a `Document`.

        The `page_content` of each document is one `name: value` line per
        selected column, joined with newlines, in the order the row yields
        its items. The `metadata` is a dict of the selected metadata columns.

        Returns:
            One `Document` per row of the query result.

        Raises:
            ImportError: If `google-cloud-bigquery` cannot be imported.
            ValueError: If the BigQuery client ends up with no project set.
        """
        # Imported lazily so the module can be imported without the optional
        # google-cloud-bigquery dependency installed.
        try:
            from google.cloud import bigquery
        except ImportError as ex:
            raise ImportError(
                "Could not import google-cloud-bigquery python package. "
                "Please install it with `pip install google-cloud-bigquery`."
            ) from ex

        bq_client = bigquery.Client(
            credentials=self.credentials,
            project=self.project,
            client_info=get_client_info(module="bigquery"),
        )
        if not bq_client.project:
            error_desc = (
                "GCP project for Big Query is not set! Either provide a "
                "`project` argument during BigQueryLoader instantiation, "
                "or set a default project with `gcloud config set project` "
                "command."
            )
            raise ValueError(error_desc)

        query_result = bq_client.query(self.query).result()

        # Column membership is tested once per cell of every row, so build the
        # filters a single time as sets instead of scanning lists repeatedly.
        if self.page_content_columns is None:
            # Default: every column of the result schema goes to page_content.
            content_columns = frozenset(
                column.name for column in query_result.schema
            )
        else:
            content_columns = frozenset(self.page_content_columns)

        if self.metadata_columns is None:
            # Default: no column goes to metadata.
            metadata_columns = frozenset()
        else:
            metadata_columns = frozenset(self.metadata_columns)

        docs: List[Document] = []
        for row in query_result:
            page_content = "\n".join(
                f"{k}: {v}" for k, v in row.items() if k in content_columns
            )
            metadata = {k: v for k, v in row.items() if k in metadata_columns}
            docs.append(Document(page_content=page_content, metadata=metadata))

        return docs