Source code for langchain_community.document_loaders.duckdb_loader

from typing import Dict, List, Optional, cast

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class DuckDBLoader(BaseLoader):
    """Load from `DuckDB`.

    Each document represents one row of the result. The
    `page_content_columns` are written into the `page_content` of the
    document. The `metadata_columns` are written into the `metadata` of the
    document. By default, all columns are written into the `page_content`
    and none into the `metadata`.
    """

    def __init__(
        self,
        query: str,
        database: str = ":memory:",
        read_only: bool = False,
        config: Optional[Dict[str, str]] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
    ):
        """Store the query and connection settings; nothing is executed here.

        Args:
            query: The query to execute.
            database: The database to connect to. Defaults to ":memory:".
            read_only: Whether to open the database in read-only mode.
              Defaults to False.
            config: A dictionary of configuration options to pass to the
              database. Optional; stored as an empty dict when omitted.
            page_content_columns: The columns to write into the
              `page_content` of the document. Optional.
            metadata_columns: The columns to write into the `metadata` of
              the document. Optional.
        """
        self.query = query
        self.database = database
        self.read_only = read_only
        self.config = config or {}
        self.page_content_columns = page_content_columns
        self.metadata_columns = metadata_columns

    @staticmethod
    def _resolve_columns(columns, positions, field_names):
        """Pair each requested column name with its index in a result row.

        Raises:
            ValueError: If a requested column is not part of the query result.
        """
        resolved = []
        for column in columns:
            if column not in positions:
                raise ValueError(
                    f"Column {column!r} is not in the query result; "
                    f"available columns: {field_names}"
                )
            resolved.append((column, positions[column]))
        return resolved

    def load(self) -> List[Document]:
        """Execute the query and return one `Document` per result row.

        The `page_content` of each document is made of ``"<column>: <value>"``
        lines joined with newlines, one line per page-content column. The
        `metadata` maps each metadata column name to the row's value.

        Returns:
            The documents, in the order the rows were fetched.

        Raises:
            ImportError: If the `duckdb` package is not installed.
            ValueError: If the result has rows and a configured column name
              is not among the columns returned by the query.
        """
        try:
            import duckdb
        except ImportError as e:
            raise ImportError(
                "Could not import duckdb python package. "
                "Please install it with `pip install duckdb`."
            ) from e

        # Only the fetch needs the connection; rows and column description
        # are captured here so the connection is closed before documents
        # are built.
        with duckdb.connect(
            database=self.database, read_only=self.read_only, config=self.config
        ) as con:
            query_result = con.execute(self.query)
            results = query_result.fetchall()
            description = cast(list, query_result.description)

        field_names = [c[0] for c in description]

        # Column names are only checked when there is at least one row, so an
        # empty result set keeps yielding an empty list as it always has.
        if not results:
            return []

        if self.page_content_columns is None:
            page_content_columns = field_names
        else:
            page_content_columns = self.page_content_columns

        if self.metadata_columns is None:
            metadata_columns = []
        else:
            metadata_columns = self.metadata_columns

        # Resolve name -> row index once instead of a linear search per column
        # per row. `setdefault` keeps the FIRST position of a duplicated
        # column name, which is what `list.index` would have returned.
        positions: Dict[str, int] = {}
        for position, name in enumerate(field_names):
            positions.setdefault(name, position)

        content_fields = self._resolve_columns(
            page_content_columns, positions, field_names
        )
        metadata_fields = self._resolve_columns(
            metadata_columns, positions, field_names
        )

        docs = []
        for result in results:
            page_content = "\n".join(
                f"{column}: {result[position]}"
                for column, position in content_fields
            )
            metadata = {
                column: result[position] for column, position in metadata_fields
            }
            docs.append(Document(page_content=page_content, metadata=metadata))

        return docs