# Source code for langchain_community.document_loaders.duckdb_loader
from typing import Dict, List, Optional, cast
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
class DuckDBLoader(BaseLoader):
    """Load documents from the result of a `DuckDB` query.

    Each document represents one row of the result. The columns named in
    `page_content_columns` are written into the document's `page_content`
    and the columns named in `metadata_columns` are written into its
    `metadata`. By default, every column is written into `page_content`
    and nothing is written into `metadata`.
    """

    def __init__(
        self,
        query: str,
        database: str = ":memory:",
        read_only: bool = False,
        config: Optional[Dict[str, str]] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
    ):
        """Store the query and connection settings for a later `load()` call.

        Args:
            query: The query to execute.
            database: The database to connect to. Defaults to ":memory:".
            read_only: Whether to open the database in read-only mode.
                Defaults to False.
            config: A dictionary of configuration options to pass to the
                database. Optional; an empty dict is used when omitted.
            page_content_columns: The columns to write into the document's
                `page_content`. Optional; all result columns when omitted.
            metadata_columns: The columns to write into the document's
                `metadata`. Optional; no columns when omitted.
        """
        self.query = query
        self.database = database
        self.read_only = read_only
        self.config = config or {}
        self.page_content_columns = page_content_columns
        self.metadata_columns = metadata_columns

    @staticmethod
    def _locate_columns(field_names: List[str], columns: List[str]) -> List[tuple]:
        """Pair each requested column name with its position in a result row.

        When the result contains duplicate column names, the first occurrence
        is used (the same choice `list.index` makes).

        Raises:
            ValueError: If a requested column is not among `field_names`.
        """
        first_position: Dict[str, int] = {}
        for position, name in enumerate(field_names):
            first_position.setdefault(name, position)

        located = []
        for column in columns:
            if column not in first_position:
                raise ValueError(
                    f"Column {column!r} is not in the query result; "
                    f"available columns: {field_names}"
                )
            located.append((column, first_position[column]))
        return located

    def load(self) -> List[Document]:
        """Run the query and turn every result row into a `Document`.

        `page_content` is a newline-joined list of `"<column>: <value>"`
        entries, one per page-content column; `metadata` maps each metadata
        column name to the row's raw value.

        Returns:
            One `Document` per result row, in the order the rows were
            fetched. An empty list when the query yields no rows (column
            names are not validated in that case).

        Raises:
            ImportError: If the `duckdb` package is not installed.
            ValueError: If a requested page-content or metadata column is
                not present in a non-empty query result.
        """
        try:
            import duckdb
        except ImportError as exc:
            raise ImportError(
                "Could not import duckdb python package. "
                "Please install it with `pip install duckdb`."
            ) from exc

        with duckdb.connect(
            database=self.database, read_only=self.read_only, config=self.config
        ) as con:
            query_result = con.execute(self.query)
            results = query_result.fetchall()
            description = query_result.description

        # Nothing to convert; returning here also avoids iterating a
        # description that may be None when there is no result set.
        if not results:
            return []

        field_names = [c[0] for c in cast(list, description)]

        if self.page_content_columns is None:
            page_content_columns = field_names
        else:
            page_content_columns = self.page_content_columns
        if self.metadata_columns is None:
            metadata_columns = []
        else:
            metadata_columns = self.metadata_columns

        # Resolve column positions once instead of calling
        # `field_names.index` for every column of every row.
        content_fields = self._locate_columns(field_names, page_content_columns)
        metadata_fields = self._locate_columns(field_names, metadata_columns)

        docs = []
        for row in results:
            page_content = "\n".join(
                f"{column}: {row[position]}" for column, position in content_fields
            )
            metadata = {column: row[position] for column, position in metadata_fields}
            docs.append(Document(page_content=page_content, metadata=metadata))
        return docs