Source code for langchain_community.document_loaders.open_city_data

from typing import Iterator

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class OpenCityDataLoader(BaseLoader):
    """Load records from an `Open City` (Socrata) dataset as documents."""

    def __init__(self, city_id: str, dataset_id: str, limit: int):
        """Initialize with the city and dataset identifiers.

        Example: https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6
        e.g., city_id = data.sfgov.org
        e.g., dataset_id = vw6y-z8j6

        Args:
            city_id: The Open City city identifier.
            dataset_id: The Open City dataset identifier.
            limit: The maximum number of documents to load.
        """
        self.city_id = city_id
        self.dataset_id = dataset_id
        self.limit = limit

    def lazy_load(self) -> Iterator[Document]:
        """Lazily load records, yielding one ``Document`` per record.

        Each document's ``page_content`` is ``str(record)`` and its
        ``source`` metadata is ``"<city_id>_<dataset_id>"``.

        Yields:
            A ``Document`` for every record returned for the dataset,
            requested with ``limit=self.limit``.
        """
        # Imported lazily so that sodapy is only required when the loader
        # is actually used.
        from sodapy import Socrata

        # The second positional argument is passed as None, exactly as before
        # (presumably the app token, i.e. unauthenticated access).
        client = Socrata(self.city_id, None)
        try:
            # NOTE(review): get() is expected to return the fully fetched
            # result set, so the client can be released before iterating.
            results = client.get(self.dataset_id, limit=self.limit)
        finally:
            # Release the underlying HTTP session even if the request fails;
            # previously the client was never closed.
            client.close()

        source = self.city_id + "_" + self.dataset_id
        for record in results:
            yield Document(
                page_content=str(record),
                metadata={
                    "source": source,
                },
            )