Source code for langchain_community.document_loaders.open_city_data
from typing import Iterator
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
class OpenCityDataLoader(BaseLoader):
    """Load documents from an `Open City` (Socrata) dataset."""

    def __init__(self, city_id: str, dataset_id: str, limit: int):
        """Initialize with the city and dataset identifiers.

        Example: https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6
        e.g., city_id = data.sfgov.org
        e.g., dataset_id = vw6y-z8j6

        Args:
            city_id: The Open City city identifier.
            dataset_id: The Open City dataset identifier.
            limit: The maximum number of documents to load.
        """
        self.city_id = city_id
        self.dataset_id = dataset_id
        self.limit = limit

    def lazy_load(self) -> Iterator[Document]:
        """Lazily yield one ``Document`` per record of the dataset.

        Nothing is fetched until the returned iterator is first advanced.
        At that point a single ``client.get`` call retrieves up to
        ``self.limit`` records, and the client is closed before any
        document is yielded.

        Yields:
            A ``Document`` whose ``page_content`` is ``str(record)`` and
            whose ``metadata["source"]`` is ``"<city_id>_<dataset_id>"``.
        """
        # Imported here so that sodapy is only required when the loader
        # is actually used.
        from sodapy import Socrata

        # The second positional argument is sodapy's app token; None means
        # the request is made without one.
        # The client owns an HTTP session, so it is used as a context
        # manager to guarantee the session is released even if the request
        # raises. `get` returns the full result set, so the session can be
        # closed before yielding instead of staying open while the consumer
        # iterates (or abandons) the generator.
        with Socrata(self.city_id, None) as client:
            records = client.get(self.dataset_id, limit=self.limit)

        source = f"{self.city_id}_{self.dataset_id}"
        for record in records:
            yield Document(
                page_content=str(record),
                metadata={
                    "source": source,
                },
            )