Bases: BaseExtractor
Pydantic程序提取器。
使用LLM来提取Pydantic对象。以字典形式返回该对象的属性。
Source code in llama_index/core/extractors/metadata_extractors.py
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
class PydanticProgramExtractor(BaseExtractor):
    """Pydantic program extractor.

    Uses an LLM to extract a Pydantic object and returns that object's
    attributes as a dictionary.
    """

    # The program that is awaited once per node to produce the output object.
    program: BasePydanticProgram = Field(
        ...,
        description="Pydantic program to extract.",
    )
    # Keyword name under which the rendered prompt is handed to the program.
    input_key: str = Field(
        default="input",
        description=(
            "Key to use as input to the program (the program "
            "template string must expose this key)."
        ),
    )
    # Template formatted with ``context_str`` and ``class_name`` for each node.
    extract_template_str: str = Field(
        default=DEFAULT_EXTRACT_TEMPLATE_STR,
        description="Template to use for extraction.",
    )

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this extractor class."""
        # NOTE(review): the identifier differs from the Python class name;
        # it is reproduced as-is -- presumably other code or stored data
        # relies on this exact value (confirm before changing).
        return "PydanticModelExtractor"

    async def _acall_program(self, node: BaseNode) -> Dict[str, Any]:
        """Run the program on a single node.

        Args:
            node: The node whose content is rendered into the prompt.

        Returns:
            The program's output object converted with ``.dict()``, or an
            empty dict when ``is_text_node_only`` is set and ``node`` is not
            a ``TextNode``.
        """
        # Guard clause: skip non-text nodes when restricted to text nodes.
        if self.is_text_node_only and not isinstance(node, TextNode):
            return {}

        template = self.extract_template_str
        node_content = node.get_content(metadata_mode=self.metadata_mode)
        output_cls_name = self.program.output_cls.__name__
        prompt = template.format(
            context_str=node_content,
            class_name=output_cls_name,
        )

        # The keyword name is dynamic (``input_key``), hence the dict unpacking.
        program_kwargs = {self.input_key: prompt}
        extracted = await self.program.acall(**program_kwargs)
        return extracted.dict()

    async def aextract(self, nodes: Sequence[BaseNode]) -> List[Dict]:
        """Run the program over every node and collect the results.

        Args:
            nodes: The nodes to process.

        Returns:
            The list produced by ``run_jobs`` for the per-node coroutines
            (one coroutine is created per input node, in input order).
        """
        # Coroutines are created here but only awaited inside ``run_jobs``.
        pending = [self._acall_program(item) for item in nodes]
        results: List[Dict] = await run_jobs(
            pending,
            show_progress=self.show_progress,
            workers=self.num_workers,
        )
        return results
|
aextract(nodes: Sequence[BaseNode]) -> List[Dict]
提取Pydantic程序。
Source code in llama_index/core/extractors/metadata_extractors.py
481
482
483
484
485
486
487
488
489
490
async def aextract(self, nodes: Sequence[BaseNode]) -> List[Dict]:
    """Run the program over every node and collect the results.

    Args:
        nodes: The nodes to process.

    Returns:
        The list produced by ``run_jobs`` for the per-node coroutines
        (one coroutine is created per input node, in input order).
    """
    # Coroutines are created here but only awaited inside ``run_jobs``.
    pending = [self._acall_program(item) for item in nodes]
    results: List[Dict] = await run_jobs(
        pending,
        show_progress=self.show_progress,
        workers=self.num_workers,
    )
    return results
|