Skip to content

Pdb

PdbAbstractReader #

Bases: BaseReader

蛋白质数据银行条目的主要引用摘要阅读器。

Source code in llama_index/readers/pdb/base.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class PdbAbstractReader(BaseReader):
    """Reader for the primary-citation abstracts of Protein Data Bank entries."""

    def __init__(self) -> None:
        super().__init__()

    def load_data(self, pdb_ids: List[str]) -> List[Document]:
        """Load data from the RCSB or EBI REST API.

        The lookup itself is delegated to ``get_pdb_abstract``; this method
        only reshapes what that helper returns into ``Document`` objects.

        Args:
            pdb_ids (List[str]): PDB ids whose primary-citation abstracts
                should be read.

        Returns:
            List[Document]: One document per requested id, in input order.
            Each document's text is the abstract mapping flattened to
            alternating key and value lines; the PDB id and the full
            primary-citation record are attached as ``extra_info``.
        """
        documents: List[Document] = []
        for entry_id in pdb_ids:
            citation_title, citations = get_pdb_abstract(entry_id)
            # The helper returns a title plus a mapping keyed by title; the
            # record stored under that title is the primary citation.
            citation = citations[citation_title]
            sections = citation["abstract"]
            # Emit every key followed by its value, one item per line.
            text = "\n".join(
                str(part) for section in sections.items() for part in section
            )
            metadata = {"pdb_id": entry_id, "primary_citation": citation}
            documents.append(Document(text=text, extra_info=metadata))
        return documents

load_data #

load_data(pdb_ids: List[str]) -> List[Document]

从RCSB或EBI REST API加载数据。

Source code in llama_index/readers/pdb/base.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
    def load_data(self, pdb_ids: List[str]) -> List[Document]:
        """Load data from the RCSB or EBI REST API.

        The lookup itself is delegated to ``get_pdb_abstract``; this method
        only reshapes what that helper returns into ``Document`` objects.

        Args:
            pdb_ids (List[str]): PDB ids whose primary-citation abstracts
                should be read.

        Returns:
            List[Document]: One document per requested id, in input order.
            Each document's text is the abstract mapping flattened to
            alternating key and value lines; the PDB id and the full
            primary-citation record are attached as ``extra_info``.
        """
        documents: List[Document] = []
        for entry_id in pdb_ids:
            citation_title, citations = get_pdb_abstract(entry_id)
            # The helper returns a title plus a mapping keyed by title; the
            # record stored under that title is the primary citation.
            citation = citations[citation_title]
            sections = citation["abstract"]
            # Emit every key followed by its value, one item per line.
            text = "\n".join(
                str(part) for section in sections.items() for part in section
            )
            metadata = {"pdb_id": entry_id, "primary_citation": citation}
            documents.append(Document(text=text, extra_info=metadata))
        return documents