Skip to content

Kibela

KibelaReader #

Bases: BaseReader

Kibela阅读器。

从Kibela读取页面。

Parameters:

Name Type Description Default
team str

Kibela团队。

required
token str

Kibela API令牌。

required
Source code in llama_index/readers/kibela/base.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
class KibelaReader(BaseReader):
    """Kibela阅读器。

    从Kibela读取页面。

    Args:
        team (str): Kibela团队。
        token (str): Kibela API令牌。"""

    def __init__(self, team: str, token: str) -> None:
        """使用参数进行初始化。"""
        from gql import Client
        from gql.transport.aiohttp import AIOHTTPTransport

        self.url = f"https://{team}.kibe.la/api/v1"
        self.headers = {"Authorization": f"Bearer {token}"}
        transport = AIOHTTPTransport(url=self.url, headers=self.headers)
        self.client = Client(transport=transport, fetch_schema_from_transport=True)

    def request(self, query: str, params: dict) -> Dict:
        from gql import gql

        q = gql(query)
        return self.client.execute(q, variable_values=params)

    def load_data(self) -> List[Document]:
        """从Kibela加载数据。

返回:
    List[Document]: 文档列表。
"""
        query = """
        query getNotes($after: String) {
          notes(first: 100, after: $after) {
            totalCount
            pageInfo {
              endCursor
              startCursor
              hasNextPage
            }
            edges {
              cursor
              node {
                id
                url
                title
                content
              }
            }
          }
        }
        """
        params = {"after": ""}
        has_next = True
        documents = []
        # Due to the request limit of 10 requests per second on the Kibela API, we do not process in parallel.
        # See https://github.com/kibela/kibela-api-v1-document#1%E7%A7%92%E3%81%82%E3%81%9F%E3%82%8A%E3%81%AE%E3%83%AA%E3%82%AF%E3%82%A8%E3%82%B9%E3%83%88%E6%95%B0
        while has_next:
            res = self.request(query, params)
            note_conn = parse_obj_as(Connection[Note], res["notes"])
            for note in note_conn.edges:
                doc = (
                    f"---\nurl: {note.node.url}\ntitle:"
                    f" {note.node.title}\n---\ncontent:\n{note.node.content}\n"
                )
                documents.append(Document(text=doc))
            has_next = note_conn.pageInfo.hasNextPage
            params = {"after": note_conn.pageInfo.endCursor}

        return documents

load_data #

load_data() -> List[Document]

从Kibela加载数据。

返回: List[Document]: 文档列表。

Source code in llama_index/readers/kibela/base.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
    def load_data(self) -> List[Document]:
        """从Kibela加载数据。

返回:
    List[Document]: 文档列表。
"""
        query = """
        query getNotes($after: String) {
          notes(first: 100, after: $after) {
            totalCount
            pageInfo {
              endCursor
              startCursor
              hasNextPage
            }
            edges {
              cursor
              node {
                id
                url
                title
                content
              }
            }
          }
        }
        """
        params = {"after": ""}
        has_next = True
        documents = []
        # Due to the request limit of 10 requests per second on the Kibela API, we do not process in parallel.
        # See https://github.com/kibela/kibela-api-v1-document#1%E7%A7%92%E3%81%82%E3%81%9F%E3%82%8A%E3%81%AE%E3%83%AA%E3%82%AF%E3%82%A8%E3%82%B9%E3%83%88%E6%95%B0
        while has_next:
            res = self.request(query, params)
            note_conn = parse_obj_as(Connection[Note], res["notes"])
            for note in note_conn.edges:
                doc = (
                    f"---\nurl: {note.node.url}\ntitle:"
                    f" {note.node.title}\n---\ncontent:\n{note.node.content}\n"
                )
                documents.append(Document(text=doc))
            has_next = note_conn.pageInfo.hasNextPage
            params = {"after": note_conn.pageInfo.endCursor}

        return documents