Kibela

KibelaReader #

Bases: BaseReader

Kibela阅读器。

从Kibela读取页面。

Parameters:

Name	Type	Description	Default
`team`	`str`	Kibela团队。	required
`token`	`str`	Kibela API令牌。	required

Source code in llama_index/readers/kibela/base.py

class KibelaReader(BaseReader):
    """Kibela阅读器。

    从Kibela读取页面。

    Args:
        team (str): Kibela团队。
        token (str): Kibela API令牌。"""

    def __init__(self, team: str, token: str) -> None:
        """使用参数进行初始化。"""
        from gql import Client
        from gql.transport.aiohttp import AIOHTTPTransport

        self.url = f"https://{team}.kibe.la/api/v1"
        self.headers = {"Authorization": f"Bearer {token}"}
        transport = AIOHTTPTransport(url=self.url, headers=self.headers)
        self.client = Client(transport=transport, fetch_schema_from_transport=True)

    def request(self, query: str, params: dict) -> Dict:
        from gql import gql

        q = gql(query)
        return self.client.execute(q, variable_values=params)

    def load_data(self) -> List[Document]:
        """从Kibela加载数据。

返回：
    List[Document]: 文档列表。
"""
        query = """
        query getNotes($after: String) {
          notes(first: 100, after: $after) {
            totalCount
            pageInfo {
              endCursor
              startCursor
              hasNextPage
            }
            edges {
              cursor
              node {
                id
                url
                title
                content
              }
            }
          }
        }
        """
        params = {"after": ""}
        has_next = True
        documents = []
        # Due to the request limit of 10 requests per second on the Kibela API, we do not process in parallel.
        # See https://github.com/kibela/kibela-api-v1-document#1%E7%A7%92%E3%81%82%E3%81%9F%E3%82%8A%E3%81%AE%E3%83%AA%E3%82%AF%E3%82%A8%E3%82%B9%E3%83%88%E6%95%B0
        while has_next:
            res = self.request(query, params)
            note_conn = parse_obj_as(Connection[Note], res["notes"])
            for note in note_conn.edges:
                doc = (
                    f"---\nurl: {note.node.url}\ntitle:"
                    f" {note.node.title}\n---\ncontent:\n{note.node.content}\n"
                )
                documents.append(Document(text=doc))
            has_next = note_conn.pageInfo.hasNextPage
            params = {"after": note_conn.pageInfo.endCursor}

        return documents

load_data #

load_data() -> List[Document]

从Kibela加载数据。

返回： List[Document]: 文档列表。

Source code in llama_index/readers/kibela/base.py

    def load_data(self) -> List[Document]:
        """从Kibela加载数据。

返回：
    List[Document]: 文档列表。
"""
        query = """
        query getNotes($after: String) {
          notes(first: 100, after: $after) {
            totalCount
            pageInfo {
              endCursor
              startCursor
              hasNextPage
            }
            edges {
              cursor
              node {
                id
                url
                title
                content
              }
            }
          }
        }
        """
        params = {"after": ""}
        has_next = True
        documents = []
        # Due to the request limit of 10 requests per second on the Kibela API, we do not process in parallel.
        # See https://github.com/kibela/kibela-api-v1-document#1%E7%A7%92%E3%81%82%E3%81%9F%E3%82%8A%E3%81%AE%E3%83%AA%E3%82%AF%E3%82%A8%E3%82%B9%E3%83%88%E6%95%B0
        while has_next:
            res = self.request(query, params)
            note_conn = parse_obj_as(Connection[Note], res["notes"])
            for note in note_conn.edges:
                doc = (
                    f"---\nurl: {note.node.url}\ntitle:"
                    f" {note.node.title}\n---\ncontent:\n{note.node.content}\n"
                )
                documents.append(Document(text=doc))
            has_next = note_conn.pageInfo.hasNextPage
            params = {"after": note_conn.pageInfo.endCursor}

        return documents