Skip to content

Couchdb

SimpleCouchDBReader #

Bases: BaseReader

简单的CouchDB读取器。

将每个CouchDB文档序列化为JSON文本，并转换为LlamaIndex使用的Document对象。

Parameters:

Name Type Description Default
couchdb_url str

CouchDB的完整URL。

None
max_docs int

要加载的最大文档数。

1000
Source code in llama_index/readers/couchdb/base.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
class SimpleCouchDBReader(BaseReader):
    """Simple CouchDB reader.

    Turns each CouchDB document into a ``Document`` whose text is the JSON
    serialization of the CouchDB document.

    Args:
        user (Optional[str]): User name used to build the server URL.
            Required only when ``couchdb_url`` is not given.
        pwd (Optional[str]): Password used to build the server URL.
            Required only when ``couchdb_url`` is not given.
        host (Optional[str]): Host used to build the server URL.
            Required only when ``couchdb_url`` is not given.
        port (Optional[int]): Port used to build the server URL.
            Required only when ``couchdb_url`` is not given.
        couchdb_url (Optional[str]): Full URL of the CouchDB server. When
            given it takes precedence over ``user``/``pwd``/``host``/``port``.
        max_docs (int): Maximum number of documents to load. The value is
            stored on the instance as ``self.max_docs``; ``load_data`` in this
            class does not read it.
    """

    def __init__(
        self,
        user: Optional[str] = None,
        pwd: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        couchdb_url: Optional[str] = None,
        max_docs: int = 1000,
    ) -> None:
        """Create the CouchDB client.

        Raises:
            ValueError: If ``couchdb_url`` is not given and any of ``user``,
                ``pwd``, ``host`` or ``port`` is missing.
        """
        if couchdb_url is not None:
            self.client = couchdb3.Server(couchdb_url)
        else:
            # Without a full URL every connection part is needed; fail early
            # instead of connecting to a URL that contains the text "None".
            parts = (("user", user), ("pwd", pwd), ("host", host), ("port", port))
            missing = [name for name, value in parts if value is None]
            if missing:
                raise ValueError(
                    "Either `couchdb_url` or all of `user`, `pwd`, `host` and "
                    f"`port` must be provided; missing: {', '.join(missing)}."
                )
            self.client = couchdb3.Server(f"http://{user}:{pwd}@{host}:{port}")
        self.max_docs = max_docs

    def load_data(self, db_name: str, query: Optional[str] = None) -> List[Document]:
        """Load documents from a CouchDB database.

        Args:
            db_name (str): Name of the database.
            query (Optional[str]): Query used to filter documents. When
                ``None`` (the default), the ``_all_docs`` view is read with
                ``include_docs=True``; otherwise the value is passed
                unchanged to the database's ``find`` method.

        Returns:
            List[Document]: One ``Document`` per result, with the JSON dump
            of the CouchDB document as its text.

        Raises:
            ValueError: If a view row has no ``id`` key, or a found document
                has no ``_id`` key.
        """
        db = self.client.get(db_name)
        if query is None:
            # if no query is specified, return all docs in database
            logging.debug("showing all docs")
            results = db.view("_all_docs", include_docs=True)
        else:
            # NOTE(review): `query` is forwarded as-is; confirm the argument
            # type that the client's `find` expects.
            logging.debug("executing query")
            results = db.find(query)

        is_mapping = isinstance(results, dict)
        logging.debug(results if is_mapping else results.rows)

        documents = []

        # Non-dict results expose their entries through `.rows`.
        if not is_mapping and hasattr(results, "rows") and results.rows is not None:
            for row in results.rows:
                # check that the id field exists
                if "id" not in row:
                    raise ValueError("`id` field not found in CouchDB document.")
                documents.append(Document(text=json.dumps(row.doc)))
            return documents

        # Otherwise the entries are expected under the "docs" key.
        found = results.get("docs")
        if found is not None:
            for item in found:
                # check that the _id field exists
                if "_id" not in item:
                    raise ValueError("`_id` field not found in CouchDB document.")
                documents.append(Document(text=json.dumps(item)))

        return documents

load_data #

load_data(
    db_name: str, query: Optional[str] = None
) -> List[Document]

从指定的CouchDB数据库加载数据。

Returns:

Type Description
List[Document]

List[Document]:文档列表。

Source code in llama_index/readers/couchdb/base.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
    def load_data(self, db_name: str, query: Optional[str] = None) -> List[Document]:
        """Load documents from a CouchDB database.

        Args:
            db_name (str): Name of the database.
            query (Optional[str]): Query used to filter documents. When
                ``None`` (the default), the ``_all_docs`` view is read with
                ``include_docs=True``; otherwise the value is passed
                unchanged to the database's ``find`` method.

        Returns:
            List[Document]: One ``Document`` per result, with the JSON dump
            of the CouchDB document as its text.

        Raises:
            ValueError: If a view row has no ``id`` key, or a found document
                has no ``_id`` key.
        """
        database = self.client.get(db_name)
        if query is not None:
            logging.debug("executing query")
            results = database.find(query)
        else:
            # No query given: fetch every document in the database.
            logging.debug("showing all docs")
            results = database.view("_all_docs", include_docs=True)

        if isinstance(results, dict):
            logging.debug(results)
            has_rows = False
        else:
            logging.debug(results.rows)
            has_rows = hasattr(results, "rows") and results.rows is not None

        loaded = []

        if has_rows:
            # Row-style result: each row must carry an "id" key.
            for row in results.rows:
                if "id" not in row:
                    raise ValueError("`id` field not found in CouchDB document.")
                loaded.append(Document(text=json.dumps(row.doc)))
            return loaded

        # Mapping-style result: entries live under the "docs" key.
        matches = results.get("docs")
        if matches is None:
            return loaded
        for item in matches:
            if "_id" not in item:
                raise ValueError("`_id` field not found in CouchDB document.")
            loaded.append(Document(text=json.dumps(item)))
        return loaded