Skip to content

Whatsapp

WhatsappChatLoader #

Bases: BaseReader

WhatsApp聊天数据加载器。

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `path` | `str` | WhatsApp聊天文件的路径。 | required |
Source code in llama_index/readers/whatsapp/base.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class WhatsappChatLoader(BaseReader):
    """Load an exported WhatsApp chat file as one Document per message.

    Args:
        path (str): Path to the WhatsApp chat file.
    """

    def __init__(self, path: str):
        """Store the chat file path; nothing is read until load_data()."""
        self.file_path = path

    def load_data(self) -> List[Document]:
        """Parse the WhatsApp chat file into documents.

        Returns:
            List[Document]: One document per row of the parsed message
            frame. Each document's text is
            ``"<timestamp> <author>: <message>"`` and its ``extra_info``
            holds the keys ``source`` (file name without a trailing
            ``.txt``), ``author`` and ``timestamp``.
        """
        # Imported here so chatminer is only needed when data is loaded.
        from chatminer.chatparsers import WhatsAppParser

        path = Path(self.file_path)

        parser = WhatsAppParser(path)
        parser.parse_file()
        df = parser.parsed_messages.get_df()

        total = len(df)
        logging.debug("> Number of messages: %s.", total)

        # The source label is the same for every message, so compute it once.
        # Path.name is separator-agnostic, and only a trailing ".txt" is
        # stripped (not every ".txt" occurrence inside the file name).
        file_name = path.name
        source = file_name[:-4] if file_name.endswith(".txt") else file_name

        docs = []
        for count, row in enumerate(df.itertuples(), start=1):
            timestamp = str(row.timestamp)
            extra_info = {
                "source": source,
                "author": row.author,
                "timestamp": timestamp,
            }

            docs.append(
                Document(
                    # f-string formatting also tolerates non-string
                    # author/message values instead of raising TypeError.
                    text=f"{timestamp} {row.author}: {row.message}",
                    extra_info=extra_info,
                )
            )

            logging.debug("Added %s of %s messages.", count, total)

        logging.debug("> Document creation for %s is complete.", path)
        return docs

load_data #

load_data() -> List[Document]

解析WhatsApp文件为文档。

Source code in llama_index/readers/whatsapp/base.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def load_data(self) -> List[Document]:
    """Parse the WhatsApp chat file into documents.

    Returns:
        List[Document]: One document per row of the parsed message frame.
        Each document's text is ``"<timestamp> <author>: <message>"`` and
        its ``extra_info`` holds the keys ``source`` (file name without a
        trailing ``.txt``), ``author`` and ``timestamp``.
    """
    # Imported here so chatminer is only needed when data is loaded.
    from chatminer.chatparsers import WhatsAppParser

    path = Path(self.file_path)

    parser = WhatsAppParser(path)
    parser.parse_file()
    df = parser.parsed_messages.get_df()

    total = len(df)
    logging.debug("> Number of messages: %s.", total)

    # The source label is the same for every message, so compute it once.
    # Path.name is separator-agnostic, and only a trailing ".txt" is
    # stripped (not every ".txt" occurrence inside the file name).
    file_name = path.name
    source = file_name[:-4] if file_name.endswith(".txt") else file_name

    docs = []
    for count, row in enumerate(df.itertuples(), start=1):
        timestamp = str(row.timestamp)
        extra_info = {
            "source": source,
            "author": row.author,
            "timestamp": timestamp,
        }

        docs.append(
            Document(
                # f-string formatting also tolerates non-string
                # author/message values instead of raising TypeError.
                text=f"{timestamp} {row.author}: {row.message}",
                extra_info=extra_info,
            )
        )

        logging.debug("Added %s of %s messages.", count, total)

    logging.debug("> Document creation for %s is complete.", path)
    return docs