class SlackChatLoader(BaseChatLoader):
    """Load `Slack` conversations from a dump zip file."""

    def __init__(
        self,
        path: Union[str, Path],
    ):
        """
        Initialize the chat loader with the path to the exported Slack dump zip file.

        :param path: Path to the exported Slack dump zip file.
        :raises FileNotFoundError: If nothing exists at ``path``.
        """
        self.zip_path = path if isinstance(path, Path) else Path(path)
        if not self.zip_path.exists():
            raise FileNotFoundError(f"File {self.zip_path} not found")

    @staticmethod
    def _load_single_chat_session(messages: List[Dict]) -> ChatSession:
        """
        Convert the raw message records of one JSON file into a chat session.

        Entries that are not dicts, entries without a ``user`` value, and
        "has joined the channel" notifications are skipped. Consecutive
        messages from the same sender are merged into a single message whose
        content is joined with a blank line; the timestamp of every merged
        message is kept under ``additional_kwargs["events"]``.

        :param messages: Message records as parsed from a Slack export file.
        :return: Chat session holding one message per run of same-sender messages.
        """
        # Compiled once, outside the loop. Slack user IDs are alphanumeric and
        # start with "U" or "W" (e.g. "U024BE7LH"), so matching digits only
        # would let most join notifications through.
        skip_pattern = re.compile(
            r"<@[UW][A-Z0-9]+> has joined the channel", flags=re.IGNORECASE
        )
        results: List[Union[AIMessage, HumanMessage]] = []
        previous_sender = None
        for message in messages:
            if not isinstance(message, dict):
                continue
            # An explicit JSON null for "text" would otherwise reach the regex
            # as None and raise TypeError.
            text = message.get("text") or ""
            timestamp = message.get("ts", "")
            sender = message.get("user", "")
            if not sender:
                continue
            if skip_pattern.match(text):
                continue
            if sender == previous_sender:
                # Same author as the previous kept message: extend it rather
                # than starting a new one.
                results[-1].content += "\n\n" + text
                results[-1].additional_kwargs["events"].append(
                    {"message_time": timestamp}
                )
            else:
                results.append(
                    HumanMessage(  # type: ignore[call-arg]
                        role=sender,
                        content=text,
                        additional_kwargs={
                            "sender": sender,
                            "events": [{"message_time": timestamp}],
                        },
                    )
                )
            previous_sender = sender
        return ChatSession(messages=results)

    @staticmethod
    def _read_json(zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
        """
        Read JSON data from a zip subfile.

        :param zip_file: Open zip archive to read from.
        :param file_path: Name of the archive member to parse.
        :return: The parsed top-level JSON list.
        :raises ValueError: If the top-level JSON value is not a list.
        """
        with zip_file.open(file_path, "r") as f:
            data = json.load(f)
        if not isinstance(data, list):
            raise ValueError(f"Expected list of dictionaries, got {type(data)}")
        return data

    def lazy_load(self) -> Iterator[ChatSession]:
        """
        Lazy load the chat sessions from the Slack dump file
        and yield them in the required format.

        One chat session is produced per archive member whose name ends
        in ``.json``.

        :return: Iterator of chat sessions containing messages.
        """
        with zipfile.ZipFile(str(self.zip_path), "r") as zip_file:
            for file_path in zip_file.namelist():
                if file_path.endswith(".json"):
                    messages = self._read_json(zip_file, file_path)
                    yield self._load_single_chat_session(messages)