# Source code for langchain_community.document_loaders.trello

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Iterator, Literal, Optional, Tuple

from langchain_core.documents import Document
from langchain_core.utils import get_from_env

from langchain_community.document_loaders.base import BaseLoader

if TYPE_CHECKING:
    from trello import Board, Card, TrelloClient


class TrelloLoader(BaseLoader):
    """Load cards from a `Trello` board."""

    def __init__(
        self,
        client: TrelloClient,
        board_name: str,
        *,
        include_card_name: bool = True,
        include_comments: bool = True,
        include_checklist: bool = True,
        card_filter: Literal["closed", "open", "all"] = "all",
        extra_metadata: Tuple[str, ...] = ("due_date", "labels", "list", "closed"),
    ):
        """Initialize the Trello loader.

        Args:
            client: Trello API client.
            board_name: The name of the Trello board.
            include_card_name: Whether to include the card name in the document.
            include_comments: Whether to include the card comments in the
                document.
            include_checklist: Whether to include the card checklists in the
                document.
            card_filter: Filter on card status. Valid values are "closed",
                "open", "all".
            extra_metadata: Additional metadata fields to include as document
                metadata. Valid values are "due_date", "labels", "list",
                "closed".
        """
        self.client = client
        self.board_name = board_name
        self.include_card_name = include_card_name
        self.include_comments = include_comments
        self.include_checklist = include_checklist
        self.extra_metadata = extra_metadata
        self.card_filter = card_filter

    @classmethod
    def from_credentials(
        cls,
        board_name: str,
        *,
        api_key: Optional[str] = None,
        token: Optional[str] = None,
        **kwargs: Any,
    ) -> TrelloLoader:
        """Convenience constructor that builds the TrelloClient for you.

        Args:
            board_name: The name of the Trello board.
            api_key: Trello API key. Can also be specified as the environment
                variable TRELLO_API_KEY.
            token: Trello token. Can also be specified as the environment
                variable TRELLO_TOKEN.
            **kwargs: Forwarded unchanged to the class constructor
                (include_card_name, include_comments, include_checklist,
                card_filter, extra_metadata).

        Returns:
            A loader bound to a freshly created ``TrelloClient``.

        Raises:
            ImportError: If the ``trello`` package cannot be imported.
        """
        try:
            from trello import TrelloClient  # type: ignore
        except ImportError as ex:
            raise ImportError(
                "Could not import trello python package. "
                "Please install it with `pip install py-trello`."
            ) from ex

        # Explicit arguments win; the environment is only consulted as a
        # fallback when an argument is missing or empty.
        api_key = api_key or get_from_env("api_key", "TRELLO_API_KEY")
        token = token or get_from_env("token", "TRELLO_TOKEN")
        client = TrelloClient(api_key=api_key, token=token)
        return cls(client, board_name, **kwargs)

    def lazy_load(self) -> Iterator[Document]:
        """Lazily load the cards of the configured Trello board.

        The cards that are fetched, and the text and metadata that end up in
        each document, are controlled by the options given to the constructor.

        Yields:
            One document per card returned by the board for ``card_filter``.

        Raises:
            ImportError: If ``beautifulsoup4`` is not installed.
            ValueError: If no board named ``board_name`` is found.
        """
        # Fail early with an actionable message; the import itself is only
        # used later, inside `_card_to_doc`.
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError as ex:
            raise ImportError(
                "`beautifulsoup4` package not found, please run"
                " `pip install beautifulsoup4`"
            ) from ex

        board = self._get_board()

        # Create a dictionary with the list IDs as keys and the list names as
        # values, so each card's list name can be resolved without another call.
        list_dict = {list_item.id: list_item.name for list_item in board.list_lists()}

        # Get the cards on the board and convert them one at a time.
        cards = board.get_cards(card_filter=self.card_filter)
        for card in cards:
            yield self._card_to_doc(card, list_dict)

    def _get_board(self) -> Board:
        """Return the first board whose name equals ``board_name``.

        Raises:
            ValueError: If the client lists no board with that name.
        """
        # Find the first board with a matching name; None marks "no match".
        board = next(
            (b for b in self.client.list_boards() if b.name == self.board_name), None
        )
        if board is None:
            raise ValueError(f"Board `{self.board_name}` not found.")
        return board

    def _card_to_doc(self, card: Card, list_dict: dict) -> Document:
        """Convert a single Trello card into a document.

        Args:
            card: The card to convert.
            list_dict: Mapping of Trello list id to list name, used to fill the
                "list" metadata field.

        Returns:
            A document whose page content is assembled from the card name,
            description, checklists and comments (each subject to the loader's
            include flags), and whose metadata always contains "title", "id"
            and "url" plus the fields requested through ``extra_metadata``.
        """
        from bs4 import BeautifulSoup  # type: ignore

        # NOTE: the "lxml" parser requested below needs the `lxml` package to
        # be installed in addition to `beautifulsoup4`.
        text_content = ""
        if self.include_card_name:
            text_content = card.name + "\n"
        if card.description.strip():
            text_content += BeautifulSoup(card.description, "lxml").get_text()
        if self.include_checklist:
            # Get all the checklist items on the card; empty checklists are
            # skipped entirely, including their title.
            for checklist in card.checklists:
                if checklist.items:
                    items = [
                        f"{item['name']}:{item['state']}" for item in checklist.items
                    ]
                    text_content += f"\n{checklist.name}\n" + "\n".join(items)

        if self.include_comments:
            # Get all the comments on the card.
            comments = [
                BeautifulSoup(comment["data"]["text"], "lxml").get_text()
                for comment in card.comments
            ]
            # Only emit the section when there is something to show, and keep
            # the header from being glued onto the previous line of text.
            if comments:
                if text_content and not text_content.endswith("\n"):
                    text_content += "\n"
                text_content += "Comments:" + "\n".join(comments)

        # Default metadata fields.
        metadata = {
            "title": card.name,
            "id": card.id,
            "url": card.url,
        }

        # Extra metadata fields. Card object is not subscriptable, so each
        # field is read through its attribute.
        if "labels" in self.extra_metadata:
            metadata["labels"] = [label.name for label in card.labels]
        if "list" in self.extra_metadata:
            # The "list" key is left out when the card's list id is unknown.
            if card.list_id in list_dict:
                metadata["list"] = list_dict[card.list_id]
        if "closed" in self.extra_metadata:
            metadata["closed"] = card.closed
        if "due_date" in self.extra_metadata:
            metadata["due_date"] = card.due_date

        return Document(page_content=text_content, metadata=metadata)