Source code for langchain_community.document_loaders.trello
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Iterator, Literal, Optional, Tuple
from langchain_core.documents import Document
from langchain_core.utils import get_from_env
from langchain_community.document_loaders.base import BaseLoader
if TYPE_CHECKING:
from trello import Board, Card, TrelloClient
class TrelloLoader(BaseLoader):
    """Load cards from a `Trello` board.

    Every card on the selected board is converted into one ``Document`` whose
    text is assembled from the card name, description, checklists and
    comments (each part optional), and whose metadata carries the card title,
    id and url plus any requested extra fields.
    """

    def __init__(
        self,
        client: TrelloClient,
        board_name: str,
        *,
        include_card_name: bool = True,
        include_comments: bool = True,
        include_checklist: bool = True,
        card_filter: Literal["closed", "open", "all"] = "all",
        extra_metadata: Tuple[str, ...] = ("due_date", "labels", "list", "closed"),
    ):
        """Initialize the Trello loader.

        Args:
            client: Trello API client.
            board_name: Name of the Trello board to load.
            include_card_name: Whether to include the card name in the
                document text.
            include_comments: Whether to include the card comments in the
                document text.
            include_checklist: Whether to include the card checklists in the
                document text.
            card_filter: Filter on card status. Valid values are "closed",
                "open" and "all".
            extra_metadata: Additional metadata fields to attach to each
                document. Valid values are "due_date", "labels", "list" and
                "closed".
        """
        self.client = client
        self.board_name = board_name
        self.include_card_name = include_card_name
        self.include_comments = include_comments
        self.include_checklist = include_checklist
        self.extra_metadata = extra_metadata
        self.card_filter = card_filter

    @classmethod
    def from_credentials(
        cls,
        board_name: str,
        *,
        api_key: Optional[str] = None,
        token: Optional[str] = None,
        **kwargs: Any,
    ) -> TrelloLoader:
        """Convenience constructor that builds the ``TrelloClient`` for you.

        Args:
            board_name: Name of the Trello board to load.
            api_key: Trello API key. When omitted it is looked up from the
                environment variable TRELLO_API_KEY.
            token: Trello token. When omitted it is looked up from the
                environment variable TRELLO_TOKEN.
            **kwargs: Forwarded unchanged to ``__init__``; accepts
                include_card_name, include_comments, include_checklist,
                card_filter and extra_metadata.

        Returns:
            A loader bound to a freshly created ``TrelloClient``.

        Raises:
            ImportError: If the ``trello`` package cannot be imported.
        """
        try:
            from trello import TrelloClient  # type: ignore
        except ImportError as ex:
            raise ImportError(
                "Could not import trello python package. "
                "Please install it with `pip install py-trello`."
            ) from ex
        # Explicit arguments win; the environment is only the fallback.
        api_key = api_key or get_from_env("api_key", "TRELLO_API_KEY")
        token = token or get_from_env("token", "TRELLO_TOKEN")
        client = TrelloClient(api_key=api_key, token=token)
        return cls(client, board_name, **kwargs)

    def lazy_load(self) -> Iterator[Document]:
        """Lazily load the cards of the configured Trello board.

        The options given at construction time control which cards are
        fetched and which text and metadata end up in each document.

        Yields:
            One ``Document`` per card on the board.

        Raises:
            ImportError: If ``beautifulsoup4`` is not installed.
            ValueError: If no board named ``board_name`` exists.
        """
        # Fail early with an actionable message instead of deep inside
        # `_card_to_doc`, which is where bs4 is actually used.
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError as ex:
            raise ImportError(
                "`beautifulsoup4` package not found, please run"
                " `pip install beautifulsoup4`"
            ) from ex

        board = self._get_board()
        # Map list IDs to list names so each card can report its list by name.
        list_dict = {list_item.id: list_item.name for list_item in board.list_lists()}
        # Fetch the cards on the board, honouring the status filter.
        cards = board.get_cards(card_filter=self.card_filter)
        for card in cards:
            yield self._card_to_doc(card, list_dict)

    def _get_board(self) -> Board:
        """Return the first board whose name equals ``self.board_name``.

        Raises:
            ValueError: If the client lists no board with that name.
        """
        board = next(
            (b for b in self.client.list_boards() if b.name == self.board_name), None
        )
        # Compare against the sentinel explicitly so a found board is never
        # rejected merely because of how it evaluates in a boolean context.
        if board is None:
            raise ValueError(f"Board `{self.board_name}` not found.")
        return board

    def _card_to_doc(self, card: Card, list_dict: dict) -> Document:
        """Convert a single Trello card into a ``Document``.

        Args:
            card: The card to convert.
            list_dict: Mapping of Trello list IDs to list names, used to
                resolve the card's list for the "list" metadata field.

        Returns:
            A ``Document`` whose ``page_content`` is the concatenation of the
            enabled text sections and whose metadata always holds "title",
            "id" and "url", plus the fields named in ``self.extra_metadata``.
        """
        # NOTE: parsing uses the "lxml" parser, so lxml has to be available
        # to BeautifulSoup in addition to bs4 itself.
        from bs4 import BeautifulSoup  # type: ignore

        parts = []
        if self.include_card_name:
            parts.append(card.name + "\n")

        # Guard against a missing description before stripping it.
        description = card.description or ""
        if description.strip():
            parts.append(BeautifulSoup(description, "lxml").get_text())

        if self.include_checklist:
            # One section per non-empty checklist: its name, then "item:state"
            # lines.
            for checklist in card.checklists:
                if checklist.items:
                    items = [
                        f"{item['name']}:{item['state']}" for item in checklist.items
                    ]
                    parts.append(f"\n{checklist.name}\n" + "\n".join(items))

        if self.include_comments:
            comments = [
                BeautifulSoup(comment["data"]["text"], "lxml").get_text()
                for comment in card.comments
            ]
            # Only emit the section when there is something to show, and put
            # the header on its own line so it does not fuse with the text
            # that precedes it (same layout as the checklist sections).
            if comments:
                parts.append("\nComments:\n" + "\n".join(comments))

        text_content = "".join(parts)

        # Default metadata fields.
        metadata = {
            "title": card.name,
            "id": card.id,
            "url": card.url,
        }
        # Extra metadata fields. Card object is not subscriptable, so each
        # field is read through its attribute.
        if "labels" in self.extra_metadata:
            metadata["labels"] = [label.name for label in card.labels]
        if "list" in self.extra_metadata:
            # Cards whose list is not in the mapping simply get no "list" key.
            if card.list_id in list_dict:
                metadata["list"] = list_dict[card.list_id]
        if "closed" in self.extra_metadata:
            metadata["closed"] = card.closed
        if "due_date" in self.extra_metadata:
            metadata["due_date"] = card.due_date

        return Document(page_content=text_content, metadata=metadata)