Source code for langchain_community.document_loaders.joplin

import json
import urllib
from datetime import datetime
from typing import Iterator, List, Optional

from langchain_core.documents import Document
from langchain_core.utils import get_from_env

from langchain_community.document_loaders.base import BaseLoader

# Template for the `joplin://` link stored as each document's "source" metadata;
# `{id}` is replaced with the note id via `str.format`.
LINK_NOTE_TEMPLATE = "joplin://x-callback-url/openNote?id={id}"


class JoplinLoader(BaseLoader):
    """Load notes from `Joplin`.

    In order to use this loader, you need to have Joplin running with the
    Web Clipper enabled (look for "Web Clipper" in the app settings).

    To get the access token, go to the Web Clipper options; the token is
    listed under "Advanced Options".

    You can find more information about the Web Clipper service here:
    https://joplinapp.org/clipper/
    """

    def __init__(
        self,
        access_token: Optional[str] = None,
        port: int = 41184,
        host: str = "localhost",
    ) -> None:
        """Build the request URL templates used by the loader.

        Args:
            access_token: The access token to use. When omitted (or empty), it
                is looked up through ``get_from_env`` using the
                ``JOPLIN_ACCESS_TOKEN`` environment variable.
            port: The port where the Web Clipper service is running.
                Defaults to 41184.
            host: The host where the Web Clipper service is running.
                Defaults to localhost.
        """
        access_token = access_token or get_from_env(
            "access_token", "JOPLIN_ACCESS_TOKEN"
        )
        base_url = f"http://{host}:{port}"

        # The doubled braces survive the f-string as `{page}` / `{id}`
        # placeholders that are filled in later with `str.format`.
        self._get_note_url = (
            f"{base_url}/notes?token={access_token}"
            f"&fields=id,parent_id,title,body,created_time,updated_time&page={{page}}"
        )
        self._get_folder_url = (
            f"{base_url}/folders/{{id}}?token={access_token}&fields=title"
        )
        self._get_tag_url = (
            f"{base_url}/notes/{{id}}/tags?token={access_token}&fields=title"
        )

    def _fetch_json(self, url: str) -> dict:
        """Issue a GET request to ``url`` and return the decoded JSON body."""
        # A bare `import urllib` does not load the `urllib.request` submodule,
        # so import it explicitly here instead of relying on some other module
        # having imported it first.
        import urllib.request

        request = urllib.request.Request(url)
        with urllib.request.urlopen(request) as response:
            return json.loads(response.read().decode())

    def _get_notes(self) -> Iterator[Document]:
        """Yield one ``Document`` per note, following pagination.

        Pages are requested starting at 1 until the response's ``has_more``
        field is falsy. Each document's content is the note body; its metadata
        holds the source link, folder title, tag titles, note title and the
        formatted creation/update times.
        """
        # Many notes share a folder; remember titles already fetched during
        # this load so each folder is requested at most once.
        folder_titles: dict = {}
        has_more = True
        page = 1
        while has_more:
            json_data = self._fetch_json(self._get_note_url.format(page=page))
            for note in json_data["items"]:
                parent_id = note["parent_id"]
                if parent_id not in folder_titles:
                    folder_titles[parent_id] = self._get_folder(parent_id)
                metadata = {
                    "source": LINK_NOTE_TEMPLATE.format(id=note["id"]),
                    "folder": folder_titles[parent_id],
                    "tags": self._get_tags(note["id"]),
                    "title": note["title"],
                    "created_time": self._convert_date(note["created_time"]),
                    "updated_time": self._convert_date(note["updated_time"]),
                }
                yield Document(page_content=note["body"], metadata=metadata)

            has_more = json_data["has_more"]
            page += 1

    def _get_folder(self, folder_id: str) -> str:
        """Return the title of the folder identified by ``folder_id``."""
        json_data = self._fetch_json(self._get_folder_url.format(id=folder_id))
        return json_data["title"]

    def _get_tags(self, note_id: str) -> List[str]:
        """Return the titles of the tags attached to the note ``note_id``.

        Only the ``items`` of the first response are read.
        """
        json_data = self._fetch_json(self._get_tag_url.format(id=note_id))
        return [tag["title"] for tag in json_data["items"]]

    def _convert_date(self, date: int) -> str:
        """Format a millisecond timestamp as ``YYYY-MM-DD HH:MM:SS``.

        The value is divided by 1000 to obtain seconds and rendered with
        ``datetime.fromtimestamp`` without a timezone, i.e. in local time.
        """
        return datetime.fromtimestamp(date / 1000).strftime("%Y-%m-%d %H:%M:%S")

    def lazy_load(self) -> Iterator[Document]:
        """Lazily yield every note as a ``Document``."""
        yield from self._get_notes()