Source code for langchain_community.document_loaders.browserbase

from typing import Iterator, Optional, Sequence

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class BrowserbaseLoader(BaseLoader):
    """Load pre-rendered web pages using a headless browser hosted on Browserbase.

    Depends on the `browserbase` package.
    Get your API key from https://browserbase.com.
    """

    def __init__(
        self,
        urls: Sequence[str],
        text_content: bool = False,
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        session_id: Optional[str] = None,
        proxy: Optional[bool] = None,
    ):
        """Store the loading options and create the Browserbase client.

        Args:
            urls: URLs of the pages to load; also used to fill the ``"url"``
                metadata entry of each yielded document.
            text_content: Forwarded unchanged to ``Browserbase.load_urls``.
                NOTE(review): presumably selects text-only extraction --
                confirm against the Browserbase SDK.
            api_key: Passed as the first positional argument to
                ``Browserbase``.
            project_id: Passed as the second positional argument to
                ``Browserbase``.
            session_id: Forwarded unchanged to ``Browserbase.load_urls``.
            proxy: Forwarded unchanged to ``Browserbase.load_urls``.

        Raises:
            ImportError: If the `browserbase` package cannot be imported.
        """
        self.urls = urls
        self.text_content = text_content
        self.session_id = session_id
        self.proxy = proxy

        # Imported lazily so that `browserbase` is only required when this
        # loader is actually instantiated.
        try:
            from browserbase import Browserbase
        except ImportError as err:
            # Chain the original error so the underlying import failure stays
            # visible as the explicit cause in the traceback.
            raise ImportError(
                "You must run "
                "`pip install --upgrade "
                "browserbase` "
                "to use the Browserbase loader."
            ) from err

        self.browserbase = Browserbase(api_key, project_id)

    def lazy_load(self) -> Iterator[Document]:
        """Load pages from the configured URLs.

        Yields:
            One ``Document`` per page returned by ``Browserbase.load_urls``,
            with the page as ``page_content`` and the URL at the same
            position in ``self.urls`` stored under the ``"url"`` metadata key.
        """
        pages = self.browserbase.load_urls(
            self.urls, self.text_content, self.session_id, self.proxy
        )

        # Pages are matched to URLs by position.
        # NOTE(review): assumes load_urls returns pages in the same order as
        # the input URLs -- confirm against the Browserbase SDK.
        for i, page in enumerate(pages):
            yield Document(
                page_content=page,
                metadata={
                    "url": self.urls[i],
                },
            )