o
    a+ i?                     @   sH   d dl mZmZmZmZmZ d dlmZ d dlm	Z	 G dd de	Z
dS )    )AnyDictIteratorOptionalSequence)Document)
BaseLoaderc                   @   sd   e Zd ZdZ					ddee dedee dee dee d	ee fd
dZde	e
 fddZdS )BrowserbaseLoaderzLoad pre-rendered web pages using a headless browser hosted on Browserbase.

    Depends on `browserbase` and `playwright` packages.
    Get your API key from https://browserbase.com
    FNurlstext_contentapi_key
project_id
session_idproxyc                 C   sV   || _ || _|| _|| _|| _zddlm} W n ty"   tdw ||d| _d S )Nr   )BrowserbasezZYou must run `pip install --upgrade browserbase playwright` to use the Browserbase loader.)r   )r
   r   r   r   r   browserbaser   ImportError)selfr
   r   r   r   r   r   r    r   |/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/langchain_community/document_loaders/browserbase.py__init__   s   	zBrowserbaseLoader.__init__returnc              	   c   s6   zddl m} W n ty   tdw | jD ]}| s}| jr+| jjj| jd}n"| js2t	dd| ji}| j
durCt| j
|d< | jjjdi |}|j|j}|jd }|jd }|| | jrp|d	}	t|	}
n| }t|}
|  |  t|
d
|idV  W d   n1 sw   Y  qdS )zLoad pages from URLsr   )sync_playwrightz\playwright is required for BrowserbaseLoader. Please run `pip install --upgrade playwright`.)idz*project_id is required to create a sessionr   Nr   bodyurl)Zpage_contentmetadatar   )Zplaywright.sync_apir   r   r
   r   r   sessionsretriever   
ValueErrorr   boolcreateZchromiumZconnect_over_cdpZconnect_urlZcontextsZpagesZgotor   Z
inner_textstrcontentcloser   )r   r   r   Z
playwrightsessionZsession_paramsZbrowsercontextpageZ	page_textr#   Z	page_htmlr   r   r   	lazy_load*   sJ   








zBrowserbaseLoader.lazy_load)FNNNN)__name__
__module____qualname____doc__r   r"   r    r   r   r   r   r(   r   r   r   r   r	      s*    	
r	   N)typingr   r   r   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   r	   r   r   r   r   <module>   s    