o
    |qi                     @   sf   d dl Z d dlZd dlmZmZmZmZmZmZ d dl	m
Z
 d dlmZ eeZG dd deZdS )    N)AsyncIteratorDictIteratorListOptionalSequence)Document)
BaseLoaderc                   @   s   e Zd ZdZddddddedededee d	eee  d
eee  deddfddZ	de
e fddZdee fddZdee fddZde
e fddZdedefddZdee fddZ	d"dedee dedefd d!ZdS )#MongodbLoaderzLoad MongoDB documents.NT)filter_criteriafield_namesmetadata_names!include_db_collection_in_metadataconnection_stringdb_namecollection_namer   r   r   r   returnc          
   
   C   s   zddl m} W n ty }	 ztd|	d}	~	ww |s td|s&td|s,td||| _|| _|| _|p:g | _|p?i | _|pDg | _	|| _
| j|| _| j|| _dS )a  
        Initializes the MongoDB loader with necessary database connection
        details and configurations.

        Args:
            connection_string (str): MongoDB connection URI.
            db_name (str):Name of the database to connect to.
            collection_name (str): Name of the collection to fetch documents from.
            filter_criteria (Optional[Dict]): MongoDB filter criteria for querying
            documents.
            field_names (Optional[Sequence[str]]): List of field names to retrieve
            from documents.
            metadata_names (Optional[Sequence[str]]): Additional metadata fields to
            extract from documents.
            include_db_collection_in_metadata (bool): Flag to include database and
            collection names in metadata.

        Raises:
            ImportError: If the motor library is not installed.
            ValueError: If any necessary argument is missing.
        r   )AsyncIOMotorClientzBCannot import from motor, please install with `pip install motor`.Nz#connection_string must be provided.zdb_name must be provided.z!collection_name must be provided.)Zmotor.motor_asyncior   ImportError
ValueErrorclientr   r   r   r   r   r   Zget_databasedbZget_collection
collection)
selfr   r   r   r   r   r   r   r   e r   s/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/langchain_community/document_loaders/mongodb.py__init__   s2    



zMongodbLoader.__init__c                 C   s   t |  S )a  Load data into Document objects.

        Attention:

        This implementation starts an asyncio event loop which
        will only work if running in a sync env. In an async env, it should
        fail since there is already an event loop running.

        This code should be updated to kick off the event loop from a separate
        thread if running within an async context.
        )asynciorunaload)r   r   r   r   loadJ   s   zMongodbLoader.loadc                 c   sn    zt  }W n ty   t  }t | Y nw |  }	 z|| }|V  W n
 ty5   Y dS w q)a  A lazy loader for MongoDB documents.

        Attention:

        This implementation starts an asyncio event loop which
        will only work if running in a sync env. In an async env, it should
        fail since there is already an event loop running.

        This code should be updated to kick off the event loop from a separate
        thread if running within an async context.

        Yields:
            Document: A document from the MongoDB collection.
        TN)	r   get_running_loopRuntimeErrornew_event_loopset_event_loop
alazy_loadrun_until_complete	__anext__StopAsyncIteration)r   Z
event_loopasync_generatordocumentr   r   r   	lazy_loadX   s    
zMongodbLoader.lazy_loadc                 C  s:   |   }| j| j|2 z3 dH W }| |V  q6 dS )zAsynchronously yields Document objects one at a time.

        Yields:
            Document: A document from the MongoDB collection.
        N)_construct_projectionr   findr   _process_document)r   
projectiondocr   r   r   r&   v   s
   zMongodbLoader.alazy_loadc                    s~   g }| j | jI dH }|  }| j | j|2 z3 dH W }|| | q6 t||kr=t	dt| d| d |S )z0Asynchronously loads data into Document objects.Nz6Only partial collection of documents returned. Loaded z docs, expected .)
r   Zcount_documentsr   r-   r.   appendr/   lenloggerwarning)r   resultZ
total_docsr0   r1   r   r   r   r       s    zMongodbLoader.aloadr1   c                 C   sz   | j || jdd}| jr|| j| jd | jdur3| j || jdd}dd | D }d|}nt	|}t
||dS )	zProcess a single MongoDB document into a Document object.

        Args:
            doc: The MongoDB document dictionary to process into a Document object.
         )default)Zdatabaser   Nc                 S   s   g | ]}t |qS r   )str).0valuer   r   r   
<listcomp>   s    z3MongodbLoader._process_document.<locals>.<listcomp> )Zpage_contentmetadata)_extract_fieldsr   r   updater   r   r   valuesjoinr:   r   )r   r1   r?   fieldsZtextstextr   r   r   r/      s   
zMongodbLoader._process_documentc                 C   s:   t | jpg }t | jpg }|| }|rdd |D S dS )zuConstructs the projection dictionary for MongoDB query based
        on the specified field names and metadata names.c                 S   s   i | ]}|d qS )   r   )r;   fieldr   r   r   
<dictcomp>   s    z7MongodbLoader._construct_projection.<locals>.<dictcomp>N)listr   r   )r   r   r   Z
all_fieldsr   r   r   r-      s   z#MongodbLoader._construct_projectionr8   r+   rD   r9   c           	      C   sV   i }|pg D ]"}|}| dD ]}|||}||kr nq|dd}|||< q|S )zAExtracts and returns values for specified fields from a document.r2   _)splitgetreplace)	r   r+   rD   r9   Z	extractedrG   r<   keyZnew_field_namer   r   r   r@      s   
zMongodbLoader._extract_fields)r8   )__name__
__module____qualname____doc__r:   r   r   r   boolr   r   r   r!   r   r,   r   r&   r    r/   r-   r@   r   r   r   r   r
      sN    

	

;r
   )r   loggingtypingr   r   r   r   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser	   	getLoggerrO   r5   r
   r   r   r   r   <module>   s     
