o
    |qiI$                     @  s   d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ G d
d deeZdS )z7Loader that loads data from Sharepoint Document Library    )annotationsN)Path)AnyDictIteratorListOptional)
BaseLoader)Document)Field)O365BaseLoaderc                   @  s   e Zd ZU dZedZded< 	 dZded< 	 dZded	< 	 dZ	ded
< 	 dZ
ded< 	 e d d Zded< 	 dZded< 	 ed$ddZd%ddZd&ddZd'dd Zd(d"d#ZdS ))SharePointLoaderzLoad  from `SharePoint`..strdocument_library_idNzOptional[str]folder_pathzOptional[List[str]]
object_ids	folder_idFzOptional[bool]	load_authz.credentialszo365_token.txtr   
token_pathload_extended_metadatareturn	List[str]c                 C  s   ddgS )zcReturn required scopes.
        Returns:
            List[str]: A list of required scopes.
        Z
sharepointbasic )selfr   r   v/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/langchain_community/document_loaders/sharepoint.py_scopes%   s   zSharePointLoader._scopesIterator[Document]c                 c  s   z
ddl m}m} W n ty   tdw |   | j}t||s/t	d| j d| j
r|| j
}t||sFt	d| j
 d| |D ]G}t|jd}| jdu r_| |}| jdu rq| |}|d	|ji | j|D ]}	| jdu r||	jd
< | jdu r|	j| |	V  qwqK| jr|| j}t||st	d| j
 d| |D ]G}t|jd}| jdu r| |}| jdu r| |}|d	|ji | j|D ]}	| jdu r||	jd
< | jdu r|	j| |	V  qq| jrH| || jD ]E}t|jd}| jdu r| |}| jdu r"| |}| j|D ]}	| jdu r5||	jd
< | jdu rA|	j| |	V  q(q| j
s| js| js| }t||sbt	d| |D ][}t|jd}| jdu r|| |}| jdu r| |}| j|D ]-}
|
j|j | jdu r||
jd
< | jdu r|
j| |
jd	|ji |
V  qqgdS dS dS dS )z
        Load documents lazily. Use this when working at a large scale.
        Yields:
            Document: A document object representing the parsed blob.
        r   )DriveFolderzAO365 package not found, please install it with `pip install o365`zThere isn't a Drive with id .zThere isn't a folder with path idTZsource_full_urlauthorized_identitieszUnable to fetch root folderN)Z
O365.driver   r   ImportErrorZ_authZstorageZ	get_driver   
isinstance
ValueErrorr   Zget_item_by_pathZ_load_from_folderr   metadatagetr   r"   r   get_extended_metadataupdateZweb_urlZ_blob_parserZ
lazy_parser   Zget_itemr   Z_load_from_object_idsZget_root_folder)r   r   r   driveZtarget_folderZblobfile_idZauth_identitiesZextended_metadataZparsed_blobZ	blob_partr   r   r   	lazy_load-   s   
























zSharePointLoader.lazy_loadr+   r   c                 C  s   |   }|d}d| j d| d}dd| i}tjd||d}| }g }|d	D ]-}	|	d
rZ|	d
dpK|	d
dpK|	d
d}
|
rZ|
d}|rZ|| q-|S )a  
        Retrieve the access identities (user/group emails) for a given file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            List: A list of group names (email addresses) that have
                  access to the file.
        access_token(https://graph.microsoft.com/v1.0/drives//items/z/permissionsAuthorizationBearer GETheadersvalueZgrantedToV2ZsiteUserusergroupemail)_fetch_access_tokenr'   r   requestsrequestjsonappend)r   r+   datar-   urlr4   responseZaccess_listZgroup_namesZaccess_dataZ	site_datar8   r   r   r   r"      s2   	



z&SharePointLoader.authorized_identitiesr   c                 C  sD   t | jdd}| }W d   n1 sw   Y  t|}|S )z|
        Fetch the access token from the token file.
        Returns:
            The access token as a dictionary.
        zutf-8)encodingN)openr   readr<   loads)r   fsr>   r   r   r   r9      s
   

z$SharePointLoader._fetch_access_tokenr   c           	      C  s   |   }|d}d| j d| d}dd| i}tjd||d}| }|d	d
|di di dd|di dddd d |dd d}|S )a  
        Retrieve extended metadata for a file in SharePoint.
        As of today, following fields are supported in the extended metadata:
        - size: size of the source file.
        - owner: display name of the owner of the source file.
        - full_path: pretty human readable path of the source file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            dict: A dictionary containing the extended metadata of the file,
                  including size, owner, and full path.
        r-   r.   r/   z,?$select=size,createdBy,parentReference,namer0   r1   r2   r3   sizer   Z	createdByr6   ZdisplayName ZparentReferencepath:/name)rG   owner	full_path)r9   r'   r   r:   r;   r<   split)	r   r+   r>   r-   r?   r4   r@   r&   Zstaged_metadatar   r   r   r(      s2   




z&SharePointLoader.get_extended_metadata)r   r   )r   r   )r+   r   r   r   )r   r   )r+   r   r   r   )__name__
__module____qualname____doc__r   r   __annotations__r   r   r   r   r   homer   r   propertyr   r,   r"   r9   r(   r   r   r   r   r      s,   
 

T
"r   )rT   
__future__r   r<   pathlibr   typingr   r   r   r   r   r:   Zlangchain_core.document_loadersr	   Zlangchain_core.documentsr
   Zpydanticr   Z.langchain_community.document_loaders.base_o365r   r   r   r   r   r   <module>   s    