o
    ưi                     @   sd   d Z ddlmZmZmZ ddlZddlmZ ddlm	Z	m
Z
mZmZ ddlmZ G dd de	ZdS )	z,
Mistral OCR transformation implementation.
    )AnyDictOptionalN)verbose_logger)BaseOCRConfigDocumentTypeOCRRequestDataOCRResponse)get_secret_strc                       s   e Zd ZdZd fddZdedefddZd	ed
ededefddZ				dde
dedee dee dee de
fddZ	ddee ded
edee def
ddZdeded
ededef
ddZdedejdedefddZ  ZS )MistralOCRConfigzl
    Mistral OCR transformation configuration.
    
    Reference: https://docs.mistral.ai/api/#tag/ocr
    returnNc                    s   t    d S N)super__init__)self	__class__ ^/home/app/Keep/.python/lib/python3.10/site-packages/litellm/llms/mistral/ocr/transformation.pyr      s   zMistralOCRConfig.__init__modelc                 C   s   g dS )a  
        Get supported OCR parameters for Mistral OCR.
        
        Mistral OCR supports:
        - pages: List of page numbers to process
        - include_image_base64: Whether to include base64 encoded images
        - image_limit: Maximum number of images to return
        - image_min_size: Minimum size of images to include
        - bbox_annotation_format: Format for bounding box annotations
        - document_annotation_format: Format for document annotations
        )pagesZinclude_image_base64Zimage_limitZimage_min_sizeZbbox_annotation_formatZdocument_annotation_formatr   )r   r   r   r   r   get_supported_ocr_params   s   z)MistralOCRConfig.get_supported_ocr_paramsnon_default_paramsoptional_paramsc                 C   s6   | j |d}i }| D ]\}}||v r|||< q|S )z
        Map OCR parameters to Mistral-specific format.
        
        Mistral accepts these parameters directly, so no transformation needed.
        Just filter out unsupported params.
        )r   )r   items)r   r   r   r   Zsupported_paramsZmapped_paramsparamvaluer   r   r   map_ocr_params1   s   zMistralOCRConfig.map_ocr_paramsheadersapi_keyapi_baselitellm_paramsc                 K   s6   |du rt d}|du rtddd| i|}|S )zJ
        Validate environment and return headers for Mistral OCR.
        NZMISTRAL_API_KEYz}Missing Mistral API Key - A call is being made to Mistral but no key is set either in the environment variables or via paramsAuthorizationzBearer )r
   
ValueError)r   r   r   r   r    r!   kwargsr   r   r   validate_environmentG   s   
z%MistralOCRConfig.validate_environmentc                 K   s4   |du rd}| d}|dr| dS | dS )zt
        Get complete URL for Mistral OCR endpoint.
        
        Returns: https://api.mistral.ai/v1/ocr
        Nzhttps://api.mistral.ai/v1/z/v1z/ocrz/v1/ocr)rstripendswith)r   r    r   r   r!   r$   r   r   r   get_complete_urlg   s   



z!MistralOCRConfig.get_complete_urldocumentc                 K   sL   t d|  t|tstdt| ||d}|| t|ddS )a  
        Transform OCR request to Mistral-specific format.
        
        Mistral OCR API accepts:
        {
            "model": "mistral-ocr-latest",
            "document": {
                "type": "document_url",
                "document_url": "<https-url or data-uri>"
            },
            "pages": [0],  # optional
            "include_image_base64": false,  # optional
            ...
        }
        
        Args:
            model: Model name (e.g., "mistral-ocr-latest")
            document: Document dict from user (Mistral format) - already validated in main.py
            optional_params: Already mapped optional parameters
            headers: Request headers
            
        Returns:
            OCRRequestData with JSON data
        z+Mistral OCR transform_ocr_request - model: zExpected document dict, got )r   r*   N)datafiles)r   debug
isinstancedictr#   typeupdater   )r   r   r*   r   r   r$   r+   r   r   r   transform_ocr_request   s    

z&MistralOCRConfig.transform_ocr_requestraw_responselogging_objc              
   K   sz   z&|  }td|   t|dg |d||d|dddW S  ty< } z
td|  |d	}~ww )
a  
        Return Mistral OCR response in native format.
        
        Mistral OCR is the standard format for LiteLLM OCR responses.
        No transformation needed - return native response.
        
        Mistral OCR returns:
        {
            "pages": [
                {
                    "index": 0,
                    "markdown": "extracted text content",
                    "images": [...],
                    "dimensions": {...}
                },
                ...
            ],
            "model": "mistral-ocr-2505-completion",
            "document_annotation": null,
            "usage_info": {...}
        }
        zMistral OCR response keys: r   r   document_annotation
usage_infoZocr)r   r   r5   r6   objectz$Error parsing Mistral OCR response: N)jsonr   r-   keysr	   get	Exceptionerror)r   r   r3   r4   r$   Zresponse_jsoner   r   r   transform_ocr_response   s   

z'MistralOCRConfig.transform_ocr_response)r   N)NNNr   )__name__
__module____qualname____doc__r   strlistr   r/   r   r   r   r%   r)   r   r   r2   httpxResponser   r	   r>   __classcell__r   r   r   r   r      sz    

%

3r   )rB   typingr   r   r   rE   Zlitellm._loggingr   Z(litellm.llms.base_llm.ocr.transformationr   r   r   r	   Zlitellm.secret_managers.mainr
   r   r   r   r   r   <module>   s    