o
    ưi$                  
   @   sJ  d dl Z d dlmZmZmZmZ d dlZd dlmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZ d dlT d dlmZmZ d d	lmZ e Zd
edee dee deeef fddZde
deeef fddZde
deeef fddZej de	egedgdej de	egedgde	efde
dedefddZ!dS )    N)AnyDictOptionalcast)	APIRouterDependsRequestResponse
UploadFile)ORJSONResponse)verbose_proxy_logger)%convert_file_document_to_url_documentget_mime_type)*)UserAPIKeyAuthuser_api_key_auth)ProxyBaseLLMRequestProcessingfile_contentfilenamecontent_typereturnc                 C   sF   |r| dd  nd}|r|dkr|rt|}td| |pddS )z
    Convert uploaded file bytes into a Mistral-format document dict with base64 data URI.

    Delegates to convert_file_document_to_url_document after resolving MIME type
    from the upload's content_type header or filename.
    ;r   Nzapplication/octet-streamfile)typer   	mime_type)splitstripr   r   )r   r   r   r    r   \/home/app/Keep/.python/lib/python3.10/site-packages/litellm/proxy/ocr_endpoints/endpoints.py_build_document_from_upload   s   r   requestc           	   
      sL  z	|   I dH }W n ty  } z
tdt| dd}~ww |d}|du s4t|ts8t|ds8tdtt|}|	dI dH  |
 I dH }|sRtdt||j|jd	}d
|i}| D ],\}}|dv rlqct|trz	t|||< W qc tjtfy   |||< Y qcw |||< qctd|d d|d  d|j  |S )z
    Extract OCR data from a multipart form request.

    Uses the cached form if already parsed by auth middleware,
    otherwise parses the form from the request.

    Returns:
        A dict with 'document', 'model', and any other OCR params.
    Nz%Failed to parse multipart form data: u   . When using curl with --form/-F, do NOT set the Content-Type header manually — curl will set it automatically with the required boundary.r   readzNMultipart OCR request must include a 'file' field with the document to processr   zUploaded file is empty)r   r   r   document)r   r"   z+OCR multipart form request parsed - model: modelz, document_type: r   z, filename: )form	Exception
ValueErrorstrget
isinstancer
   hasattrr   seekr!   r   r   r   itemsjsonloadsJSONDecodeErrorr   debug)	r    r$   eZuploaded_filer   r"   data
field_nameZfield_valuer   r   r   _parse_multipart_form,   s^   




r4   c              
      s   | j dd}d| v rt| I dH S z	|  I dH }W n ty)   d}Y nw |sDt| dddur@td t| I dH S t	dzt
|}W n t
jy` } zt	d	| d
d}~ww t|trk|dnd}t|tr}|ddkr}t	d|S )a  
    Parse an OCR request, supporting both JSON and multipart form data.

    JSON body (existing behavior):
        {
            "model": "mistral/mistral-ocr-latest",
            "document": {"type": "document_url", "document_url": "https://..."}
        }

    Multipart form data (new):
        - file: the uploaded file
        - model: model name (form field)
        - Any other OCR params as form fields (pages, include_image_base64, etc.)

    Returns:
        A dict suitable for passing to the OCR processing pipeline.
    zcontent-type zmultipart/form-dataN    Z_formuf   OCR request body is empty but form data is available from middleware — processing as multipart form.zEmpty request body. For file uploads, use multipart/form-data content type with a file field. When using curl with --form/-F, do NOT set the Content-Type header manually.zInvalid JSON in request body: zy. Ensure the request body is valid JSON with Content-Type: application/json, or use multipart/form-data for file uploads.r"   r   r   zdocument type 'file' is not supported through the JSON API. To upload a local file, use multipart/form-data with a 'file' field. For JSON requests, use 'document_url' or 'image_url' document types.)headersr(   lowerr4   bodyRuntimeErrorgetattrr   r0   r&   orjsonr.   r/   r)   dict)r    r   r9   r2   r1   docr   r   r   _parse_ocr_requesto   s@   
r?   z/v1/ocrocr)dependenciesresponse_classtagsz/ocrfastapi_responseuser_api_key_dictc              
      s  ddl m}m}m}m}m}m}m}	m}
m	}m
}m} i }zFt| I dH }t|d}|jdi d| d|d|dd	d
|d|d|d|d|ddd|
d|d|d|	d|d|I dH W S  ty } zt|d}|j||||dI dH d}~ww )a  
    OCR endpoint for extracting text from documents and images.

    Supports two input modes:

    **1. JSON body** (Mistral OCR API compatible):
    ```bash
    curl -X POST "http://localhost:4000/v1/ocr"         -H "Authorization: Bearer sk-1234"         -H "Content-Type: application/json"         -d '{
            "model": "mistral-ocr",
            "document": {
                "type": "document_url",
                "document_url": "https://arxiv.org/pdf/2201.04234"
            }
        }'
    ```

    **2. Multipart form file upload**:
    ```bash
    curl -X POST "http://localhost:4000/v1/ocr"         -H "Authorization: Bearer sk-1234"         -F "model=mistral-ocr"         -F "file=@document.pdf"
    ```
    r   )general_settings
llm_routerproxy_configproxy_logging_objselect_data_generatoruser_api_baseuser_max_tokens
user_modeluser_request_timeoutuser_temperatureversionN)r2   r    rD   rE   Z
route_typeZaocrrI   rG   rF   rH   rJ   r#   rM   rO   rN   rL   rK   rP   )r1   rE   rI   rP   r   )Zlitellm.proxy.proxy_serverrF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   r?   r   Zbase_process_llm_requestr%   Z_handle_llm_api_exception)r    rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   r2   	processorr1   r   r   r   r@      sb   4,

	

)"r-   typingr   r   r   r   r<   Zfastapir   r   r   r	   r
   Zfastapi.responsesr   Zlitellm._loggingr   Zlitellm.ocr.mainr   r   Zlitellm.proxy._typesZ$litellm.proxy.auth.user_api_key_authr   r   Z'litellm.proxy.common_request_processingr   Zrouterbytesr'   r   r4   r?   postr@   r   r   r   r   <module>   sT   

CI	