o
    ưil                     @  s   d Z ddlmZ ddlZddlZddlZddlmZmZm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZ erDddlmZ dd	lmZ dddZdddZdddZG dd deeZdS )aj  
Bedrock-specific RAG Ingestion implementation.

Bedrock Knowledge Bases handle embedding internally when files are ingested,
so this implementation uploads files to S3 and triggers ingestion jobs.

Supports two modes:
1. Use existing KB: Provide vector_store_id (KB ID)
2. Auto-create KB: Don't provide vector_store_id - creates all AWS resources automatically
    )annotationsN)TYPE_CHECKINGAnyDictListOptionalTuple)verbose_logger)
BaseAWSLLM)BaseRAGIngestion)Router)RAGIngestOptionsvaluer   returnOptional[str]c                 C  s   | durt | S dS )z#Cast config value to Optional[str].N)str)r    r   ^/home/app/Keep/.python/lib/python3.10/site-packages/litellm/rag/ingestion/bedrock_ingestion.py_get_str_or_none   s   r   defaultintc                 C  s   | du r|S t | S )z&Cast config value to int with default.N)r   )r   r   r   r   r   _get_int!   s   r   
caller_arnr   
account_idc                 C  s:   d| v r|  d}t|dkr|d }d| d| S | S )a  
    Normalize a caller ARN to the format required by OpenSearch data access policies.
    
    OpenSearch Serverless data access policies require:
    - IAM users: arn:aws:iam::account-id:user/user-name
    - IAM roles: arn:aws:iam::account-id:role/role-name
    
    But get_caller_identity() returns for assumed roles:
    - arn:aws:sts::account-id:assumed-role/role-name/session-name
    
    This function converts assumed-role ARNs to the proper IAM role ARN format.
    z:assumed-role//      arn:aws:iam::z:role/)splitlen)r   r   parts	role_namer   r   r   _normalize_principal_arn(   s   
r"   c                   @  s   e Zd ZdZ	d6d7ddZd	d
 Zdd Zdd Zd8ddZd9ddZ	d:ddZ
d;ddZd<d!d"Zd=d#d$Zd>d&d'Zd?d+d,Zd@d4d5ZdS )ABedrockRAGIngestiona  
    Bedrock Knowledge Base RAG ingestion.

    Supports two modes:
    1. **Use existing KB**: Provide vector_store_id
    2. **Auto-create KB**: Don't provide vector_store_id - creates S3 bucket,
       OpenSearch Serverless collection, IAM role, KB, and data source automatically

    Optional config:
    - vector_store_id: Existing KB ID (if not provided, auto-creates)
    - s3_bucket: S3 bucket (auto-created if not provided)
    - embedding_model: Bedrock embedding model (default: amazon.titan-embed-text-v2:0)
    - wait_for_ingestion: Wait for completion (default: True)
    - ingestion_timeout: Max seconds to wait (default: 300)

    AWS Auth (uses BaseAWSLLM):
    - aws_access_key_id, aws_secret_access_key, aws_session_token
    - aws_region_name (default: us-west-2)
    - aws_role_name, aws_session_name, aws_profile_name
    - aws_web_identity_token, aws_sts_endpoint, aws_external_id
    Ningest_options'RAGIngestOptions'routerOptional['Router']c                 C  s   t j| ||d t|  | jdp| jd| _| jd| _| jd| _| jdr6t| jdnd | _	| jdp@d| _
| jd	d
| _t| jdd| _| jd}| j|rbt|nd d| _d | _d | _| j	pqd| _d
| _i | _d S )N)r$   r&   Zvector_store_idknowledge_base_iddata_source_id	s3_bucket	s3_prefixembedding_modelzamazon.titan-embed-text-v2:0wait_for_ingestionFingestion_timeouti,  aws_region_name)r/   zdata/)r   __init__r
   vector_store_configgetr(   _data_source_id
_s3_bucketr   
_s3_prefixr,   r-   r   r.   Z)get_aws_region_name_for_non_llm_api_callsr/   r)   r*   r+   _config_initialized_created_resources)selfr$   r&   Z_aws_regionr   r   r   r0   V   s4   

"
zBedrockRAGIngestion.__init__c                   s4   | j rdS | jr|   n|  I dH  d| _ dS )zHLazily initialize KB config - either detect from existing or create new.NT)r6   r(   _auto_detect_config%_create_knowledge_base_infrastructure)r8   r   r   r   _ensure_config_initialized}   s   

z.BedrockRAGIngestion._ensure_config_initializedc                 C  s2  t d| j  | d}|j| jd}|dg }|s&td| j d| jr.| j| _n|d d | _t 	d	| j  |j
| j| jd
}|di di di }|dd}|r| jpf|dd | _t 	d| j  |dg }|r| js|d | _dS dS dS | jstd| j d| j| _dS )zFAuto-detect data source ID and S3 bucket from existing Knowledge Base.z0Auto-detecting data source and S3 bucket for KB=bedrock-agentknowledgeBaseIdZdataSourceSummariesz)No data sources found for Knowledge Base zL. Please create a data source first or provide data_source_id and s3_bucket.r   dataSourceIdzAuto-detected data source: r>   r?   
dataSourcedataSourceConfigurations3Configuration	bucketArn :zAuto-detected S3 bucket: inclusionPrefixesz0Could not auto-detect S3 bucket for data source z%. Please provide s3_bucket in config.N)r	   debugr(   _get_boto3_clientZlist_data_sourcesr2   
ValueErrorr3   r)   infoZget_data_sourcer4   r   r*   r5   r+   )r8   bedrock_agentZds_responseZdata_sourcesZ
ds_detailsZ	s3_configZ
bucket_arnprefixesr   r   r   r9      sJ   




z'BedrockRAGIngestion._auto_detect_configc           
        s   t d t jdd }| jpd| }| d}| }|d }|d }| jp/| 	|| _
| |||I dH \}}| |I dH  | |||I dH }	| ||	|I dH | _| || _t d| j d	| j d
| j
  dS )z9Create all AWS resources needed for a new Knowledge Base.z5Creating new Bedrock Knowledge Base infrastructure...N   litellm-kb-stsZAccountArnz!Created KB infrastructure: kb_id=z, ds_id=z	, bucket=)r	   rL   uuiduuid4hexZingest_namerJ   Zget_caller_identityr4   _create_s3_bucketr*   _create_opensearch_collection_create_opensearch_index_create_bedrock_role_create_knowledge_baser(   _create_data_sourcer)   )
r8   	unique_idkb_namerQ   Zcaller_identityr   r   collection_namecollection_arnrole_arnr   r   r   r:      s2   


z9BedrockRAGIngestion._create_knowledge_base_infrastructurer\   r   r   c                 C  sr   |  d}d| }td|  d|i}| jdkr"d| ji|d< |jdi | || jd< td	|  |S )z$Create S3 bucket for KB data source.s3rP   zCreating S3 bucket: Bucketz	us-east-1ZLocationConstraintZCreateBucketConfigurationr*   zCreated S3 bucket: Nr   )rJ   r	   rI   r/   Zcreate_bucketr7   rL   )r8   r\   ra   Zbucket_nameZcreate_paramsr   r   r   rV      s   



z%BedrockRAGIngestion._create_s3_bucketr   r   Tuple[str, str]c              
     s  |  d}d| }td|  |j| ddtdd| gdgd	d
d |j| ddtdd| gddd| gdgd	dgd t||}td| d|  d| d|g}tt|}|j	| ddtdd| dgdgddd| gdgdg|dgd |j
|dd}|d d }	|| jd < td! td"D ]}
|j|	gd#}|d$ d% d& }|d'kr ntd(I d)H  qtd*|d$ d% d+ }td,|  td- td"I d)H  ||fS ).z;Create OpenSearch Serverless collection for vector storage.opensearchserverlessrP   z+Creating OpenSearch Serverless collection: z-encZ
encryptionZ
collectionzcollection/)ResourceTypeResourceT)RulesZAWSOwnedKey)nametypepolicyz-netnetworkZ	dashboard)rg   ZAllowFromPubliczCaller ARN: z, Normalized: r   z:rootz-accessdataindexzindex//*zaoss:*)re   rf   Z
Permission)rg   	PrincipalZVECTORSEARCH)rh   ri   ZcreateCollectionDetailidZopensearch_collectionz1Waiting for OpenSearch collection to be active...<   )idscollectionDetailsr   statusACTIVE   Nz3OpenSearch collection did not become active in timeZarnzCreated OpenSearch collection: z4Waiting for data access policy to propagate (60s)...)rJ   r	   rI   Zcreate_security_policyjsondumpsr"   listsetZcreate_access_policyZcreate_collectionr7   rangebatch_get_collectionasynciosleepTimeoutErrorrL   )r8   r\   r   r   ossr^   Znormalized_caller_arnZ
principalsresponseZcollection_id_Zstatus_responsert   r_   r   r   r   rW      sn   






z1BedrockRAGIngestion._create_opensearch_collectionr^   c                   s  ddl m}m} ddlm} | jt| jdt| jdt| jd| j	d}| 
d}|j|gd	}|d
 d d }|dd}	||j|j| j	d|jd}
||	ddg|
dd|d}d}ddddiddddddddd d!d"d#d id$id%}d&}d'}d(}t|D ]U}z|jj||d) td*|  W  d(S  ty } z3|}t|}d+| v sd,| v rtd-|d.  d/| d0| d1 t|I d(H  n W Y d(}~qd(}~ww td2| d3| )4z>Create vector index in OpenSearch collection with retry logic.r   )
OpenSearchRequestsHttpConnection)AWS4Authaws_access_key_idaws_secret_access_keyaws_session_token)r   r   r   r/   rd   )namesrs   ZcollectionEndpointzhttps://rE   Zaoss)Zsession_tokeni  )hostportT)hostsZ	http_authZuse_sslZverify_certsZconnection_classbedrock-kb-indexrm   i   )Zknnzknn.algo_param.ef_search
propertiesZ
knn_vectori   ZfaissZhnswl2)Zenginerh   Z
space_type)ri   	dimensionmethodtextF)ri   rm   ri   )%bedrock-knowledge-base-default-vectorAMAZON_BEDROCK_METADATAAMAZON_BEDROCK_TEXT_CHUNK)settingsZmappingsrO      N)rm   bodyzCreated OpenSearch index: Zauthorization_exceptionZsecurity_exceptionz"OpenSearch index creation attempt r   r   z& failed due to authorization. Waiting zs for policy propagation...z(Failed to create OpenSearch index after zC attempts. Data access policy may not have propagated. Last error: )Zopensearchpyr   r   Zrequests_aws4authr   get_credentialsr   r1   r2   r/   rJ   r|   replace
access_key
secret_keytokenr{   indicescreater	   rL   	Exceptionr   lowerwarningr}   r~   RuntimeError)r8   r^   r   r   r   credentialsr   collectionsZendpointr   authclientZ
index_nameZ
index_bodymax_retriesZretry_delayZ
last_errorattempteZ	error_strr   r   r   rX   Q  s   



z,BedrockRAGIngestion._create_opensearch_indexr_   c           
        s"  |  d}d| }td|  ddddidd	|id
d| j d| diddgd}|j|t|d}|d d }|| jd< dddgd| j d| j gdddg|gddddgd| j	 d| j	 dgdgd}	|j
|| dt|	d tdI d H  td!|  |S )"zCreate IAM role for Bedrock KB.iamzlitellm-bedrock-kb-zCreating IAM role: z
2012-10-17ZAllowZServicezbedrock.amazonaws.comzsts:AssumeRolezaws:SourceAccountzaws:SourceArnarn:aws:bedrock:rF   z:knowledge-base/*)ZStringEqualsZArnLike)Effectro   Action	Condition)VersionZ	Statement)RoleNameZAssumeRolePolicyDocumentZRolerR   Ziam_rolezbedrock:InvokeModel::foundation-model/)r   r   rf   zaoss:APIAccessAllzs3:GetObjectzs3:ListBucketarn:aws:s3:::rn   z-policy)r   Z
PolicyNameZPolicyDocument
   NzCreated IAM role: )rJ   r	   rI   r/   Zcreate_rolerw   rx   r7   r,   r*   Zput_role_policyr}   r~   rL   )
r8   r\   r   r_   r   r!   Ztrust_policyr   r`   Zpermissions_policyr   r   r   rY     sT   


z(BedrockRAGIngestion._create_bedrock_roler]   r`   c           
   
     s   |  d}td|  |j||ddd| j d| j idd|d	d
dddddd}|d d }|| jd< td tdD ]}|j|d}|d d }	|	dkrW nt	
dI dH  qCtdtd|  |S )zCreate Bedrock Knowledge Base.r<   zCreating Knowledge Base: ZVECTORZembeddingModelArnr   r   )ri   Z vectorKnowledgeBaseConfigurationZOPENSEARCH_SERVERLESSr   r   r   )ZmetadataFieldZ	textFieldZvectorFieldr   )ZcollectionArnZfieldMappingZvectorIndexName)ri   Z!opensearchServerlessConfiguration)rh   ZroleArnZknowledgeBaseConfigurationZstorageConfigurationZknowledgeBaser>   Zknowledge_basez*Waiting for Knowledge Base to be active...   r=   rt   ru   r   Nz,Knowledge Base did not become active in timezCreated Knowledge Base: )rJ   r	   rI   Zcreate_knowledge_baser/   r,   r7   r{   Zget_knowledge_baser}   r~   r   rL   )
r8   r]   r`   r_   rM   r   Zkb_idr   Z	kb_statusrt   r   r   r   rZ     s@   


z*BedrockRAGIngestion._create_knowledge_basec                 C  sv   |  d}td| j  |j| j| ddd| j | jgddd}|d	 d
 }|| jd< td|  |S )z*Create Data Source for the Knowledge Base.r<   zCreating Data Source for KB: z
-s3-sourceZS3r   )rD   rH   )ri   rC   )r>   rh   rB   rA   r?   Zdata_sourcezCreated Data Source: )	rJ   r	   rI   r(   Zcreate_data_sourcer*   r+   r7   rL   )r8   r]   rM   r   Zds_idr   r   r   r[     s   


z'BedrockRAGIngestion._create_data_sourceservice_namec                 C  s   zddl }W n ty   tdw | jt| jdt| jdt| jd| jt| jdt| jdt| jd	t| jd
t| jdt| jdd
}|j|j|j	|j
| jd}||S )zCGet a boto3 client for the specified service using BaseAWSLLM auth.r   NzHboto3 is required for Bedrock ingestion. Install with: pip install boto3r   r   r   aws_session_nameaws_profile_nameaws_role_nameaws_web_identity_tokenaws_sts_endpointaws_external_id)
r   r   r   r/   r   r   r   r   r   r   )r   r   r   Zregion_name)boto3ImportErrorr   r   r1   r2   r/   Sessionr   r   r   r   )r8   r   r   r   sessionr   r   r   rJ   (  s0   
z%BedrockRAGIngestion._get_boto3_clientchunks	List[str]Optional[List[List[float]]]c                   s   dS )z
        Bedrock handles embedding internally - skip this step.

        Returns:
            None (Bedrock embeds when files are ingested)
        Nr   )r8   r   r   r   r   embedG  s   
zBedrockRAGIngestion.embedfile_contentOptional[bytes]filenamer   content_type
embeddings#Tuple[Optional[str], Optional[str]]c                   s   |   I dH  |r|std t| jdfS | d}| jd d| }td| j	 d|  |j
| j	|||p=dd td| j	 d|  | d	}td
| j d| j  |j| j| jd}	|	d d }
td|
  | jrddl}| }| | | jk r|j| j| j|
d}|d d }td|
 d|  |dkr|d di }td|dd d n4|dkr|d dg }td|  n|dv rtdI dH  n	td|  n	| | | jk s| jrt| j|fS d|fS ) aK  
        Store content in Bedrock Knowledge Base.

        Bedrock workflow:
        1. Auto-detect data source and S3 bucket (if not provided)
        2. Upload file to S3 bucket
        3. Start ingestion job
        4. (Optional) Wait for ingestion to complete

        Args:
            file_content: Raw file bytes
            filename: Name of the file
            content_type: MIME type
            chunks: Ignored - Bedrock handles chunking
            embeddings: Ignored - Bedrock handles embedding

        Returns:
            Tuple of (knowledge_base_id, file_key)
        Nz:No file content or filename provided for Bedrock ingestionra   r   zUploading file to s3://zapplication/octet-stream)rb   KeyZBodyZContentTypezUploaded file to s3://r<   zStarting ingestion job for KB=z, DS=r@   ZingestionJobingestionJobIdzStarted ingestion job: r   )r>   r?   r   rt   zIngestion job z	 status: ZCOMPLETE
statisticszIngestion complete: ZnumberOfNewDocumentsIndexedz docs indexedZFAILEDZfailureReasonszIngestion failed: )ZSTARTINGZIN_PROGRESSr   zUnknown ingestion status: )r;   r	   r   r   r(   rJ   r+   rstriprI   r*   Z
put_objectrL   r)   Zstart_ingestion_jobr-   timer.   Zget_ingestion_jobr2   errorr}   r~   r   )r8   r   r   r   r   r   Z	s3_clientZs3_keyrM   Zingestion_responseZjob_idZtime_module
start_timeZ
job_statusrt   statsZfailure_reasonsr   r   r   storeS  sh   


zBedrockRAGIngestion.store)N)r$   r%   r&   r'   )r\   r   r   r   )r\   r   r   r   r   r   r   rc   )r^   r   )r\   r   r   r   r_   r   r   r   )r]   r   r`   r   r_   r   r   r   )r]   r   r   r   )r   r   )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )__name__
__module____qualname____doc__r0   r;   r9   r:   rV   rW   rX   rY   rZ   r[   rJ   r   r   r   r   r   r   r#   ?   s     '9
)

Q
R
?
/

r#   )r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   
__future__r   r}   rw   rS   typingr   r   r   r   r   r   Zlitellm._loggingr	   Z!litellm.llms.bedrock.base_aws_llmr
   Z$litellm.rag.ingestion.base_ingestionr   Zlitellmr   Zlitellm.types.ragr   r   r   r"   r#   r   r   r   r   <module>   s      


