o
    iJI                     @  s  U d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZmZ d dlm Z  er{d dl!Z"d dl#Z$d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, eddG dd dZ-G dd deZ.eddG dd de.Z/eddG dd de.Z0dJddZ1dKd!d"Z2i d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGZ3dHe4dI< dS )L    )annotationsN)ABCabstractmethod)	dataclass)partial)perf_counter)TYPE_CHECKINGAnyFinalLiteral)eprintverboseverbose_print_sensitive)ComputeError)IcebergStatisticsLoader)IdentityTransformedPartitionValuesBuilder#_normalize_windows_iceberg_file_uritry_convert_pyarrow_predicate)ScanCastOptions)Table)StorageOptionsDict)NoPickleOption)	LazyFrameT)kw_onlyc                   @  s   e Zd ZU dZded< ded< ded< ded	< d
ed< ded< ded< ded< d1ddZd1ddZddddddd2dd Zddddddd3d"d#Zd4d%d&Z	d5d(d)Z
d6d+d,Zd7d/d0ZdS )8IcebergDatasetz Dataset interface for PyIceberg.zNoPickleOption[Table]table_
str | Nonemetadata_path_
int | Nonesnapshot_idStorageOptionsDict | Noneiceberg_storage_propertiesz%Literal['native', 'pyiceberg'] | Nonereader_overridebooluse_metadata_statisticsfast_deletion_countuse_pyiceberg_filterreturn	pa.schemac                 C  s   |   S )zFetch the schema of the table.)arrow_schemaself r,   Q/home/app/Keep/.python/lib/python3.10/site-packages/polars/io/iceberg/_dataset.pyschema3   s   zIcebergDataset.schemac                 C  s   ddl m} ||   S )z$Fetch the arrow schema of the table.r   schema_to_pyarrow)pyiceberg.io.pyarrowr0   tabler.   )r+   r0   r,   r,   r-   r)   7   s   zIcebergDataset.arrow_schemaNexisting_resolved_version_keylimit
projectionfilter_columnspyarrow_predicater4   r5   r6   list[str] | Noner7   r8   tuple[LazyFrame, str] | Nonec                C  s.   | j |||||d }du rdS | |jfS )zConstruct a LazyFrame scan.r3   N)_to_dataset_scan_implto_lazyframesnapshot_id_key)r+   r4   r5   r6   r7   r8   Z	scan_datar,   r,   r-   to_dataset_scan=   s   	zIcebergDataset.to_dataset_scan2_NativeIcebergScanData | _PyIcebergScanData | Nonec          /        sH  ddl m} dd l}|jj }d  d ur | jr | jr t |rMd ur(dnd}	 d ur0dnd}
t	d| j
 d| d| d| d	|	 d
|
 d| j  t fdd |  }|ret	d|jj  | j
}d }|d ur||}|d u rd| }t||j}|d u rd| d}t|| | }|j
 }n| }|jj}|  }d ur|j
 nd}|d ur||kr|rt	d|d d S | jptd}|r|dvrd| d}t||dkrdn|jdksd|j nd }|d u rdnt|}|dkr|n|j| }g }t||}| jr|d urt||j| nd }i }d}d}|dkr|sddlm}m } |r>t	d t! }|j"|||d }  d urS| # } d}!t$| % D ]\}"}#|#j&j'|j(krpd!|#j&j' } nk|#j)rg ||"< |#j)D ]2}$|$j*|j+krd"|$j* } n"|$j'|j(krd#|$j' } n||" ,|$j- |!d$7 }!||$j.7 }q{|r n(|j/|"|#j&j0|#j&j1d% |d ur|2|#j& ||#j&j.7 }|,t3|#j&j- q[|rt! | }%t	d&|%d'd( |sd|rt4|d$krdnd)}&|!d$krdnd)}'t	d*t4| d+|& d,| d-| d.|! d/|'  ||}(|5 })|d ur3|5t4||)nd }*| j6d ur@t7| j6nd }+t8|||(|)||*||+| jr_| j9sX|dkr_||f|d0
S d |d0
S |d1krrd2| }t:||r|t	d3|  dd l;}t<|j=j>jj?|||| d4},|| }-t@jAjB|-|,d5d5d6}.tC|.|d7S )8Nr   r/   zSome(<redacted>)Nonez0IcebergDataset: to_dataset_scan(): snapshot ID: z	, limit: z, projection: z, filter_columns: z, pyarrow_predicate: z, iceberg_table_filter: z , self.use_metadata_statistics: c                     s   dd S )Nz7IcebergDataset: to_dataset_scan(): pyarrow_predicate = z, iceberg_table_filter = r,   r,   iceberg_table_filterr8   r,   r-   <lambda>   s    z6IcebergDataset._to_dataset_scan_impl.<locals>.<lambda>zEIcebergDataset: to_dataset_scan(): tbl.metadata.current_snapshot_id: ziceberg snapshot ID not found: z#IcebergDataset: requested snapshot z did not contain a schema ID zCIcebergDataset: to_dataset_scan(): early return (snapshot_id_key = )ZPOLARS_ICEBERG_READER_OVERRIDE)native	pyicebergz-iceberg: unknown value for reader_override: 'z*', expected one of ('native', 'pyiceberg')rG   z"forced reader_override='pyiceberg'   z"unsupported table format version: )*)DataFileContent
FileFormatz7IcebergDataset: to_dataset_scan(): begin path expansion)r   r5   selected_fieldsznon-parquet format: z unsupported deletion file type: z"unsupported deletion file format:    )Zcurrent_indexZpartition_spec_idZpartition_valuesz:IcebergDataset: to_dataset_scan(): finish path expansion (z.3fzs)sz:IcebergDataset: to_dataset_scan(): native scan_parquet(): z sourcez, snapshot ID: z, schema ID: z, z deletion file)
sourcesprojected_iceberg_schemacolumn_mappingdefault_valuesdeletion_filesmin_max_statisticsstatistics_loaderstorage_options	row_countr=   rF   z)iceberg reader_override='native' failed: zGIcebergDataset: to_dataset_scan(): fallback to python[pyiceberg] scan: )r   Zn_rowsZwith_columnsrB   T)pyarrowZis_pure)lfr=   )Dr1   r0   polars._utils.logging_utilsloggingr   r$   r&   r   r   r   r   r2   metadataZcurrent_snapshot_idZsnapshot_by_id
ValueError	schema_idZschemasr.   Zcurrent_schema_idZcurrent_snapshotr"   osgetenvformat_versiontupleselectr   r   Zpyiceberg.manifestrJ   rK   r   scanfilter	enumerateZ
plan_filesfileZfile_formatZPARQUETZdelete_filescontentZPOSITION_DELETESappend	file_pathZrecord_countZpush_partition_valuesZspec_id	partitionZpush_file_statisticsr   lenfinishr!   0_convert_iceberg_to_object_store_storage_options_NativeIcebergScanDatar%   r   polars.io.iceberg._utilsr   ioicebergZ_scan_pyarrow_dataset_implplr   Z_scan_python_function_PyIcebergScanData)/r+   r4   r5   r6   r7   r8   r0   Zpolarsr   Zpyarrow_predicate_displayZiceberg_table_filter_displaytblr   r_   ZsnapshotmsgZiceberg_schemar=   vr"   Zfallback_reasonrL   rP   rO   Zmissing_field_defaultsrU   rS   Ztotal_physical_rowsZtotal_deleted_rowsrJ   rK   
start_timere   Ztotal_deletion_filesi	file_infoZdeletion_fileelapsedrN   s2rQ   Zidentity_transformed_valuesrT   rV   funcr)   rY   r,   rA   r-   r;   T   s  	



















	z$IcebergDataset._to_dataset_scan_implstrc                 C  s6   | j du r| j du rd}t||  j| _ | j S )zFetch the metadata path.N1impl error: both metadata_path and table are None)r   r   getr^   r2   metadata_location)r+   rw   r,   r,   r-   metadata_path  s   
zIcebergDataset.metadata_pathr   c                 C  sn   | j  du r2| jdu rd}t|t rtd| j ddlm} | j |j	| j| j
p.i d | j  S )z!Fetch the PyIceberg Table object.Nr   z;IcebergDataset: construct table from self.metadata_path_ = r   )StaticTable)r   
properties)r   r   r   r^   r   r   pyiceberg.tabler   setZfrom_metadatar!   )r+   rw   r   r,   r,   r-   r2     s   

zIcebergDataset.tabledict[str, Any]c                 C  s   |    | jS N)r   __dict__r*   r,   r,   r-   __getstate__  s   zIcebergDataset.__getstate__stater@   c                 C  s
   || _ d S r   )r   )r+   r   r,   r,   r-   __setstate__  s   
zIcebergDataset.__setstate__)r'   r(   )r4   r   r5   r   r6   r9   r7   r9   r8   r   r'   r:   )r4   r   r5   r   r6   r9   r7   r9   r8   r   r'   r?   )r'   r   )r'   r   )r'   r   )r   r   r'   r@   )__name__
__module____qualname____doc____annotations__r.   r)   r>   r;   r   r2   r   r   r,   r,   r,   r-   r      s<   
 

	  
/

r   c                   @  s   e Zd ZedddZdS )_ResolvedScanDataBaser'   pl.LazyFramec                 C  s   d S r   r,   r*   r,   r,   r-   r<     s   z"_ResolvedScanDataBase.to_lazyframeNr'   r   )r   r   r   r   r<   r,   r,   r,   r-   r     s    r   c                   @  sl   e Zd ZU dZded< ded< ded< ded	< d
ed< ded< ded< ded< ded< ded< dddZdS )rp   z.Resolved parameters for a native Iceberg scan.z	list[str]rO   zpyiceberg.schema.SchemarP   z	pa.SchemarQ   zdict[int, pl.Series | str]rR   zdict[int, list[str]]rS   zpl.DataFrame | NonerT   zIcebergStatisticsLoader | NonerU   r    rV   ztuple[int, int] | NonerW   r   r=   r'   r   c                 C  sF   ddl m} || jt dd| jd| jfd| jfd| jf| j	| j
d
S )	Nr   )scan_parquetinsertignoreziceberg-column-mappingrs   ziceberg-position-delete)	Zcast_optionsZmissing_columnsZextra_columnsrV   Z_column_mappingZ_default_valuesZ_deletion_filesZ_table_statisticsZ
_row_count)Zpolars.io.parquet.functionsr   rO   r   Z_default_icebergrV   rQ   rR   rS   rT   rW   )r+   r   r,   r,   r-   r<     s   z#_NativeIcebergScanData.to_lazyframeNr   r   r   r   r   r   r<   r,   r,   r,   r-   rp     s   
 rp   c                   @  s,   e Zd ZU dZded< ded< d
ddZd	S )ru   z.Resolved parameters for reading via PyIceberg.r   rY   r   r=   r'   c                 C  s   | j S r   )rY   r*   r,   r,   r-   r<     s   z_PyIcebergScanData.to_lazyframeNr   r   r,   r,   r,   r-   ru     s
   
 ru   objr	   r'   c                 C  s8   t | trt|  dS | d urdt| j dS dS )NZREDACTED<z object>r@   )
isinstancedictfromkeyskeystyper   )r   r,   r,   r-   _redact_dict_values  s   r   r!   dict[str, str]c                 C  sF   i }|   D ]\}}t| }d ur|||< qd|vr |||< q|S )N.)items&ICEBERG_TO_OBJECT_STORE_CONFIG_KEY_MAPr   )r!   rV   krx   Ztranslated_keyr,   r,   r-   ro     s   
ro   zs3.endpointZaws_endpoint_urlzs3.access-key-idZaws_access_key_idzs3.secret-access-keyZaws_secret_access_keyzs3.session-tokenZaws_session_tokenz	s3.regionZ
aws_regionzs3.proxy-uri	proxy_urlzs3.connect-timeoutconnect_timeoutzs3.request-timeouttimeoutzs3.force-virtual-addressingZ aws_virtual_hosted_style_requestzadls.account-nameZazure_storage_account_namezadls.account-keyZazure_storage_account_keyzadls.sas-tokenZazure_storage_sas_keyzadls.tenant-idZazure_storage_tenant_idzadls.client-idZazure_storage_client_idzadls.client-secretZazure_storage_client_secretzadls.account-hostZazure_storage_authority_hostz
adls.tokenZazure_storage_tokenZbearer_tokentoken)zgcs.oauth2.tokenzhf.tokenzFinal[dict[str, str]]r   )r   r	   r'   r	   )r!   r   r'   r   )5
__future__r   r`   abcr   r   dataclassesr   	functoolsr   timer   typingr   r	   r
   r   Zpolars._reexportZ	_reexportrt   rZ   r   r   r   Zpolars.exceptionsr   rq   r   r   r   r   Z#polars.io.scan_options.cast_optionsr   rX   paZpyiceberg.schemarG   r   r   Zpolars._typingr   Zpolars.io.cloud._utilsr   Zpolars.lazyframe.framer   r   r   rp   ru   r   ro   r   r   r,   r,   r,   r-   <module>   s       $


	
