o
    iθ                     @  s  d dl mZ d dlZd dlmZmZmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZmZ d dlmZ d dlm  mZ d d	lmZ d d
lm Z m!Z!m"Z"m#Z#m$Z$ d dlm%Z& d dlm'Z( d dlm)Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3 d dl4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC d dlDmEZEmFZF d dlGmHZH eIeJ d dlKmLZL W d   n1 sw   Y  erd dlmMZMmNZNmOZO d dlmPZPmQZQ d dlKmRZR d dlSmTZTmUZUmVZVmWZW dZX	dddddddd(d)ZYdddd*dd0d1ZZ	ddd2dd7d8Z[		ddd9dd=d>Z\ddddd?ddBdCZ]dd9ddFdGZ^	dddde:ddHddMdNZ_edddOddRdSZ`e`aebddTddVdWZcddZd[Zde`aeeddTdd]d^Zfe`aee`aegdd`daZhe`aeiddbdcZjddedfZkddhdiZldd9ddjdkZmddldmZnddqdrZoddvdwZp	ddddde:ddxdd|d}ZqdddZr	ddddddddddZsdddZt	ddddddddZu	dddddddddZv		ddd9dddZw	dddddddZxdS )    )annotationsN)	GeneratorMappingSequence)datedatetimetime	timedelta)singledispatch)islicezip_longest)
itemgetter)TYPE_CHECKINGAny)	functions)_NUMPY_AVAILABLE_PYARROW_AVAILABLE_check_for_numpy_check_for_pandasdataclasses)numpy)pandas)pyarrow)contains_nestedget_first_non_noneis_namedtupleis_pydantic_model$is_simple_numpy_backed_pandas_seriesis_sqlalchemy_row	nt_unpacktry_get_type_hints)_is_generatorarrlenissue_warningparse_version)
N_INFER_DEFAULTCategoricalDurationEnumStringStructUnknownis_polars_dtypeparse_into_dtypetry_parse_into_dtype)DataOrientationWarning
ShapeError)thread_pool_size)PyDataFrame)CallableIterableMutableMapping)	DataFrameSeries)PySeries)OrientationPolarsDataTypeSchemaDefinition
SchemaDict  TF)schema_overridesstrictnan_to_nullallow_multithreadeddataHMapping[str, Sequence[object] | Mapping[str, Sequence[object]] | Series]schemaSchemaDefinition | Noner>   SchemaDict | Noner?   boolr@   rA   returnr2   c                  s  t tr!r!tfddD sd}t|fddD t d\}|s2t}rtrt fdd	 D }|dkrz4d	d
l
}	t }
|	j|
}tt||fddt ddW d
   n1 svw   Y  W n ty   tdd Y S w srfdd|D }ndd td	 D }t||dd}t|}r| t	 krt||d}|S )z7Construct a PyDataFrame from a dictionary of sequences.c                 3      | ]}| v V  qd S N .0colrD   rK   [/home/app/Keep/.python/lib/python3.10/site-packages/polars/_utils/construction/dataframe.py	<genexpr>W       zdict_to_pydf.<locals>.<genexpr>z>the given column-schema names do not match the data dictionaryc                   s   i | ]}| | qS rK   rK   rL   rB   rK   rP   
<dictcomp>Z       z dict_to_pydf.<locals>.<dictcomp>)lookup_namesr>   c                 3  sN    | ]"}t  o!t|o!t|tjo!t|tko!o!|jtjtj	fv V  qd S rJ   )
intr   
isinstancenpndarraylen"_MIN_NUMPY_SIZE_FOR_MULTITHREADINGdtypeZfloat32Zfloat64rM   val)rA   r@   rK   rP   rQ   d   s     



   r   Nc                   s0   t | d tjrtj| d | d  dS | d S )N   r   r@   )rX   rY   rZ   plr7   )trb   rK   rP   <lambda>   s   zdict_to_pydf.<locals>.<lambda>Tr?   F)rB   rD   r>   r?   r@   rA   c              	     s(   g | ]}t j|g | d jqS )r]   r?   r@   rc   r7   get_s)rM   name)r@   r>   r?   rK   rP   
<listcomp>   s    z dict_to_pydf.<locals>.<listcomp>c                 S     g | ]}|j qS rK   )rj   rM   srK   rK   rP   rl      s    )r>   r?   r@   )columns	from_dictr>   r?   )rX   r   all
ValueError_unpack_schemakeyslistr   sumvaluesZmultiprocessing.dummyr1   dummyZPooldictzipmapitemsFileNotFoundErrordict_to_pydf_expand_dict_values_handle_columns_argr2   dtypes_post_apply_columns)rB   rD   r>   r?   r@   rA   msgcolumn_namesZcount_numpymultiprocessingZ	pool_sizepooldata_seriespydfrK   )rA   rB   r@   rD   r>   r?   rP   r   L   sz   








r   )r>   
n_expectedrV   r   
int | NonerV   Iterable[str] | Nonetuple[list[str], SchemaDict]c                  sd  ddd 	dd fdd}||}| s'|dur!dd t |D ng }||fS t| tr7t| }t|  } n*g }t| D ]#\}}t|trW| oL||v}	|	rTd| n|}n|d }|| q=|durqt||krqd}
t	|
|r}dd t
||D nd}i }| D ]#}t|trq|\}}|du rq |}|r|||n|}|||< q|r|| ||fS )z
    Unpack column names and create dtype lookup.

    Works for any (name, dtype) pairs or schema dict input,
    overriding any inferred dtypes with explicit dtypes if supplied.
    r]   r   rH   r:   c                 S  s   t | ddr| S t| S )z1Parse non-Polars data types as Polars data types.T)Zinclude_unknown)r,   r-   )r]   rK   rK   rP   _normalize_dtype   s   z(_unpack_schema.<locals>._normalize_dtypeNr>   rF   dict[str, PolarsDataType]c                   s"   | du ri S  fdd|   D S )zCParse schema overrides as a dictionary of name to Polars data type.Nc                   s   i | ]	\}}| |qS rK   rK   )rM   rk   r]   r   rK   rP   rT      s    zC_unpack_schema.<locals>._parse_schema_overrides.<locals>.<dictcomp>)r~   r>   r   rK   rP   _parse_schema_overrides   s
   
z/_unpack_schema.<locals>._parse_schema_overridesc                 S  s   g | ]}d | qS )column_rK   rM   irK   rK   rP   rl      rU   z"_unpack_schema.<locals>.<listcomp>r   r   z)data does not match the number of columnsc                 S  s   i | ]\}}|d ur||qS rJ   rK   )rM   rN   rk   rK   rK   rP   rT      s
    z"_unpack_schema.<locals>.<dictcomp>)r]   r   rH   r:   rJ   )r>   rF   rH   r   )rangerX   r   rw   r~   	enumeratestrappendr[   r0   r   ri   update)rD   r>   r   rV   r   rp   r   r   rN   Zunnamedr   lookupZcolumn_dtypesrk   r]   rK   r   rP   ru      sP   


	



ru   )rq   list[PySeries]rp   Sequence[str] | Nonerq   c                  s   |du r| S | sdd |D S t | t |kr(dt | dt |  d}t||rEdd | D  t fd	d
|D rE fdd|D S t|D ]\}}|| |  krd| |  | |< | | | qI| S )z*Rename data according to columns argument.Nc                 S  s   g | ]	}t j|d jqS )rk   )rc   r7   rj   rM   crK   rK   rP   rl         z'_handle_columns_arg.<locals>.<listcomp>zdimensions of columns arg () must match data dimensions ()c                 S  s   i | ]}|  |qS rK   r   rn   rK   rK   rP   rT     rU   z'_handle_columns_arg.<locals>.<dictcomp>c                 3  rI   rJ   rK   rL   Z
series_maprK   rP   rQ     rR   z&_handle_columns_arg.<locals>.<genexpr>c                   s   g | ]} | qS rK   rK   rL   r   rK   rP   rl         )r[   rt   rs   r   rk   clonerename)rB   rp   rq   r   r   r   rK   r   rP   r     s"   r   rf   r   structsdict[str, Struct] | Nonec                C  s  |   |  }}t|p||d\}}g }||kr3t|t|k r.||dt| kr.|}n| | g }	t|D ]\}
}||}||
 }|t  krP|kran n|	t	
|jt|dj q9|t  krk|kr|n n|	t	
|j||dj q9|r|| }r||kr|	t	
|j||dj q9|dur|tkr||kr| r|tkr|tkrt	
|jj||dj}|	| q9|	t	
|j||dj q9|	s|r|  }|	r||	}|r|dd |D }|jddd} | S )zGApply 'columns' param *after* PyDataFrame creation (if no alternative).r   Nrf   c                 S  s   g | ]}t |jqS rK   )FrN   _pyexprrL   rK   rK   rP   rl   S      z'_post_apply_columns.<locals>.<listcomp>z	in-memory)ZengineZlambda_post_opt)rp   r   ru   r[   Zset_column_namesr   ri   r&   r   r   rN   castr   r(   r+   Zis_temporalr'   r)   r   strptimeZlazyZwith_columnsselectZcollect)r   rp   r   r>   r?   Zpydf_columnsZpydf_dtypesr   Zcolumn_subsetZcolumn_castsr   rN   r]   Z
pydf_dtypestructZtemporal_castZpyldfrK   rK   rP   r   )  sB   	
$


r   )r>   r?   orderr@   r   dict[str, Series]c                  s  i | r=t dd |  D rd}t||pi }t| ||d} tdd |  D   dkr|  D ]\}}||}	t|tr|	t	krt
j||djdkrw dkrwtdd j D rw fd	d
| D }
t
|
|}n|}||< q4t|t
jr||jkr||n|}|	r|	|jkr|j|	|d}||< q4t|dust|rt
j|||	||d|< q4|du st|ttttttttfrtj | |	dd!||< q4t
j||g  |	|d|< q4nPtdd |  D r|  D ]\}}t
j|||||d|< qn,tdd |  D r=|  D ]\}}t
j|t|r/|n|g|||d|< q!|rPt"|krPfdd
|D S S )zCExpand any scalar values in dict data (propagate literal as array).c                 s  s    | ]	}t |tjV  qd S rJ   )rX   rc   Exprr^   rK   rK   rP   rQ   d      z&_expand_dict_values.<locals>.<genexpr>zpassing Expr objects to the DataFrame constructor is not supported

Hint: Try evaluating the expression first using `select`, or if you meant to create an Object column containing expressions, pass a list of Expr objects instead.rf   c                 s  s    | ]	}t |p	d V  qdS r   Nr"   r^   rK   rK   rP   rQ   o  r   r   ra   c                 s  s    | ]}|   V  qd S rJ   )Z	is_nestedrM   drK   rK   rP   rQ   x  s    c                   s(   i | ]\}}|| j | d  dqS )ra   )n)Zextend_constant)rM   nmv)	array_lenvdfrK   rP   rT   z  s    z'_expand_dict_values.<locals>.<dictcomp>Nrk   ry   r]   r?   r@   T)r]   eager)rk   ry   r]   r?   c                 s  s    | ]	}t |d kV  qdS r   r   r^   rK   rK   rP   rQ     r   )ry   r]   r?   c                 s  s    | ]	}t |d u V  qd S rJ   r   r^   rK   rK   rP   rQ     r   c                   s   i | ]}|  |qS rK   )poprL   )updated_datarK   rP   rT     r   )#anyry   	TypeError_expand_dict_datamaxr~   ri   rX   r{   r*   rc   r6   heightrs   rD   Z	to_structr7   rk   r   r]   r   r"   r!   rW   floatr   rG   r   r   r   r	   r   repeataliasrw   )rB   r>   r?   r   r@   r   r   rk   r_   r]   Zs_valsstro   rK   )r   r   r   rP   r   Y  s   	





+r   r   r<   c                C  sB   i }|   D ]\}}t|rtj|||||dn|||< q|S )z
    Expand any unsized generators/iterators.

    (Note that `range` is sized, and will take a fast-path on Series init).
    rf   )r~   r!   rc   r7   ri   )rB   r   r?   Zexpanded_datark   r_   rK   rK   rP   r     s   r   )r>   r?   orientinfer_schema_lengthr@   Sequence[Any]r   Orientation | Noner   c             
   C  s.   | s	t i ||dS tt| | ||||||dS )z(Construct a PyDataFrame from a sequence.)rD   r>   rB   rD   r>   r?   r   r   r@   )r   _sequence_to_pydf_dispatcherr   r   rK   rK   rP   sequence_to_pydf  s   r   )r?   r@   first_elementr   c                C  s  |||||||d}d}	t | tr!t}
dd |D }|d } d}	nMt | tjr*t}
nDt| r7t | tjr7t	}
n7t
| rIt | tjtjtjfrIt}
n%t| rQt}
nt| rXt}
nt| r_t}
nt | trlt | tslt}
nt}
|	rxtt| |
 | |d< |
di |S )	Nr   Tc                 S     g | ]}t |qS rK   )rw   )rM   rowrK   rK   rP   rl     r   z0_sequence_to_pydf_dispatcher.<locals>.<listcomp>r   Fr   rK   )rX   r   _sequence_of_sequence_to_pydfrc   r7   _sequence_of_series_to_pydfr   rY   rZ   _sequence_of_numpy_to_pydfr   pdIndexZDatetimeIndex_sequence_of_pandas_to_pydfr   is_dataclass _sequence_of_dataclasses_to_pydfr   $_sequence_of_pydantic_models_to_pydfr   _sequence_of_tuple_to_pydfr   r   _sequence_of_elements_to_pydfr   registertype)r   rB   rD   r>   r?   r   r   r@   Zcommon_paramsZregister_with_singledispatchZto_pydfrK   rK   rP   r     sF   



r   rb   $Sequence[Any] | np.ndarray[Any, Any]c                  s  |d u r(|d u rd}nt |t | kot |t |k}|rdnd}|r(tdt |dkrt|t | d\ r>t ni }	d}
|	 D ]#\}}|ttfv rUt|	|< qF|
si|	 t
tfv ritt| |d jt}
qF|
r~dd |D }tj|d d |d}n
tj||	pd |d	} srt| d
}|S |dkrt|t |d\  fddt|D }t|S d|}t|)NrN   r   zRow orientation inferred during DataFrame construction. Explicitly specify the orientation by passing `orient="row"` to silence this warning.r>   r   Fc                 S  r   rK   )r   r   rK   rK   rP   rl   R  r   z1_sequence_of_sequence_to_pydf.<locals>.<listcomp>rD   r>   r?   r   rD   r   rr   c              	     s4   g | ]\}}t j | | | d jqS rg   rh   )rM   r   elementr   r@   r>   r?   rK   rP   rl   j  s    2`orient` must be one of {'col', 'row', None}, got )r[   r#   r/   ru   _include_unknownsr~   r&   r(   r)   	base_typer+   r*   r   getattr	__class__r   r2   
from_dicts	from_rowsr   r   rt   )r   rB   rD   r>   r?   r   r   r@   Zis_row_orientedZlocal_schema_overrideunpack_nestedrN   tpdictsr   r   r   rK   r   rP   r   !  st   






r   r7   kwargsc                K  s   dd |D }t |p||t|d\}}g }t|D ]*\}	}
|
js(|
||	 }
|||	 }|r>||
jkr>|
j||dd}
||
j	 qt
||d}t|S )Nc                 S  rm   rK   r   rn   rK   rK   rP   rl         z/_sequence_of_series_to_pydf.<locals>.<listcomp>r   Fr?   Zwrap_numericalrp   )ru   r[   r   rk   r   ri   r]   r   r   rj   r   r2   )r   rB   rD   r>   r?   r   Zseries_namesr   r   r   ro   	new_dtyperK   rK   rP   r   {  s    	
r   tuple[Any, ...]c          	   
   C  sx   t | js	t| r0|d u r*| j}t| dd }|r*t|t|kr*dd | j D }|d u r0d}t| |||||||dS )N__annotations__c                 S  s   g | ]
\}}|t |fqS rK   )r.   )rM   rk   r   rK   rK   rP   rl     s    
z._sequence_of_tuple_to_pydf.<locals>.<listcomp>r   r   )	r   r   r   _fieldsr   r[   r   r~   r   )	r   rB   rD   r>   r?   r   r   r@   r   rK   rK   rP   r     s(   r   dict[str, Any]c          
      K  sB   t ||d\}}|rt||pt|nd }tj|||||d}	|	S )Nr   )r?   r   )ru   r   rw   r2   r   )
r   rB   rD   r>   r?   r   r   r   Zdicts_schemar   rK   rK   rP   _sequence_of_dict_to_pydf  s   
r   c                K  sJ   t ||dd\}}tj|d |||d |djg}t||d}t|S )Nra   r   r   rf   r   )ru   rc   r7   ri   rj   r   r2   )r   rB   rD   r>   r?   r   r   r   rK   rK   rP   r     s   

r   np.ndarray[Any, Any]c                 K  s*   | j dkrt| fi |S t| fi |S )Nra   )ndimr   r   )r   r   rK   rK   rP   r     s   
r   1pd.Series[Any] | pd.Index[Any] | pd.DatetimeIndexc                K  s   |d u rg }n	t ||dd\}}|pi }g }t|D ]0\}}	|r$|| n|	j}
tj|
|	d}||
}|d urE|| krE|j||dd}|| qt	|S )Nra   r   )rk   ry   Fr   )
ru   r   rk   plcZpandas_to_pyseriesri   r]   r   r   r2   )r   rB   rD   r>   r?   r   r   r   r   ro   rk   Zpyseriesr]   rK   rK   rP   r     s   	

r   c                  s   ddl m m t| ||dd\}}}}	|r* fdd|D }
tj|
dd||d}nfdd|D }tj||	p9d|d	}|	rQd
d |	 D }t|||||d}|S )z-Initialize DataFrame from Python dataclasses.r   asdictastupleNmodel_fieldsc                      g | ]} |qS rK   rK   rM   mdr  rK   rP   rl   2  r   z4_sequence_of_dataclasses_to_pydf.<locals>.<listcomp>r   c                   r  rK   rK   )rM   dc)r  rK   rP   rl   ;  r   r   c                 S      i | ]\}}t |tr||qS rK   rX   r*   rM   r   r   rK   rK   rP   rT   C       z4_sequence_of_dataclasses_to_pydf.<locals>.<dictcomp>rf   )	r   r  r  $_establish_dataclass_or_model_schemar2   r   r   r~   r   )r   rB   rD   r>   r   r?   r   r   r   	overridesr   r   rowsr   rK   r  rP   r     s<   
r   c                  s  ddl }t|jdk }t|r| jn| jj}	t| |||	\}
}}}|
r>|r,dd |D ndd |D }tj	|dd||d}n-t
|	dkrZt|	   fd	d|D }tj|||d
}ndd |D }tj	||d||d}|rdd | D }t|||||d}|S )z1Initialise DataFrame from pydantic model objects.r   N)   r   c                 S     g | ]}|  qS rK   )r{   r  rK   rK   rP   rl   j  r   z8_sequence_of_pydantic_models_to_pydf.<locals>.<listcomp>c                 S  s   g | ]}|j d dqS )python)mode)Z
model_dumpr  rK   rK   rP   rl   l  r   r   2   c                   s   g | ]} |j qS rK   __dict__r  Z
get_valuesrK   rP   rl   y  rU   r   c                 S  rm   rK   r  r  rK   rK   rP   rl     r   c                 S  r  rK   r  r  rK   rK   rP   rT     r  z8_sequence_of_pydantic_models_to_pydf.<locals>.<dictcomp>rf   )pydanticr$   __version__rw   Z
__fields__r   r  r  r2   r   r[   r   r   r~   r   )r   rB   rD   r>   r   r?   r   r  Zold_pydanticr  r   r   r  r   r   r  r   rK   r  rP   r   K  s^   

r   r  list[str] | None.tuple[bool, list[str], SchemaDict, SchemaDict]c           
        s&  ddl m} d}|rt|d\}fdd|D }n0g }fddt| j D }r4| nsJt||   fdd| D  }n|| D ]'\}}	|	tt	fv r_t
||< qP|sw|	 ttfv rwtt| |d	rstnt j}qPrtt|krtt| d
d}|||fS )zLShared utility code for establishing dataclasses/pydantic model cols/schema.r   r	  Fr   c                   s   i | ]	}|  |tqS rK   ri   r+   rL   r   rK   rP   rT     r   z8_establish_dataclass_or_model_schema.<locals>.<dictcomp>c                   s4   i | ]\}} r| v rn|d kr|t |ptqS )	__slots__)r.   r+   )rM   rN   r   r  rK   rP   rT     s    c                   s   i | ]\}}| v r||qS rK   rK   )rM   r   r   )	dc_fieldsrK   rP   rT     s    NTrf   )r   r  ru   r    r   r~   r   setr&   r(   r)   r   r+   r*   r   r   r   r   r[   r{   r|   ry   )
r   rD   r>   r  r  r   r   r  rN   r   rK   )r   r  r>   rP   r    s>   




r  colsSequence[str]#MutableMapping[str, PolarsDataType]c                   s    fdd|D S )z7Complete partial schema dict by including Unknown type.c                   s   i | ]}|  |tptqS rK   r  rL   rO   rK   rP   rT     s    z%_include_unknowns.<locals>.<dictcomp>rK   )rD   r"  rK   rO   rP   r     s   
r   )r>   r?   r   
chunk_sizer   rechunkIterable[Any]r%  r&  c                  s  |}g  i |durt |d\ n
rt |d\}	t| ts&t| } |dkrJ r9r9fddt D t fddt| D jS dfdd}
d}d}|r\|}n re|t  }nd}d}du rodnt	|ptd}	 t
t| |}|sn2|
||}|du r|}|st
|j }||kr|j }dkr||  }}n|j|dd |d7 }qw|du r|
g |}|dkr|r| }|jS )z3Construct a PyDataFrame from an iterable/generator.Nr   rN   c                   s   i | ]\}}|  |tqS rK   r  )rM   idxrN   r   rK   rP   rT     s    z$iterable_to_pydf.<locals>.<dictcomp>c                   s:   i | ]\}} r | nd | t j||dqS )r   )r]   r?   )rc   r7   ri   )rM   r(  Zcoldata)r   dtypes_by_idxr?   rK   rP   rT     s    ry   	list[Any]rD   rE   rH   r6   c                   s   t j| |d dS )Nr   )rB   rD   r?   r   r   r>   )rc   r6   )ry   rD   )r   r>   r?   rK   rP   to_frame_chunk  s   z(iterable_to_pydf.<locals>.to_frame_chunkr   i@B r=   T)Zin_placera   )ry   r*  rD   rE   rH   r6   )ru   rX   r   iterr   rc   r6   _dfr[   r   rw   r   rD   r~   widthZvstackr&  )rB   rD   r>   r?   r   r%  r   r&  Zoriginal_schema_r+  Zn_chunksZn_chunk_elemsZadaptive_chunk_sizedfry   Zframe_chunk	n_columnsrK   )r   r)  r   r>   r?   rP   iterable_to_pydf  sr   

	


r2  pd.DataFrameinclude_indexNonec                C  s   dd | j D }|rdd | jjD nt }t|t| j k }|r+t|t| jjk nd}|s1|r7d}t|||@ }t|dkrGd}t|dS )	z:Check pandas dataframe columns can be converted to polars.c                 S     h | ]}t |qS rK   r   rL   rK   rK   rP   	<setcomp>*  r   z(_check_pandas_columns.<locals>.<setcomp>c                 S  r6  rK   r7  )rM   r(  rK   rK   rP   r8  ,  r   Fz|Pandas dataframe contains non-unique indices and/or column names. Polars dataframes require unique string names for columns.r   z1Pandas indices and column names must not overlap.N)rp   indexnamesr!  r[   rt   )rB   r4  Zstringified_colsZstringified_indexZnon_unique_colsZnon_unique_indicesr   Zoverlapping_cols_and_indicesrK   rK   rP   _check_pandas_columns(  s   r;  )r>   r?   r&  r@   r4  c                  s  t  |d |ot  }|s= jd dkrt jd S t fdd jD r=tj fdd jD ||||dj	S t
sEd	}t|i }	 jd }
|re jjD ]}tj j|||
d
|	t|< qR  D ]\}}tj|||
d
|	t|< qit|	}t|||||dS )z0Construct a PyDataFrame from a pandas DataFrame.)r4  ra   r   c                 3  s    | ]	}t  | V  qd S rJ   )r   rL   rS   rK   rP   rQ   S  r   z!pandas_to_pydf.<locals>.<genexpr>c                   s   i | ]}t | |  qS rK   )r   Zto_numpyrL   rS   rK   rP   rT   V  s    z"pandas_to_pydf.<locals>.<dictcomp>)rD   r?   r>   r@   zpyarrow is required for converting a pandas dataframe to Polars, unless each of its columns is a simple numpy-backed one (e.g. 'int64', 'bool', 'float32' - not 'Int64'))r@   length)rD   r>   r?   r&  )r;  _pandas_has_default_indexshaper2   empty_with_heightrs   rp   rc   r6   r-  r   ImportErrorr9  r:  r   Zpandas_series_to_arrowZget_level_valuesr   r~   patablearrow_to_pydf)rB   rD   r>   r?   r&  r@   r4  Zconvert_indexr   Z
arrow_dictr<  ZidxcolZcol_idxZcol_dataZarrow_tablerK   rS   rP   pandas_to_pydf@  sP   


rD  r0  c                 C  s   ddl m} | jj}t|dks|dgdgfvrdS | j|dt| ddr)dS t| jjd	oAt	| j
 tt| k S )
zFIdentify if the pandas frame only has a default (or equivalent) index.r   )
RangeIndexra   N F)startstopstepTrW   )Zpandas.core.indexes.rangerE  r9  r:  r[   equalsr   r]   
startswithrG   Zsort_valuesrY   Zarangers   )r0  rE  Z
index_colsrK   rK   rP   r=    s   r=  )r>   r?   r&  pa.Table | pa.RecordBatchc          
   
   C  s   t |p| jj|d\}}z|| jjkr| |} W n tjy- } zd}t||d}~ww t| tjr8| g}n| j	dkrCt
| jS |  }t
|| j}	|rT|	 }	|dur`t|	|||d}	|	S )z;Construct a PyDataFrame from an Arrow Table or RecordBatch.r   z4dimensions of columns arg must match data dimensionsNr   rr   )ru   rD   r:  Zrename_columnsrA  ZArrowInvalidrt   rX   ZRecordBatchZnum_columnsr2   r?  Znum_rowsZ
to_batchesZfrom_arrow_record_batchesr&  r   )
rB   rD   r>   r?   r&  r   er   Zbatchesr   rK   rK   rP   rC    s8   	



rC  )r>   r   r?   r@   c                  s  j tdkjjdur,d\}}tjj}t||D ]}| j q|s+|}ndg }}dkr8dntdkrAdntdkrd dkrTtd S |du rc|du rcd d}nf|du r|durt|}	|	d kr|	d krd	}d nGjd
 rd d krd	}|	n5d}d n.|dkrd n%|d	krd nd|}
t|
dkrd}
t|
d }
t|
|durt|krt| }	dkrd|	 d d}
t|
|	t	|d\ |rfddt
 |ddD }nRdkrdkrg }nEtdkr0tj d  d djg}n*|dkrH fddtD }n fddtD }t| d}t|S )zMConstruct a PyDataFrame from a NumPy ndarray (including structured ndarrays).r  N)TrN   F)r   r   ra   r   rN   ZF_CONTIGUOUSr   rK   z3cannot create DataFrame from zero-dimensional arrayzJcannot create DataFrame from array with more than two dimensions; shape = zdimensions of `schema` (r   r   r   c              	     s0   g | ]\}}t j| | |d jqS )r   rh   )rM   series_nameZrecord_name)rB   r@   r>   r?   rK   rP   rl     s    z!numpy_to_pydf.<locals>.<listcomp>Trf   r   c              	     sX   g | ](}t j | rd krd  d krndd|f  | djqS )ra   Nr   rh   r   r   rB   r1  r@   r>   r>  r?   Ztwo_drK   rP   rl   (  s    
c              	     sP   g | ]$}t j | rd krd  d krn|  | djqS )ra   r   rh   r   rO  rK   rP   rl   7  s    
"r   )r>  r[   r]   r:  rw   r2   r?  flagsrt   ru   r|   rc   r7   ri   rj   r   r   )rB   rD   r>   r   r?   r@   Zstructured_arrayZrecord_namesr   Zn_schema_colsr   r   rK   rO  rP   numpy_to_pydf  s   












rQ  c                C  s   |du r|du rt | jgS | jg}dd |D }t|p||dd\}}|r?tt| }|| jkr?|d j||dd|d< t||d	}t |S )
z-Construct a PyDataFrame from a Polars Series.Nc                 S  r  rK   r   rn   rK   rK   rP   rl   T  r   z"series_to_pydf.<locals>.<listcomp>ra   r   r   Fr   r   )	r2   rj   ru   nextr,  ry   r]   r   r   )rB   rD   r>   r?   r   rN  r   r   rK   rK   rP   series_to_pydfH  s   



rS  rr   r6   c          
      C  s   |du r|du r| j  S dd | D }t|p| j|d\}}|r?| j}| D ]\}}||| kr>|| j||dd||< q(tt|	 |d}	t
|	S )z:Construct a PyDataFrame from an existing Polars DataFrame.Nc                 S  s   i | ]}|j |jqS rK   )rk   rj   r   rK   rK   rP   rT   n  rU   z%dataframe_to_pydf.<locals>.<dictcomp>r   Fr   r   )r-  r   ru   rp   rD   r~   r   r   rw   ry   r2   )
rB   rD   r>   r?   r   r   Zexisting_schemark   r   Zseries_colsrK   rK   rP   dataframe_to_pydfc  s    



rT  rJ   )rB   rC   rD   rE   r>   rF   r?   rG   r@   rG   rA   rG   rH   r2   )
rD   rE   r>   rF   r   r   rV   r   rH   r   )rB   r   rp   r   rq   rG   rH   r   )NN)r   r2   rp   rE   r   r   r>   rF   r?   rG   rH   r2   )rB   rC   r>   rF   r?   rG   r   r   r@   rG   rH   r   )rB   rC   r   r<   r?   rG   rH   rC   )rB   r   rD   rE   r>   rF   r?   rG   r   r   r   r   r@   rG   rH   r2   )r   r   rB   r   rD   rE   r>   rF   r?   rG   r   r   r   r   r@   rG   rH   r2   )r   r   rB   r   rD   rE   r>   rF   r?   rG   r   r   r   r   r@   rG   rH   r2   )r   r7   rB   r   rD   rE   r>   rF   r?   rG   r   r   rH   r2   )r   r   rB   r   rD   rE   r>   rF   r?   rG   r   r   r   r   r@   rG   rH   r2   )r   r   rB   r   rD   rE   r>   rF   r?   rG   r   r   r   r   rH   r2   )r   r   rB   r   rD   rE   r>   rF   r?   rG   r   r   rH   r2   )r   r   r   r   rH   r2   )r   r   rB   r   rD   rE   r>   rF   r?   rG   r   r   rH   r2   )r   r   rB   r   rD   rE   r>   rF   r   r   r?   rG   r   r   rH   r2   )
r   r   rD   rE   r>   rF   r  r  rH   r  )rD   r<   r"  r#  rH   r$  )rB   r'  rD   rE   r>   rF   r?   rG   r   r   r%  r   r   r   r&  rG   rH   r2   )rB   r3  r4  rG   rH   r5  )rB   r3  rD   rE   r>   rF   r?   rG   r&  rG   r@   rG   r4  rG   rH   r2   )r0  r3  rH   rG   )rB   rL  rD   rE   r>   rF   r?   rG   r&  rG   rH   r2   )rB   r   rD   rE   r>   rF   r   r   r?   rG   r@   rG   rH   r2   )
rB   r7   rD   rE   r>   rF   r?   rG   rH   r2   )
rB   r6   rD   rE   r>   rF   r?   rG   rH   r2   )y
__future__r   
contextlibcollections.abcr   r   r   r   r   r   r	   	functoolsr
   	itertoolsr   r   operatorr   typingr   r   Zpolars._reexportZ	_reexportrc   Zpolars._utils.construction_utilsconstructionr   Zpolarsr   r   Zpolars._dependenciesr   r   r   r   r   r   rY   r   r   r   rA  Z polars._utils.construction.utilsr   r   r   r   r   r   r   r    Zpolars._utils.variousr!   r"   r#   r$   Zpolars.datatypesr%   r&   r'   r(   r)   r*   r+   r,   r-   r.   Zpolars.exceptionsr/   r0   Zpolars.metar1   suppressr@  Zpolars._plrr2   r3   r4   r5   r6   r7   r8   Zpolars._typingr9   r:   r;   r<   r\   r   ru   r   r   r   r   r   r   r   rw   r   r   tupler   r{   r   r   r   r   r   r   r   r  r   r2  r;  rD  r=  rC  rQ  rS  rT  rK   rK   rK   rP   <module>   s   (
0j\3ZB

Y
%


$
/
F
/
^
@. 