o
    1 iKl                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
mZ er*d dlZdZdZe eZe add Zd	d
 Zdd Zdd ZdfddZdee	ed df  ddddfddZddde	ed df fddZd ed d!dddfd"d#ZeG d$d dZdgd&d'Zdhd)d*Zd!ddefd+d,Z d!ddefd-d.Z!did0d1Z"dhd2d3Z#dhd4d5Z$dhd6d7Z%	(	8		djd9d:Z&dkd<d=Z'dld?d@Z(	(	A		dmdBdCZ)dndEdFZ*	(	G		dodHdIZ+dJdKd!ed dLe,dMe,ddKf
dNdOZ-dJdKdPe,dLe,dMe,ddKf
dQdRZ.dJdKdLe,dMe,ddKfdSdTZ/dJdKdUddLe,dMe,de	dKe,e,f f
dVdWZ0dXe,de,fdYdZZ1dJdKd[e,d\e,ddKfd]d^Z2dpd`daZ3dJe4ddfdbdcZ5d!ddefdddeZ6dS )q    N)	dataclass)TYPE_CHECKINGListOptionalTuple)
is_in_test3RAY_DISABLE_CUSTOM_ARROW_JSON_OPTIONS_SERIALIZATION+RAY_DISABLE_CUSTOM_ARROW_DATA_SERIALIZATIONc                 C   s>   zdd l }W n
 ty   Y d S w t|  t|  t|  d S )Nr   )pyarrowModuleNotFoundError_register_arrow_data_serializer+_register_arrow_json_readoptions_serializer,_register_arrow_json_parseoptions_serializerserialization_contextpa r   l/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/_private/arrow_serialization.py%_register_custom_datasets_serializers   s   r   c                    D   t jtddkrd S dd lm  | j jdd  fddd d S )N01r   c                 S   s   | j | jfS N)Zuse_threads
block_sizeoptsr   r   r   <lambda>:   s    z=_register_arrow_json_readoptions_serializer.<locals>.<lambda>c                    
    j |  S r   )ReadOptionsargsZpajsonr   r   r   ;      
 Zcustom_serializerZcustom_deserializer)osenvirongetr   pyarrow.jsonjson _register_cloudpickle_serializerr   r   r   r!   r   r   ,   s   

r   c                    r   )Nr   r   r   c                 S   s   | j | j| jfS r   )Zexplicit_schemaZnewlines_in_valuesZunexpected_field_behaviorr   r   r   r   r   M   s   z>_register_arrow_json_parseoptions_serializer.<locals>.<lambda>c                    r   r   )ParseOptionsr   r!   r   r   r   R   r"   r#   )r$   r%   r&   r   r'   r(   r)   r+   r*   r   r!   r   r   ?   s   

r   c                 C   s0   t jtddkrdS ddl}| |jt dS )ay  Custom reducer for Arrow data that works around a zero-copy slicing pickling
    bug by using the Arrow IPC format for the underlying serialization.

    Background:
        Arrow has both array-level slicing and buffer-level slicing; both are zero-copy,
        but the former has a serialization bug where the entire buffer is serialized
        instead of just the slice, while the latter's serialization works as expected
        and only serializes the slice of the buffer. I.e., array-level slicing doesn't
        propagate the slice down to the buffer when serializing the array.

        We work around this by registering a custom cloudpickle reducers for Arrow
        Tables that delegates serialization to the Arrow IPC format; thankfully, Arrow's
        IPC serialization has fixed this buffer truncation bug.

    See https://issues.apache.org/jira/browse/ARROW-10739.
    r   r   Nr   )r$   r%   r&   r	   r
   Z_register_cloudpickle_reducerTable_arrow_table_reducer   r   r   r   r   X   s   r   tpyarrow.Tablec                 C   s   g }| j D ]S}| | }zt|}W nA tyR } z5t|js$t r$|dt|jtvrBtjd| d|j ddd t	t|j t
| W  Y d}~  S d}~ww || qt|| jffS )a/  Custom reducer for Arrow Tables that works around a zero-copy slice pickling bug.
    Background:
        Arrow has both array-level slicing and buffer-level slicing; both are zero-copy,
        but the former has a serialization bug where the entire buffer is serialized
        instead of just the slice, while the latter's serialization works as expected
        and only serializes the slice of the buffer. I.e., array-level slicing doesn't
        propagate the slice down to the buffer when serializing the array.
        All that these copy methods do is, at serialization time, take the array-level
        slicing and translate them to buffer-level slicing, so only the buffer slice is
        sent over the wire instead of the entire buffer.
    See https://issues.apache.org/jira/browse/ARROW-10739.
    NzTFailed to complete optimized serialization of Arrow Table, serialization of column 'z
' of type z failed, so we're falling back to Arrow IPC serialization for the table. Note that this may result in slower serialization and more worker memory utilization. Serialization error:T)exc_info)Zcolumn_names_arrow_chunked_array_reduce	Exception_is_dense_uniontyper   _serialization_fallback_setloggerwarningadd_arrow_table_ipc_reduceappend_reconstruct_tableschema)r.   reduced_columnsZcolumn_namecolumnZreduced_columner   r   r   r-   q   s.   
r-   r=   pyarrow.Arraypyarrow.DataTyper<   zpyarrow.Schemareturnc                 C   s:   ddl }g }| D ]\}}|t|| q|jj||dS )zERestore a serialized Arrow Table, reconstructing each reduced column.r   Nr<   )r
   r:   _reconstruct_chunked_arrayr,   from_arrays)r=   r<   r   columnsZchunks_payloadtype_r   r   r   r;      s
   r;   cazpyarrow.ChunkedArrayPicklableArrayPayloadc                 C   s.   g }| j D ]}t|}|| q|| jfS )zCustom reducer for Arrow ChunkedArrays that works around a zero-copy slice
    pickling bug. This reducer does not return a reconstruction function, since it's
    expected to be reconstructed by the Arrow Table reconstructor.
    )chunksrI   
from_arrayr:   r4   )rH   Zchunk_payloadschunkZchunk_payloadr   r   r   r1      s
   


r1   rJ   rG   c                 C   s"   ddl }dd | D } || |S )z=Restore a serialized Arrow ChunkedArray from chunks and type.r   Nc                 S      g | ]}|  qS r   to_array).0rL   r   r   r   
<listcomp>       z._reconstruct_chunked_array.<locals>.<listcomp>)r
   Zchunked_array)rJ   rG   r   r   r   r   rD      s   rD   c                   @   sb   e Zd ZU dZded< eed< ed ed< eed< eed< ed  ed	< edddZdddZ	dS )rI   zPicklable array payload, holding data buffers and array metadata.

    This is a helper container for pickling and reconstructing nested Arrow Arrays while
    ensuring that the buffers that underly zero-copy slice views are properly truncated.
    rA   r4   lengthpyarrow.Bufferbuffers
null_countoffsetchildrenar@   rB   c                 C   s   t |S )a  Create a picklable array payload from an Arrow Array.

        This will recursively accumulate data buffer and metadata payloads that are
        ready for pickling; namely, the data buffers underlying zero-copy slice views
        will be properly truncated.
        )_array_to_array_payload)selfrY   r   r   r   rK      s   z PicklableArrayPayload.from_arrayc                 C   s   t | S )z7Reconstruct an Arrow Array from this picklable payload.)_array_payload_to_array)r[   r   r   r   rO      s   zPicklableArrayPayload.to_arrayNrY   r@   rB   rI   )rB   r@   )
__name__
__module____qualname____doc____annotations__intr   classmethodrK   rO   r   r   r   r   rI      s   
 	payloadc           	      C   s   ddl }dd | jD }|j| jr*t|dksJ t||\}}|j||S |j| jrPt|dkrPt|dksCJ t||\}}}|j	|||S t
| j|jrmt|dkscJ t||d }| j|S |jj| j| j| j| j| j|dS )	zHReconstruct an Arrow Array from a possibly nested PicklableArrayPayload.r   Nc                 S   rM   r   rN   )rP   Zchild_payloadr   r   r   rQ      rR   z+_array_payload_to_array.<locals>.<listcomp>         r4   rS   rU   rV   rW   rX   )r
   rX   typesis_dictionaryr4   lenZDictionaryArrayrE   is_mapZMapArray
isinstanceBaseExtensionTypeZ
wrap_arrayArrayfrom_buffersrS   rU   rV   rW   )	re   r   rX   indices
dictionaryoffsetskeysitemsstorager   r   r   r\      s,   
r\   rY   c                 C   s  ddl }t| jrtd|j| jrt| S t| jr!t| S t	| jr*t
| S |j| js8|j| jr<t| S |j| jrGt| S |j| jrRt| S |j| jr]t| S |j| jrht| S |j| jrst| S t| j|jr~t| S td| j)zSerialize an Arrow Array to an PicklableArrayPayload for later pickling.

    This function's primary purpose is to dispatch to the handler for the input array
    type.
    r   NzKCustom slice view serialization of dense union arrays is not yet supported.zUnhandled Arrow array type:)r
   r3   r4   NotImplementedErrorrj   Zis_null_null_array_to_array_payload_is_primitive!_primitive_array_to_array_payload
_is_binary_binary_array_to_array_payloadZis_listis_large_list_list_array_to_array_payloadZis_fixed_size_list'_fixed_size_list_array_to_array_payloadZ	is_struct_struct_array_to_array_payloadis_union_union_array_to_array_payloadrk   "_dictionary_array_to_array_payloadrm   _map_array_to_array_payloadrn   ro   !_extension_array_to_array_payload
ValueError)rY   r   r   r   r   rZ     s4   


rZ   c                 C   sP   ddl }|j| p'|j| p'|j| p'|j| p'|j| p'|j| S )zcWhether the provided Array type is primitive (boolean, numeric, temporal or
    fixed-size binary).r   N)r
   rj   
is_integerZis_floatingZ
is_decimal
is_booleanZis_temporalZis_fixed_size_binaryrG   r   r   r   r   rz   8  s   




rz   c                 C   s8   ddl }|j| p|j| p|j| p|j| S )z@Whether the provided Array type is a variable-sized binary type.r   N)r
   rj   Z	is_stringis_large_stringZ	is_binaryis_large_binaryr   r   r   r   r|   G  s   


r|   pyarrow.NullArrayc                 C   s   t | jt| dg| jdg dS )z.Serialize null array to PicklableArrayPayload.Nr   ri   )rI   r4   rl   rV   rY   r   r   r   ry   S  s   ry   c                 C   s   t | js
J | j|  }t|dksJ t||d }| jdkr-t|| jt| }nd}|d }|durDt|d | j| jt| }t| jt| ||g| jdg dS )zZSerialize primitive (numeric, temporal, boolean) arrays to
    PicklableArrayPayload.
    rf   r   Nrg   ri   )	rz   r4   rU   rl   rV    _copy_bitpacked_buffer_if_neededrW   _copy_buffer_if_neededrI   )rY   rU   
bitmap_bufdata_bufr   r   r   r{   `  s$   
r{   c                 C   s   t | js
J | j|  }t|dksJ t|| jdkr+t|d | jt| }nd}|d }t|| j| jt| \}}}|d }t|d||}t	| jt| |||g| jdg dS )zZSerialize binary (variable-sized binary, string) arrays to
    PicklableArrayPayload.
    rh   r   Nrg   rf   ri   )
r|   r4   rU   rl   rV   r   rW   _copy_offsets_buffer_if_neededr   rI   )rY   rU   r   
offset_bufdata_offsetdata_lengthr   r   r   r   r}     s(   

r}   c                 C   s   |   }t|dksJ t|| jdkr!t|d | jt| }nd}|d }t|| j| jt| \}}}| j||}t	| jt| ||g| jdt
|gdS )zCSerialize list (regular and large) arrays to PicklableArrayPayload.rg   r   Nri   )rU   rl   rV   r   rW   r   r4   valuesslicerI   rZ   )rY   rU   r   r   child_offsetchild_lengthchildr   r   r   r     s$   

r   pyarrow.FixedSizeListArrayc                 C   s   |   }t|dksJ t|| jdkr!t|d | jt| }nd}| jj| j }| jjt|  }| j||}t	| jt| |g| jdt
|gdS )z:Serialize fixed size list arrays to PicklableArrayPayload.rg   r   Nri   )rU   rl   rV   r   rW   r4   Z	list_sizer   r   rI   rZ   )rY   rU   r   r   r   r   r   r   r   r     s    
r   pyarrow.StructArrayc                    s~      }t|dksJ t| jdkr!t|d  jt }nd} fddt jjD }t jt |g jd|dS )z1Serialize struct arrays to PicklableArrayPayload.rg   r   Nc                       g | ]	}t  |qS r   rZ   fieldrP   ir   r   r   rQ         z2_struct_array_to_array_payload.<locals>.<listcomp>ri   )	rU   rl   rV   r   rW   ranger4   
num_fieldsrI   )rY   rU   r   rX   r   r   r   r     s   
r   pyarrow.UnionArrayc                    s   ddl }t jrJ   }t|dksJ t||d }|du s'J ||d }t||  jt } fddt jj	D }t
 jt ||g jd|dS )z0Serialize union arrays to PicklableArrayPayload.r   Nrg   c                    r   r   r   r   r   r   r   rQ     r   z1_union_array_to_array_payload.<locals>.<listcomp>ri   )r
   r3   r4   rU   rl   r   Zint8rW   r   r   rI   rV   )rY   r   rU   r   Ztype_code_bufrX   r   r   r   r     s"   r   pyarrow.DictionaryArrayc                 C   s4   t | j}t | j}t| jt| g | jd||gdS )z5Serialize dictionary arrays to PicklableArrayPayload.r   ri   )rZ   rr   rs   rI   r4   rl   rV   )rY   Zindices_payloadZdictionary_payloadr   r   r   r     s   

r   pyarrow.MapArrayc                 C   s4  ddl }|  }t|dksJ t|| jdkr%t|d | jt| }nd}|g}|d }t|| j| jt| \}}}t| |j	j
rS|| t| j||g}n9|  }t|dkscJ t||j| t| d ||g}	| j||}
| j||}t|	t|
t|g}t| jt| || jd|dS )z.Serialize map arrays to PicklableArrayPayload.r   Nrg   rf   ri   )r
   rU   rl   rV   r   rW   r   r4   rn   libZ	ListArrayr:   rZ   r   r   rp   rq   int32ru   rv   rI   )rY   r   rU   r   Znew_buffersr   r   r   rX   rt   ru   rv   r   r   r   r   /  sB   


r   pyarrow.ExtensionArrayc                 C   s(   t | j}t| jt| g | jd|gdS )Nr   ri   )rZ   rw   rI   r4   rl   rV   )rY   Zstorage_payloadr   r   r   r   g  s   
r   bufrT   rW   rS   c                 C   sT   ddl }|dur|j|rt| ||} | S |dur|jd nd}t| |||} | S )Copy buffer, if needed.r   N   rg   )r
   rj   r   r   Z	bit_width_copy_normal_buffer_if_needed)r   rG   rW   rS   r   Ztype_bytewidthr   r   r   r   u  s   r   
byte_widthc                 C   s2   || }|| }|dks|| j k r| ||} | S )r   r   )sizer   )r   r   rW   rS   byte_offsetbyte_lengthr   r   r   r     s
   r   c                 C   sV   |d }|d }t || d }|dks|| jk r)| ||} |dkr)t| ||} | S )z)Copy bit-packed binary buffer, if needed.r   r   )_bytes_for_bitsr   r   _align_bit_offset)r   rW   rS   
bit_offsetr   r   r   r   r   r     s   r   arr_typec           
      C   s   ddl }ddlm} |j|s"|j|s"|j|s"|j|r'| }n|	 }t
| |||d } |j||d d| g}|d  }|d  | }	|||}|j|ra|j|dd}| d } | ||	fS )zvCopy the provided offsets buffer, returning the copied buffer and the
    offset + length of the underlying data.
    r   Nrg   F)safe)r
   Zpyarrow.computeZcomputerj   r~   r   r   Zis_large_unicodeZint64r   r   rp   rq   Zas_pysubtractZis_int32castrU   )
r   r   rW   rS   r   ZpacZoffset_typert   r   r   r   r   r   r     s*   	





r   nc                 C   s   | d d@ S )zpRound up n to the nearest multiple of 8.
    This is used to get the byte-padded number of bits for n bits.
       ir   )r   r   r   r   r     s   r   r   r   c                 C   s>   ddl }|  }t|tj}||L }||tj}||S )z}Align the bit offset into the buffer with the front of the buffer by shifting
    the buffer and eliminating the offset.
    r   N)r
   Z
to_pybytesrc   
from_bytessys	byteorderto_bytesZ	py_buffer)r   r   r   r   bytes_Zbytes_as_intr   r   r   r     s   
r   tablec                 C   sd   ddl m} ddlm} | }||| jd}||  W d   n1 s&w   Y  t| ffS )a4  Custom reducer for Arrow Table that works around a zero-copy slicing pickling
    bug by using the Arrow IPC format for the underlying serialization.

    This is currently used as a fallback for unsupported types (or unknown bugs) for
    the manual buffer truncation workaround, e.g. for dense unions.
    r   )RecordBatchStreamWriter)BufferOutputStreamrC   N)pyarrow.ipcr   Zpyarrow.libr   r<   Zwrite_table_restore_table_from_ipcgetvalue)r   r   r   Zoutput_streamwrr   r   r   r9     s   r9   c                 C   s@   ddl m} || }| W  d   S 1 sw   Y  dS )z6Restore an Arrow Table serialized to Arrow IPC format.r   )RecordBatchStreamReaderN)r   r   Zread_all)r   r   readerr   r   r   r     s   
$r   c                 C   s   ddl }|j| o| jdkS )z1Whether the provided Arrow type is a dense union.r   NZdense)r
   rj   r   moder   r   r   r   r3     s   r3   )r.   r/   )re   rI   rB   r@   r]   )rY   r   rB   rI   )rY   r   rB   rI   )rY   r   rB   rI   )rY   r   rB   rI   )rY   r   rB   rI   )rY   r   rB   rI   )rY   r   rB   rI   )r   r/   )7loggingr$   r   dataclassesr   typingr   r   r   r   Zray._private.utilsr   r
   r   r	   	getLoggerr^   r6   setr5   r   r   r   r   r-   r;   r1   rD   rI   r\   rZ   boolrz   r|   ry   r{   r}   r   r   r   r   r   r   r   rc   r   r   r   r   r   r   r9   bytesr   r3   r   r   r   r   <module>   s   

.



#
")



 "


!

8




%

