o
    1 i0p                     @   s4  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZmZ d dlmZ d dlmZ d dl m!Z! e	r|d dl"Z"d dlZd d	l#m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z* edddZ+edddZ,edZ-edZ.ed Z/ee0ddf Z1ed Z2ed Z3e4e5Z6e!G dd deZ7e!G dd de8eZ9eddee8ej:f f Z;ee2ej:f Z<e0Z=G dd  d ee+e,f Z>eee+ge,f ee+gee, f d f Z?eeee/ d!f  Z@ed! ZAg d"ZBd#ZCd$ee1 d%eDfd&d'ZEd(ed) d%ed) fd*d+ZFd,ee8 d%e8fd-d.ZGe!d/ed! d%ed0 fd1d2ZHe!G d3d4 d4ZIG d5d6 d6ZJe!eG d7d0 d0ZKd8d9 eeKD ZLe!eG d:d! d!eKZMe!d;d<eG d=d> d>eMZNe!G d?d@ d@ZOe!dAd<G dBdC dCZPdDeQej: d%ej:fdEdFZRdS )G    N)	dataclassfields)Enum)TYPE_CHECKINGAnyCallableDictIteratorListOptionalProtocolTupleTypeVarUnion)_check_pyarrow_version_truncated_repr)	ObjectRef)log_once)DeveloperAPI)BlockBuilderPandasBlockSchema)SortKey)AggregateFnTT)contravariantU)	covariantKeyTypeAggType)pyarrow.Tablepandas.DataFramer   pyarrow.lib.Schema)pyarrow.ChunkedArraypyarrow.Arraypandas.Series)r%   z
np.ndarrayr$   r#   c                   @   s   e Zd ZdZdZdS )	BlockTypearrowpandasN)__name__
__module____qualname__ARROWPANDAS r.   r.   Z/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/data/block.pyr&   D   s    r&   c                   @   s   e Zd ZdZdZdZdS )BatchFormatpyarrowr(   numpyN)r)   r*   r+   r,   r-   ZNUMPYr.   r.   r.   r/   r0   J   s    r0   r    r!   c                   @   s*   e Zd Zdedeeee f fddZdS )_CallableClassProtocol_CallableClassProtocol__argreturnc                 C   s   d S Nr.   )selfr4   r.   r.   r/   __call__`   s   z_CallableClassProtocol.__call__N)r)   r*   r+   r   r   r   r	   r8   r.   r.   r.   r/   r3   _   s    "r3   BlockMetadata)r(   r1   r2   Nr2   schemar5   c                 C   s,   ddl m} | d u pt| |r| j S |  S )Nr   r   )ray.data._internal.pandas_blockr   
isinstancenames)r:   r   r.   r.   r/   _is_empty_schemaw   s   r>   schemasSchemac                 C   s   | D ]
}t |s|  S qdS )zReturn the first non-empty schema from an iterator of schemas.

    Args:
        schemas: Iterator of schemas to check.

    Returns:
        The first non-empty schema, or None if all schemas are empty.
    N)r>   )r?   r:   r.   r.   r/   _take_first_non_empty_schema   s
   	rA   given_batch_formatc                 C   s.   | dkrt } | tvrtd|  dt d| S )NdefaultzThe given batch format z isn't allowed (must be one of z).)DEFAULT_BATCH_FORMATVALID_BATCH_FORMATS
ValueError)rB   r.   r.   r/   _apply_batch_format   s   rG   metas
BlockStatsc                 C   s   dd | D S )Nc                 S   s   g | ]}|  qS r.   )to_stats).0mr.   r.   r/   
<listcomp>   s    zto_stats.<locals>.<listcomp>r.   )rH   r.   r.   r/   rJ      s   rJ   c                   @   s.   e Zd ZdZdd ZedddZdd	 Zd
S )BlockExecStatsa  Execution stats for this block.

    Attributes:
        wall_time_s: The wall-clock time it took to compute this block.
        cpu_time_s: The CPU time it took to compute this block.
        node_id: A unique id for the node that computed this block.
        max_uss_bytes: An estimate of the maximum amount of physical memory that the
            process was using while computing this block.
    c                 C   s>   d | _ d | _d | _d| _d | _tj  | _	d| _
d | _d S )Nr   )start_time_s
end_time_swall_time_s
udf_time_s
cpu_time_srayZruntime_contextZget_runtime_contextZget_node_idnode_idZmax_uss_bytesZtask_idxr7   r.   r.   r/   __init__   s   
zBlockExecStats.__init__r5   _BlockExecStatsBuilderc                   C      t  S r6   )rX   r.   r.   r.   r/   builder      zBlockExecStats.builderc                 C   s   t | j| j| j| jdS )N)rQ   rS   rR   rU   )reprrQ   rS   rR   rU   rV   r.   r.   r/   __repr__   s   zBlockExecStats.__repr__N)r5   rX   )r)   r*   r+   __doc__rW   staticmethodrZ   r]   r.   r.   r.   r/   rN      s    

rN   c                   @   s"   e Zd ZdZdd Zd	ddZdS )
rX   zHelper class for building block stats.

    When this class is created, we record the start time. When build() is
    called, the time delta is saved as part of the stats.
    c                 C   s   t  | _t  | _d S r6   )timeperf_counter_start_timeprocess_time
_start_cpurV   r.   r.   r/   rW      s   
z_BlockExecStatsBuilder.__init__r5   rN   c                 C   s@   t  }t  }t }| j|_||_|| j |_|| j |_	|S r6   )
r`   ra   rc   rN   rb   rO   rP   rQ   rd   rS   )r7   end_timeZend_cpustatsr.   r.   r/   build   s   z_BlockExecStatsBuilder.buildN)r5   rN   )r)   r*   r+   r^   rW   rg   r.   r.   r.   r/   rX      s    rX   c                   @   s>   e Zd ZU dZee ed< ee ed< ee ed< dd ZdS )rI   z#Statistics about the block producednum_rows
size_bytes
exec_statsc                 C   s"   | j d urt| j tsJ d S d S r6   )ri   r<   intrV   r.   r.   r/   __post_init__   s   
zBlockStats.__post_init__N)	r)   r*   r+   r^   r   rk   __annotations__rN   rl   r.   r.   r.   r/   rI      s   
 c                 C   s   h | ]}|j qS r.   )name)rK   fr.   r.   r/   	<setcomp>   s    rp   c                       s:   e Zd ZU dZeee  ed< dd Z fddZ	  Z
S )r9   zMetadata about the block.input_filesc                    s   t di  fddtD S )Nc                    s   i | ]}|  |qS r.   )__getattribute__)rK   keyrV   r.   r/   
<dictcomp>  s    z*BlockMetadata.to_stats.<locals>.<dictcomp>r.   )rI   _BLOCK_STATS_FIELD_NAMESrV   r.   rV   r/   rJ      s   zBlockMetadata.to_statsc                    s"   t    | jd u rg | _d S d S r6   )superrl   rq   rV   	__class__r.   r/   rl     s   


zBlockMetadata.__post_init__)r)   r*   r+   r^   r   r
   strrm   rJ   rl   __classcell__r.   r.   rw   r/   r9      s
   
 alpha)Z	stabilityc                       sn   e Zd ZU dZee ed< ddeded f fddZ	dde	ded	 d
d fddZ
ed
efddZ  ZS )BlockMetadataWithSchemaNr:   metadatar@   c                    s&   t  j|j|j|j|jd || _d S )N)rq   ri   rh   rj   )rv   rW   rq   ri   rh   rj   r:   )r7   r}   r:   rw   r.   r/   rW     s   
z BlockMetadataWithSchema.__init__blockrf   rN   r5   c                 C   s*   t | }|j|d}| }t||dS )N)rj   )r}   r:   )BlockAccessor	for_blockget_metadatar:   r|   )r~   rf   accessormetar:   r.   r.   r/   
from_block  s   
z"BlockMetadataWithSchema.from_blockc                 C   s   t | j| j| j| jdS )N)rh   ri   rj   rq   )r9   rh   ri   rj   rq   rV   r.   r.   r/   r}   !  s   z BlockMetadataWithSchema.metadatar6   )r)   r*   r+   r:   r   r@   rm   r9   rW   Blockr   propertyr}   rz   r.   r.   rw   r/   r|     s   
 

r|   c                   @   s  e Zd ZdZdefddZdedee fddZ	dxd	ed
edede
fddZdee de
fddZdee de
fddZdeee  de
fddZdeeef de
fddZdedede
fddZdee de
fddZdyd!d"Z	#dzdeeeee f  deejeeejf f fd$d%Zd{d'd(Zde
fd)d*Zde
fd+d,Zd-ee defd.d/Z defd0d1Z!dee"d2f fd3d4Z#	#	#d|d5eee  d6ee$ de%fd7d8Z&d}d;d<Z'e(d~d>d?Z)e*	#dzd@edAee+ de
fdBdCZ,e*d@eee-f de
fdDdEZ.e*d@eee-f de
fdFdGZ/e(dHe
ddIfdJdKZ0dLedMdNdd:fdOdPZ1dxdQedRedee2 fdSdTZ3dQedRedee2 fdUdVZ4dQedRedee2 fdWdXZ5dQedRedee2 fdYdZZ6dQedRedee2 fd[d\Z7	#dzdQedRed]ee2 dee2 fd^d_Z8dd`daZ9dbee dMdNded: fdcddZ:dedNdfe;dg de
fdhdiZ<e(djed: dMdNde;e
e=f fdkdlZ>e(	mddjee
 dMdNdfe;dg dnede;e
e=f f
dodpZ?dbee;e-  dMdNdee
 fdqdrZ@de+fdsdtZAduee dejfdvdwZBd#S )r   a  Provides accessor methods for a specific block.

    Ideally, we wouldn't need a separate accessor classes for blocks. However,
    this is needed if we want to support storing ``pyarrow.Table`` directly
    as a top-level Ray object, without a wrapping class (issue #17186).
    r5   c                 C      t )z2Return the number of rows contained in this block.NotImplementedErrorrV   r.   r.   r/   rh   4     zBlockAccessor.num_rowspublic_row_formatc                 C   r   )zIterate over the rows of this block.

        Args:
            public_row_format: Whether to cast rows into the public Dict row
                format (this incurs extra copy conversions).
        r   )r7   r   r.   r.   r/   	iter_rows8     zBlockAccessor.iter_rowsFstartendcopyc                 C   r   )a(  Return a slice of this block.

        Args:
            start: The starting index of the slice (inclusive).
            end: The ending index of the slice (exclusive).
            copy: Whether to perform a data copy for the slice.

        Returns:
            The sliced block result.
        r   )r7   r   r   r   r.   r.   r/   sliceA  s   zBlockAccessor.sliceindicesc                 C   r   )zReturn a new block containing the provided row indices.

        Args:
            indices: The row indices to return.

        Returns:
            A new block containing the provided row indices.
        r   )r7   r   r.   r.   r/   takeN  s   	zBlockAccessor.takecolumnsc                 C   r   )z<Return a new block with the list of provided columns droppedr   r7   r   r.   r.   r/   dropY  r   zBlockAccessor.dropc                 C   r   )z3Return a new block containing the provided columns.r   r   r.   r.   r/   select]  r   zBlockAccessor.selectcolumns_renamec                 C   r   )z0Return the block reflecting the renamed columns.r   )r7   r   r.   r.   r/   rename_columnsa  r   zBlockAccessor.rename_columnscolumn_namecolumn_datac                 C      t  )aq  
        Upserts a column into the block. If the column already exists, it will be replaced.

        Args:
            column_name: The name of the column to upsert.
            column_data: The data to upsert into the column. (Arrow Array/ChunkedArray for Arrow blocks, Series or array-like for Pandas blocks)

        Returns:
            The updated block.
        r   )r7   r   r   r.   r.   r/   upsert_columne  s   zBlockAccessor.upsert_columnrandom_seedc                 C   r   )zRandomly shuffle this block.r   )r7   r   r.   r.   r/   random_shuffler  r   zBlockAccessor.random_shuffler!   c                 C   r   )z+Convert this block into a Pandas dataframe.r   rV   r.   r.   r/   	to_pandasv  r   zBlockAccessor.to_pandasNc                 C   r   )zConvert this block (or columns of block) into a NumPy ndarray.

        Args:
            columns: Name of columns to convert, or None if converting all columns.
        r   r   r.   r.   r/   to_numpyz     zBlockAccessor.to_numpyr    c                 C   r   )z'Convert this block into an Arrow table.r   rV   r.   r.   r/   to_arrow  r   zBlockAccessor.to_arrowc                 C   r   )z/Return the base block that this accessor wraps.r   rV   r.   r.   r/   to_block  r   zBlockAccessor.to_blockc                 C   s   |   S )z1Return the default data format for this accessor.)r   rV   r.   r.   r/   
to_default  s   zBlockAccessor.to_defaultbatch_formatc                 C   sl   |du r|   S |dks|dkr|  S |dkr|  S |dkr$|  S |dkr,|  S tdt d| )	zConvert this block into the provided batch format.

        Args:
            batch_format: The batch format to convert this block to.

        Returns:
            This block formatted as the provided batch format.
        NrC   nativer(   r1   r2   z The batch format must be one of z, got: )r   r   r   r   r   rF   rE   )r7   r   r.   r.   r/   to_batch_format  s   	zBlockAccessor.to_batch_formatc                 C   r   )z3Return the approximate size in bytes of this block.r   rV   r.   r.   r/   ri     r   zBlockAccessor.size_bytesr"   c                 C   r   )z7Return the Python type or pyarrow schema of this block.r   rV   r.   r.   r/   r:     r   zBlockAccessor.schemarq   rj   c                 C   s   t |  |  ||dS )z)Create a metadata object from this block.)rh   ri   rq   rj   )r9   rh   ri   )r7   rq   rj   r.   r.   r/   r     s   zBlockAccessor.get_metadataotherr   c                 C   r   )z<Zip this block with another block of the same type and size.r   )r7   r   r.   r.   r/   zip  r   zBlockAccessor.zipr   c                   C   r   )z%Create a builder for this block type.r   r.   r.   r.   r/   rZ     s   zBlockAccessor.builderbatch
block_typec              
   C   s   t |tjrtdt| dt |tjjrc|du s |tj	krWddl
m} z| |W S  |yV } ztdrAtd| d |du rP| |W  Y d}~S |d}~ww |tjks^J | |S |S )	z-Create a block from user-facing data formats.Error validating z: Standalone numpy arrays are not allowed in Ray 2.5. Return a dict of field -> array, e.g., `{'data': array}` instead of `array`.Nr   )ArrowConversionErrorZ!_fallback_to_pandas_block_warningz)Failed to convert batch to Arrow due to: z; falling back to Pandas block)r<   npndarrayrF   r   collectionsabcMappingr&   r,   Z$ray.air.util.tensor_extensions.arrowr   batch_to_arrow_blockr   loggerwarningbatch_to_pandas_blockr-   )clsr   r   r   er.   r.   r/   batch_to_block  s,   

zBlockAccessor.batch_to_blockc                 C      ddl m} ||S )z4Create an Arrow block from user-facing data formats.r   )ArrowBlockBuilder)ray.data._internal.arrow_blockr   _table_from_pydict)r   r   r   r.   r.   r/   r        
z"BlockAccessor.batch_to_arrow_blockc                 C   r   )z4Create a Pandas block from user-facing data formats.r   )PandasBlockBuilder)r;   r   r   )r   r   r   r.   r.   r/   r     r   z#BlockAccessor.batch_to_pandas_blockr~   zBlockAccessor[T]c                 C   s   t   ddl}ddl}t| |j|jfrddlm} || S t| |jr.ddl	m
} || S t| tr>ddlm} || S t| trMtdt|  dtd| t| )z,Create a block accessor for the given block.r   N)ArrowBlockAccessor)PandasBlockAccessorr   z: Standalone Python objects are not allowed in Ray 2.5. To use Python objects in a dataset, wrap them in a dict of numpy arrays, e.g., return `{'item': batch}` instead of just `batch`.zNot a block type: {} ({}))r   r(   r1   r<   TableZRecordBatchr   r   Z	DataFramer;   r   bytes
from_byteslistrF   r   	TypeErrorformattype)r~   r(   r1   r   r   r.   r.   r/   r     s"   


zBlockAccessor.for_block	n_samplessort_keyr   c                 C   r   )z0Return a random sample of items from this block.r   )r7   r   r   r.   r.   r/   sample  r   zBlockAccessor.sampleonignore_nullsc                 C   r   )z=Returns a count of the distinct values in the provided columnr   r7   r   r   r.   r.   r/   count  r   zBlockAccessor.countc                 C   r   )z2Returns a sum of the values in the provided columnr   r   r.   r.   r/   sum!  r   zBlockAccessor.sumc                 C   r   )z2Returns a min of the values in the provided columnr   r   r.   r.   r/   min%  r   zBlockAccessor.minc                 C   r   )z2Returns a max of the values in the provided columnr   r   r.   r.   r/   max)  r   zBlockAccessor.maxc                 C   r   )z3Returns a mean of the values in the provided columnr   r   r.   r.   r/   mean-  r   zBlockAccessor.meanr   c                 C   r   )zBReturns a sum of diffs (from mean) squared for the provided columnr   )r7   r   r   r   r.   r.   r/   sum_of_squared_diffs_from_mean1  r   z,BlockAccessor.sum_of_squared_diffs_from_meanc                 C   r   )z9Returns new block sorted according to provided `sort_key`r   )r7   r   r.   r.   r/   sort:  r   zBlockAccessor.sort
boundariesc                 C   r   )z1Return a list of sorted partitions of this block.r   r7   r   r   r.   r.   r/   sort_and_partition>  s   z BlockAccessor.sort_and_partitionrs   aggsr   c                 C   r   )z3Combine rows with the same key into an accumulator.r   )r7   rs   r   r.   r.   r/   
_aggregateD  r   zBlockAccessor._aggregateblocksc                 C   r   )z9Return a sorted block by merging a list of sorted blocks.r   )r   r   r.   r.   r/   merge_sorted_blocksH  s   z!BlockAccessor.merge_sorted_blocksTfinalizec                 C   r   )z/Aggregate partially combined and sorted blocks.r   )r   r   r   r   r.   r.   r/   _combine_aggregated_blocksO  r   z(BlockAccessor._combine_aggregated_blocksc                 C   r   )zNOTE: PLEASE READ CAREFULLY

        Returns dataset partitioned using list of boundaries

        This method requires that
            - Block being sorted (according to `sort_key`)
            - Boundaries is a sorted list of tuples
        r   r   r.   r.   r/   _find_partitions_sortedY  s   z%BlockAccessor._find_partitions_sortedc                 C   r   )z$Return the block type of this block.r   rV   r.   r.   r/   r   h  r   zBlockAccessor.block_typekeysc                 C   sL   |   dkrtjg tjdS |std|   gS | |}tt| S )a  
        NOTE: THIS METHOD ASSUMES THAT PROVIDED BLOCK IS ALREADY SORTED

        Compute boundaries of the groups within a block based on provided
        key (a column or a list of columns)

        NOTE: In each column, NaNs/None are considered to be the same group.

        Args:
            block: sorted block for which grouping of rows will be determined
                    based on provided key
            keys: list of columns determining the key for every row based on
                    which the block will be grouped

        Returns:
            A list of starting indices of each group and an end index of the last
            group, i.e., there are ``num_groups + 1`` entries and the first and last
            entries are 0 and ``len(array)`` respectively.
        r   )dtype)rh   r   arrayZint32r   "_get_group_boundaries_sorted_numpyr   values)r7   r   Zprojected_blockr.   r.   r/   _get_group_boundaries_sortedl  s   
z*BlockAccessor._get_group_boundaries_sortedF)r5   r!   r6   )r5   r    )NN)r   r   r5   r   )r5   r   )r   r   r5   r   )T)Cr)   r*   r+   r^   rk   rh   boolr	   r   r   r   r   r
   r   ry   r   r   r   r   r   BlockColumnr   r   r   r   r   r   r   r   r   r   	DataBatchr   ri   r   r:   rN   r9   r   r   r_   rZ   classmethodr&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r|   r   r   r   r   r   r.   r.   r.   r/   r   +  s    	






%

	


	

r   betac                
   @   s  e Zd ZdZdefddZdddeded	ee fd
dZ	dddeded	ee fddZ
dddeded	ee fddZdddeded	ee fddZdddeded	ee fddZdddededed	ee fddZd	efddZd	eeef fddZd	efddZd	efddZd	efdd Zd4d"ee d	efd#d$Zd!dd%ded&ee ded	ee fd'd(Zd	ee fd)d*Zd5d,ed	ejfd-d.Zd	e ee d/f fd0d1Z!e"ded	d fd2d3Z#d!S )6BlockColumnAccessorzbProvides vendor-neutral interface to apply common operations
    to block's (Pandas/Arrow) columnscolc                 C   s
   || _ d S r6   )_column)r7   r   r.   r.   r/   rW     s   
zBlockColumnAccessor.__init__T)as_pyr   r   r5   c                C   r   )z4Returns a count of the distinct values in the columnr   r7   r   r   r.   r.   r/   r     r[   zBlockColumnAccessor.countc                C   rY   )z)Returns a sum of the values in the columnr   r   r.   r.   r/   r     r[   zBlockColumnAccessor.sumc                C   r   )z)Returns a min of the values in the columnr   r   r.   r.   r/   r     r[   zBlockColumnAccessor.minc                C   r   )z)Returns a max of the values in the columnr   r   r.   r.   r/   r     r[   zBlockColumnAccessor.maxc                C   r   )z*Returns a mean of the values in the columnr   r   r.   r.   r/   r     r[   zBlockColumnAccessor.meanqc                C   r   )z.Returns requested quantile of the given columnr   )r7   r   r   r   r.   r.   r/   quantile  s   zBlockColumnAccessor.quantilec                 C   r   )zBReturns new column holding only distinct values of the current oner   rV   r.   r.   r/   unique  r[   zBlockColumnAccessor.uniquec                 C   r   r6   r   rV   r.   r.   r/   value_counts     z BlockColumnAccessor.value_countsc                 C   r   )a  
        Computes a 64-bit hash value for each row in the column.

        Provides a unified hashing method across supported backends.
        Handles complex types like lists or nested structures by producing a single hash per row.
        These hashes are useful for downstream operations such as deduplication, grouping, or partitioning.

        Internally, Polars is used to compute row-level hashes even when the original column
        is backed by Pandas or PyArrow.

        :return: A column of 64-bit integer hashes, returned in the same format as the underlying backend
             (e.g., Pandas Series or PyArrow Array).
        r   rV   r.   r.   r/   hash  s   zBlockColumnAccessor.hashc                 C   r   )z;Flattens nested lists merging them into top-level containerr   rV   r.   r.   r/   flatten  s   zBlockColumnAccessor.flattenc                 C   r   r6   r   rV   r.   r.   r/   dropna  r   zBlockColumnAccessor.dropnaNtypesc                 C   r   )a  
        Checks whether the column is composed of list-like elements.

        :param types: Optional tuple of backend-specific types to check against.
                      If not provided, defaults to list-like types appropriate
                      for the underlying backend (e.g., PyArrow list types).
        :return: True if the column is made up of list-like values; False otherwise.
        r   )r7   r   r.   r.   r/   is_composed_of_lists  s   	z(BlockColumnAccessor.is_composed_of_lists)r   r   r   c                C   r   )z9Returns a sum of diffs (from mean) squared for the columnr   )r7   r   r   r   r.   r.   r/   r     s   z2BlockColumnAccessor.sum_of_squared_diffs_from_meanc                 C   r   )z8Converts block column to a list of Python native objectsr   rV   r.   r.   r/   	to_pylist  r[   zBlockColumnAccessor.to_pylistFzero_copy_onlyc                 C   r   )z#Converts underlying column to Numpyr   )r7   r   r.   r.   r/   r     r[   zBlockColumnAccessor.to_numpyr$   c                 C   r   )zAConverts block column into a representation compatible with Arrowr   rV   r.   r.   r/   _as_arrow_compatible  r[   z(BlockColumnAccessor._as_arrow_compatiblec                 C   sn   t   ddl}t| tjst| tjrddlm} || S t| |jr-ddl	m
} || S tdt|  d)z-Create a column accessor for the given columnr   N)ArrowBlockColumnAccessor)PandasBlockColumnAccessorzEExpected either a pandas.Series or pyarrow.Array (ChunkedArray) (got ))r   r(   r<   paZArrayZChunkedArrayr   r   ZSeriesr;   r   r   r   )r   pdr   r   r.   r.   r/   
for_column  s   zBlockColumnAccessor.for_columnr6   r   )$r)   r*   r+   r^   r   rW   r   r   r   r   r   r   r   r   floatr   r   r   ry   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r_   r  r.   r.   r.   r/   r     sP         


r   r   c                 C   s@  g }g }g }| D ]0}t |jt jrt |d r|| qt |jt js3|d d u r3|| q|| qg }t|dkrR|t dd |D jdd t|dkri|t dd |D jdd t|dkr|t dd |D jdd t 	dgt 
|jdd d d t| d ggt}|S )	Nr   c                 S   s$   g | ]}|d d |dd kqS    Nr  r.   rK   arrr.   r.   r/   rM     s   $ z6_get_group_boundaries_sorted_numpy.<locals>.<listcomp>)Zaxisc              	   S   sH   g | ] }|d d |dd kt |d d t |dd B @ qS r  )r   isfiniter	  r.   r.   r/   rM   #  s    "c              	   S   sN   g | ]#}|d d |dd kt |d d dt |dd d@  @ qS r  )r   equalr	  r.   r.   r/   rM   0  s    (r  )r   Z
issubdtyper   numberisnanappendlenZvstackanyZhstackZcolumn_stackZnonzeroZastyperk   )r   Zgeneral_arraysZnum_arrays_with_nanZcat_arrays_with_noner
  Zdiffsr   r.   r.   r/   r     sV   	r   )Sr   loggingr`   dataclassesr   r   enumr   typingr   r   r   r   r	   r
   r   r   r   r   r   r2   r   r1   r  rT   Zray.data._internal.utilr   r   Z	ray.typesr   Zray.utilr   Zray.util.annotationsr   r(   Z ray.data._internal.block_builderr   r;   r   Z2ray.data._internal.planner.exchange.sort_task_specr   Zray.data.aggregater   r   r   r   r   r   r   r@   r   ZBatchColumn	getLoggerr)   r   r&   ry   r0   r   r   ZDataBatchColumnZCallableClassr3   ZUserDefinedFunctionZBlockPartitionZBlockPartitionMetadatarE   rD   r   r>   rA   rG   rJ   rN   rX   rI   ru   r9   r|   r   r   r   r   r.   r.   r.   r/   <module>   s    4
	
$  dw