B
    ln¦d+X  ã               @   s<  d dl mZmZmZmZmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC d dlDmEZE d dlFZFd dlGmHZH d dlIZId dlJZKd dlJmLZL d dlMmNZN dd„ ZOeEd	d
ƒZPdd„ ZQdd„ ZRdd„ ZSdd„ ZTdd„ ZUdd„ ZVdd„ ZWdd„ ZXeXƒ  ddddfdd„ZYddfddœdd„ZZd dd!œd"d#„Z[d$d%„ Z\dfddœd&d'„Z]dfddœd(d)„Z^d*ddd+œd,d-„Z_d.d/„ Z`d0d1„ ZadS )2é    )CÚFunctionÚFunctionOptionsÚFunctionRegistryÚHashAggregateFunctionÚHashAggregateKernelÚKernelÚScalarAggregateFunctionÚScalarAggregateKernelÚScalarFunctionÚScalarKernelÚVectorFunctionÚVectorKernelÚArraySortOptionsÚAssumeTimezoneOptionsÚCastOptionsÚCountOptionsÚCumulativeSumOptionsÚDayOfWeekOptionsÚDictionaryEncodeOptionsÚRunEndEncodeOptionsÚElementWiseAggregateOptionsÚExtractRegexOptionsÚFilterOptionsÚIndexOptionsÚJoinOptionsÚListSliceOptionsÚMakeStructOptionsÚMapLookupOptionsÚMatchSubstringOptionsÚModeOptionsÚNullOptionsÚ
PadOptionsÚPartitionNthOptionsÚQuantileOptionsÚRandomOptionsÚRankOptionsÚReplaceSliceOptionsÚReplaceSubstringOptionsÚRoundBinaryOptionsÚRoundOptionsÚRoundTemporalOptionsÚRoundToMultipleOptionsÚScalarAggregateOptionsÚSelectKOptionsÚSetLookupOptionsÚSliceOptionsÚSortOptionsÚSplitOptionsÚSplitPatternOptionsÚStrftimeOptionsÚStrptimeOptionsÚStructFieldOptionsÚTakeOptionsÚTDigestOptionsÚTrimOptionsÚUtf8NormalizeOptionsÚVarianceOptionsÚWeekOptionsÚcall_functionÚfunction_registryÚget_functionÚlist_functionsÚcall_tabular_functionÚregister_scalar_functionÚregister_tabular_functionÚScalarUdfContextÚ
Expression)Ú
namedtupleN)Údedent)Ú_compute_docstrings)Ú	docscrapec             C   s   | j jS )N)Ú_docÚ	arg_names)Úfunc© rL   ú3/tmp/pip-unpacked-wheel-i2z0tf32/pyarrow/compute.pyÚ_get_arg_namesf   s    rN   Ú_OptionsClassDoc)Úparamsc             C   s"   | j s
d S t | j ¡}t|d ƒS )NZ
Parameters)Ú__doc__rH   ZNumpyDocStringrO   )Úoptions_classÚdocrL   rL   rM   Ú_scrape_options_class_docm   s    rT   c             C   sþ  |j }t|j|j|j|jd| _|| _|| _g }|j	}|sZ|jdkrHdnd}d 
|j|¡}| |› d¡ |j}|r„| |› d¡ tj |j¡}	| tdƒ¡ t|ƒ}
x@|
D ]8}|jd	krÂd
}nd}| |› d|› d¡ | d¡ q®W |d k	r¼t|ƒ}|rTx |jD ]D}| |j› d|j› d¡ x"|jD ]}| d|› d¡ q0W q
W nPt d|j› dt¡ t |¡}x.|j ¡ D ] }| td 
|j|j¡ƒ¡ q€W | td|j› dƒ¡ | tdƒ¡ |	d k	rî| d 
t|	ƒ d¡¡¡ d |¡| _ | S )N)ÚnameÚarityrR   Úoptions_requiredé   Ú	argumentsÚargumentz,Call compute function {!r} with the given {}z.

z

z.        Parameters
        ----------
        )ZvectorÚscalar_aggregatez
Array-likezArray-like or scalar-likez : Ú
z"    Argument to compute function.
z    zOptions class z does not have a docstringz¾                {0} : optional
                    Parameter for {1} constructor. Either `options`
                    or `{0}` can be passed, but not both at the same time.
                z&            options : pyarrow.compute.zK, optional
                Alternative way of passing options.
            z‰        memory_pool : pyarrow.MemoryPool, optional
            If not passed, will allocate memory from the default memory pool.
        z
{}
Ú )!rI   ÚdictrU   rV   rR   rW   Z__arrow_compute_function__Ú__name__Ú__qualname__ÚsummaryÚformatÚappendÚdescriptionrG   Zfunction_doc_additionsÚgetrF   rN   ÚkindrT   rP   ÚtypeÚdescÚwarningsÚwarnÚRuntimeWarningÚinspectÚ	signatureÚ
parametersÚvaluesÚstripÚjoinrQ   )ÚwrapperZexposed_namerK   rR   Zcpp_docZ
doc_piecesra   Zarg_strrd   Zdoc_additionrJ   Zarg_nameZarg_typeZoptions_class_docÚpÚsÚoptions_sigrL   rL   rM   Ú_decorate_compute_functiont   sb    



 

rv   c             C   sF   | j j}|sd S y
tƒ | S  tk
r@   t d |¡t¡ d S X d S )Nz!Python binding for {} not exposed)rI   rR   ÚglobalsÚKeyErrorri   rj   rb   rk   )rK   Ú
class_namerL   rL   rM   Ú_get_options_classÇ   s    

rz   c             C   sl   |s|r(|d k	rt d | ¡ƒ‚|||ŽS |d k	rht|tƒrD|f |ŽS t||ƒrR|S t d | |t|ƒ¡ƒ‚d S )NzMFunction {!r} called with both an 'options' argument and additional argumentsz-Function {!r} expected a {} parameter, got {})Ú	TypeErrorrb   Ú
isinstancer^   rg   )rU   rR   ÚoptionsÚargsÚkwargsrL   rL   rM   Ú_handle_optionsÓ   s    



r€   c                s>   ˆd kr d dœ‡ ‡‡fdd„
}nd d dœ‡ ‡‡‡fdd„
}|S )N)Úmemory_poolc                sb   ˆ t k	r2t|ƒˆ kr2tˆ› dˆ › dt|ƒ› dƒ‚|rTt|d tƒrTt ˆt|ƒ¡S ˆ |d | ¡S )Nz takes z positional argument(s), but z were givenr   )ÚEllipsisÚlenr{   r|   rD   Ú_callÚlistÚcall)r   r~   )rV   rK   Ú	func_namerL   rM   rr   ê   s    z&_make_generic_wrapper.<locals>.wrapper)r   r}   c                s’   ˆ t k	rLt|ƒˆ k r2tˆ› dˆ › dt|ƒ› dƒ‚|ˆ d … }|d ˆ … }nd}tˆˆ|||ƒ}|r„t|d tƒr„t ˆt|ƒ|¡S ˆ ||| ¡S )Nz takes z positional argument(s), but z were givenrL   r   )	r‚   rƒ   r{   r€   r|   rD   r„   r…   r†   )r   r}   r~   r   Zoption_args)rV   rK   r‡   rR   rL   rM   rr   ô   s    rL   )r‡   rK   rR   rV   rr   rL   )rV   rK   r‡   rR   rM   Ú_make_generic_wrapperè   s    
rˆ   c             C   sÞ   ddl m} g }x| D ]}| |||jƒ¡ qW x|D ]}| |||jƒ¡ q6W |d k	r¾t  |¡}xD|j ¡ D ]6}|j|j	|j
fksˆt‚|rš|j|j
d}| |¡ qnW | |d|j
d d¡ | |d|j
d d¡ t  |¡S )Nr   )Ú	Parameter)rf   r}   )Údefaultr   )rl   r‰   rc   ÚPOSITIONAL_ONLYÚVAR_POSITIONALrm   rn   ro   rf   ÚPOSITIONAL_OR_KEYWORDÚKEYWORD_ONLYÚAssertionErrorÚreplaceÚ	Signature)rJ   Úvar_arg_namesrR   r‰   rP   rU   ru   rs   rL   rL   rM   Ú_make_signature  s&    




r“   c             C   sj   t |ƒ}t|ƒ}|o |d  d¡}|r8| ¡  d¡g}ng }t| |||jd}t|||ƒ|_t	|| ||ƒS )NéÿÿÿÿÚ*)rV   )
rz   rN   Ú
startswithÚpopÚlstriprˆ   rV   r“   Ú__signature__rv   )rU   rK   rR   rJ   Z
has_varargr’   rr   rL   rL   rM   Ú_wrap_function  s    rš   c              C   sŠ   t ƒ } tƒ }dddœ}xn| ¡ D ]b}| ||¡}| |¡}|jdkrFq |jdkr\|jdkr\q || kslt|ƒ‚t||ƒ | |< | |< q W dS )z«
    Make global functions wrapping each compute function.

    Note that some of the automatically-generated wrappers may be overridden
    by custom versions below.
    Úand_Úor_)ÚandÚorZhash_aggregater[   r   N)	rw   r=   r?   re   r>   rf   rV   r   rš   )ÚgÚregZrewritesZcpp_namerU   rK   rL   rL   rM   Ú_make_global_functions.  s    

r¡   c             C   sh   |dk	p|dk	}|r$|dk	r$t dƒ‚|dkrXtjj |¡}|dkrNt |¡}n
t |¡}td| g||ƒS )a†  
    Cast array values to another data type. Can also be invoked as an array
    instance method.

    Parameters
    ----------
    arr : Array-like
    target_type : DataType or str
        Type to cast to
    safe : bool, default True
        Check for overflows or other unsafe conversions
    options : CastOptions, default None
        Additional checks pass by CastOptions
    memory_pool : MemoryPool, optional
        memory pool to use for allocations during function execution.

    Examples
    --------
    >>> from datetime import datetime
    >>> import pyarrow as pa
    >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
    >>> arr.type
    TimestampType(timestamp[us])

    You can use ``pyarrow.DataType`` objects to specify the target type:

    >>> cast(arr, pa.timestamp('ms'))
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]

    >>> cast(arr, pa.timestamp('ms')).type
    TimestampType(timestamp[ms])

    Alternatively, it is also supported to use the string aliases for these
    types:

    >>> arr.cast('timestamp[ms]')
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]
    >>> arr.cast('timestamp[ms]').type
    TimestampType(timestamp[ms])

    Returns
    -------
    casted : Array
        The cast result as a new Array
    NzRMust either pass values for 'target_type' and 'safe' or pass a value for 'options'FÚcast)	Ú
ValueErrorÚpaÚtypesÚlibÚensure_typer   ZunsafeÚsafer<   )ZarrZtarget_typer¨   r}   r   Zsafe_vars_passedrL   rL   rM   r¢   N  s    6
r¢   )r   c            C   sÌ   |dk	r.|dk	r"|   ||| ¡} qB|   |¡} n|dk	rB|   d|¡} t|tjƒs`tj|| jd}n | j|jkr€tj| ¡ | jd}t|d}td| g||ƒ}|dk	rÈ| ¡ dkrÈtj| ¡ | t 	¡ d}|S )a©  
    Find the index of the first occurrence of a given value.

    Parameters
    ----------
    data : Array-like
    value : Scalar-like object
        The value to search for.
    start : int, optional
    end : int, optional
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    index : int
        the index, or -1 if not found
    Nr   )rg   )ÚvalueÚindex)
Úslicer|   r¤   ÚScalarÚscalarrg   Úas_pyr   r<   Zint64)Údatar©   ÚstartÚendr   r}   ÚresultrL   rL   rM   rª   “  s    
rª   T)Úboundscheckr   c            C   s   t |d}td| |g||ƒS )a–  
    Select values (or records) from array- or table-like data given integer
    selection indices.

    The result will be of the same type(s) as the input, with elements taken
    from the input array (or record batch / table fields) at the given
    indices. If an index is null then the corresponding value in the output
    will be null.

    Parameters
    ----------
    data : Array, ChunkedArray, RecordBatch, or Table
    indices : Array, ChunkedArray
        Must be of integer type
    boundscheck : boolean, default True
        Whether to boundscheck the indices. If False and there is an out of
        bounds index, will likely cause the process to crash.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : depends on inputs
        Selected values for the given indices

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> indices = pa.array([0, None, 4, 3])
    >>> arr.take(indices)
    <pyarrow.lib.StringArray object at ...>
    [
      "a",
      null,
      "e",
      null
    ]
    )r³   Útake)r6   r<   )r¯   Úindicesr³   r   r}   rL   rL   rM   r´   ¹  s    (
r´   c             C   sV   t |tjtjtjfƒs(tj|| jd}n | j|jkrHtj| ¡ | jd}td| |gƒS )a€  
    Replace each null element in values with fill_value. The fill_value must be
    the same type as values or able to be implicitly casted to the array's
    type.

    This is an alias for :func:`coalesce`.

    Parameters
    ----------
    values : Array, ChunkedArray, or Scalar-like object
        Each null element is replaced with the corresponding value
        from fill_value.
    fill_value : Array, ChunkedArray, or Scalar-like object
        If not same type as data will attempt to cast.

    Returns
    -------
    result : depends on inputs
        Values with all null elements replaced

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
    >>> fill_value = pa.scalar(5, type=pa.int8())
    >>> arr.fill_null(fill_value)
    <pyarrow.lib.Int8Array object at ...>
    [
      1,
      2,
      5,
      3
    ]
    )rg   Zcoalesce)	r|   r¤   ÚArrayÚChunkedArrayr¬   r­   rg   r®   r<   )ro   Z
fill_valuerL   rL   rM   Ú	fill_nullå  s
    #r¸   c            C   sR   |dkrg }t | tjtjfƒr*| d¡ ntdd„ |ƒ}t||ƒ}td| g||ƒS )a¸  
    Select the indices of the top-k ordered elements from array- or table-like
    data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get top indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array
        Indices of the top-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.top_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      5,
      4,
      2
    ]
    N)ÚdummyÚ
descendingc             S   s   | dfS )Nrº   rL   )Úkey_namerL   rL   rM   Ú<lambda>:  ó    z top_k_unstable.<locals>.<lambda>Úselect_k_unstable)r|   r¤   r¶   r·   rc   Úmapr-   r<   )ro   ÚkÚ	sort_keysr   r}   rL   rL   rM   Útop_k_unstable  s    %
rÂ   c            C   sR   |dkrg }t | tjtjfƒr*| d¡ ntdd„ |ƒ}t||ƒ}td| g||ƒS )aÏ  
    Select the indices of the bottom-k ordered elements from
    array- or table-like data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get bottom indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array of indices
        Indices of the bottom-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.bottom_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      0,
      1,
      2
    ]
    N)r¹   Ú	ascendingc             S   s   | dfS )NrÃ   rL   )r»   rL   rL   rM   r¼   i  r½   z#bottom_k_unstable.<locals>.<lambda>r¾   )r|   r¤   r¶   r·   rc   r¿   r-   r<   )ro   rÀ   rÁ   r   r}   rL   rL   rM   Úbottom_k_unstable?  s    %
rÄ   Úsystem)Úinitializerr}   r   c            C   s   t |d}tdg ||| dS )aB  
    Generate numbers in the range [0, 1).

    Generated values are uniformly-distributed, double-precision
    in range [0, 1). Algorithm and seed can be changed via RandomOptions.

    Parameters
    ----------
    n : int
        Number of values to generate, must be greater than or equal to 0
    initializer : int or str
        How to initialize the underlying random generator.
        If an integer is given, it is used as a seed.
        If "system" is given, the random generator is initialized with
        a system-specific source of (hopefully true) randomness.
        Other values are invalid.
    options : pyarrow.compute.RandomOptions, optional
        Alternative way of passing options.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.
    )rÆ   Úrandom)Úlength)r$   r<   )ÚnrÆ   r}   r   rL   rL   rM   rÇ   n  s    
rÇ   c              G   sr   t | ƒ}|dkrdt| d ttfƒr0t | d ¡S t| d tƒrLt | d ¡S tdt	| d ƒ› ƒ‚n
t | ¡S dS )a  Reference a column of the dataset.

    Stores only the field's name. Type and other information is known only when
    the expression is bound to a dataset having an explicit scheme.

    Nested references are allowed by passing multiple names or a tuple of
    names. For example ``('foo', 'bar')`` references the field named "bar"
    inside the field named "foo".

    Parameters
    ----------
    *name_or_index : string, multiple strings, tuple or int
        The name or index of the (possibly nested) field the expression
        references to.

    Returns
    -------
    field_expr : Expression
        Reference to the given field

    Examples
    --------
    >>> import pyarrow.compute as pc
    >>> pc.field("a")
    <pyarrow.compute.Expression a>
    >>> pc.field(1)
    <pyarrow.compute.Expression FieldPath(1)>
    >>> pc.field(("a", "b"))
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    >>> pc.field("a", "b")
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    rX   r   zCfield reference should be str, multiple str, tuple or integer, got N)
rƒ   r|   ÚstrÚintrD   Z_fieldÚtupleZ_nested_fieldr{   rg   )Zname_or_indexrÉ   rL   rL   rM   Úfieldˆ  s    !rÍ   c             C   s
   t  | ¡S )aB  Expression representing a scalar value.

    Parameters
    ----------
    value : bool, int, float or string
        Python value of the scalar. Note that only a subset of types are
        currently supported.

    Returns
    -------
    scalar_expr : Expression
        An Expression representing the scalar value
    )rD   Z_scalar)r©   rL   rL   rM   r­   ¹  s    r­   )bZpyarrow._computer   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   ÚcollectionsrE   rl   ÚtextwraprF   ri   Zpyarrowr¤   rG   Zpyarrow.vendoredrH   rN   rO   rT   rv   rz   r€   rˆ   r“   rš   r¡   r¢   rª   r´   r¸   rÂ   rÄ   rÇ   rÍ   r­   rL   rL   rL   rM   Ú<module>   s8   ÿ J
SE&,+//1