o
    0 i]                     @   s0  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dl	mZ d dlmZ zd dlZd dlmZ W n eyM   dZY nw ddd	Zejej Zd
d Zdd Zdd Zdd Zd(ddZdd Zdd Zdd Zdd Zdd Z dd Z!d d! Z"d"d# Z#d$d% Z$d&d' Z%dS ))    N)_accelerator)_util)_greedy_path)_optimal_path)_try_use_cutensornet)cutensorF)sum_ellipsisbroadcast_diagonalc                    sj   g }g }|D ] }| |r j|d  nd t fdd|D }| | q    ||dd  S )zTranspose and diagonal

    Args:
        a
        axeses (sequence of sequences of ints)

    Returns:
        ndarray: a with its axes permutated. A writeable view is returned
        whenever possible.
    r      c                 3   s    | ]} j | V  qd S N)strides.0axisa _/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/cupy/linalg/_einsum.py	<genexpr>/       z _transpose_ex.<locals>.<genexpr>T)appendshapesumviewZ_set_shape_and_strides)r   axesesr   r   axesZstrider   r   r   _transpose_ex   s   r   c                 C   sb   d}| D ]*}|t u r|d7 }qzt|}W n ty' } ztd|d }~ww |t| 7 }q|S )N @z=For this input type lists must contain either int or Ellipsis)Ellipsisoperatorindex	TypeErroreinsum_symbols)Zlist_subscriptZstr_subscriptser   r   r   _parse_int_subscript7   s    
r&   c                 C   s  t | dkr
tdt| d tr| d }t| dd }|D ]}|dv r&q|tvr0td| q|dd}d	|v r?td
d|v sGd|v rq|ddkpT|ddk}|d}|sbt |dkrftd|\}}|dd}n|}d}|ddd}t |t |krt |t |krdnd}t|d n1t| } g }g }t | dkr|	| 
d |	t| 
d t | dks| rt| d }nd}|||fS )a  Parse einsum operands.

    This function is based on `numpy.core.einsumfunc._parse_einsum_input`
    function in NumPy 1.14.

    Parameters
    ----------
    args : tuple
        The non-keyword arguments to einsum

    Returns
    -------
    input_strings : str
        Parsed input strings
    output_string : str
        Parsed output string
    operands : list of array_like
        The operands to use in the contraction

    Examples
    --------
    The operand list is simplified to reduce printing:

    >>> a = np.random.rand(4, 4)
    >>> b = np.random.rand(4, 4, 4)
    >>> _parse_einsum_input(('...a,...a->...', a, b))
    (['@a, @a'], 'xz', [a, b])

    >>> _parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0]))
    (['@a, @a'], 'xz', [a, b])
    r   zmust specify the einstein sum subscripts string and at least one operand, or at least one operand and its corresponding subscripts listr
   Nz.,-> zTinvalid subscript '%s' in einstein sum subscripts string, subscripts must be lettersz...r   .zUeinstein sum subscripts string contains a '.' that is not part of an ellipsis ('...')->z->   zKeinstein sum subscript string does not contain proper '->' output specified r   ,moreZfewerzS operands provided to einstein sum function than specified in the subscripts string)len
ValueError
isinstancestrlistr#   replacecountsplitr   popr&   )argsZ
subscriptsoperandsr$   invalidinput_subscriptsoutput_subscriptmsgr   r   r   _parse_einsum_inputG   sf   !

r=   c                 C   s   | dk rd|  S t | S )Nr   z...[%d])chr)labelr   r   r   _chr   s   r@   c           	      C   s  |  d}t|dkr5|\}|dur.t||kr.t||kr&td||f td||f dd |D S t|dkr{|\}}|durM|t|t|  }|d	k rZtd
|||f g }|dd |D  |t| d	 |dd |D  |S td|du rd d|  )a  Parse a subscript that may contain ellipsis

    Args:
        subscript (str): An einsum subscript of an operand or an output. '...'
            should be replaced by '@'.
        idx (int or None): For error messages, give int idx for the idx-th
            operand or None for the output.
        ndim (int, optional): ndim of the operand
        ellipsis_len (int, optional): number of broadcast dimensions of the
            output.

    Returns:
        list of ints: The parsed subscript

    r   r
   NzMeinstein sum subscripts string %s contains too many subscripts for operand %dzoperand %d has more dimensions than subscripts string %s given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.c                 S      g | ]}t |qS r   ordr   r?   r   r   r   
<listcomp>       z-_parse_ellipsis_subscript.<locals>.<listcomp>r*   r   zReinstein sum subscripts string %s...%s contains too many subscripts for operand %dc                 s       | ]}t |V  qd S r   rB   rD   r   r   r   r          z,_parse_ellipsis_subscript.<locals>.<genexpr>c                 s   rG   r   rB   rD   r   r   r   r      rH   zVeinstein sum subscripts string contains a '.' that is not part of an ellipsis ('...') zin the outputzfor operand %d)r5   r.   r/   extendrange)	Z	subscriptidxndimellipsis_lensubssubZleft_subZ	right_subretr   r   r   _parse_ellipsis_subscript   sN   

rQ   c                    s  t t| D ]x}| | }||  tt|t|k r~i }t|D ]\}}||g | q t| }|D ]4\}}td rG fdd|D } fdd|D }t|dkrj|	 }	|	 }
t
d|t||	|
f q6t| \}}t|| |< t |||< qdS )	zGCompute diagonal for each operand

    This function mutates args.
    r	   c                    s   g | ]} j | d kr|qS )r
   r   r   arrr   r   rE      s    z%_einsum_diagonals.<locals>.<listcomp>c                    s   h | ]} j | qS r   rR   r   rS   r   r   	<setcomp>       z$_einsum_diagonals.<locals>.<setcomp>r*   zIdimensions in operand %d for collapsing index '%s' don't match (%d != %d)N)rJ   r.   set	enumerate
setdefaultr   r2   itemsoptionsr6   r/   r@   zipr   )r:   r8   rK   rO   r   r   r?   r   dimsZdim0Zdim1r   rS   r   _einsum_diagonals   s4   	r^   c                 c   sl    | D ]0}t dd |D sJ t|dkr3t|dd}|d |d fV  |dd D ]}d	|fV  q+qdS )
zDecompose path into binary path

    Args:
        path (sequence of tuples of ints)

    Yields:
        tuple of ints: pair (idx0, idx1) that represents the operation
            {pop(idx0); pop(idx1); append();}
    c                 s   s    | ]}|d kV  qdS r   Nr   )r   rK   r   r   r   r     rH   z#_iter_path_pairs.<locals>.<genexpr>r*   T)reverser   r
   N)allr.   sorted)pathindicesrK   r   r   r   _iter_path_pairs  s   rf   c                    sV   g }g }|D ]}| | | fdd|D  q |tdd |D |fS )zTranspose and flatten each

    Args:
        a
        axeses (sequence of sequences of ints)

    Returns:
        aT: a with its axes permutated and flatten
        shapes: flattened shapes
    c                    s   g | ]} j | qS r   rR   r   r   r   r   rE   (  rV   z&_flatten_transpose.<locals>.<listcomp>c                 S   s   g | ]	}t jj|qS r   )cupyZ_coreZinternalprod)r   r   r   r   r   rE   +  s    )rI   r   	transposereshapetuple)r   r   transpose_axesZshapesr   r   r   r   _flatten_transpose  s   

rm   c                 C   s:   t dsdS | |krdS | tjtjtjtjfvrdS dS )NcontractionFT)r   Zcheck_availabilityrg   Zfloat32Zfloat64Z	complex64Z
complex128)Zdtype0sub0Zdtype1sub1
batch_dimscontract_dimsr   r   r   _use_cutensor0  s   

rs   c                    s>   i  t | | || D ]\}}| |< q fdd|D }|S )Nc                       g | ]} | qS r   r   )r   iZextentr   r   rE   ?  rF   z"_get_out_shape.<locals>.<listcomp>)r\   )Zshape0ro   Zshape1rp   sub_outsizeru   	out_shaper   rv   r   _get_out_shape;  s
   
rz   c                 C   s^   t |}t | j}g }|D ]}||vr|| |d ||| qt| ||S )a  Return a reshaped and transposed array.

    The input array ``arr`` having ``mode`` as its modes is reshaped and
    transposed so that modes of the output becomes ``mode_out``.

    Example
        >>> import cupy
        >>> a = cupy.zeros((10, 20))
        >>> mode_a = ('A', 'B')
        >>> mode_out = ('B', 'C', 'A')
        >>> out = cupy.linalg.einsum._expand_dims_transpose(a, mode_a,
        ...                                                 mode_out)
        >>> out.shape
        (20, 1, 10)

    Args:
        arr (cupy.ndarray):
        mode (tuple or list): The modes of input array.
        mode_out (tuple or list): The modes of output array.

    Returns:
        cupy.ndarray: The reshaped and transposed array.

    r
   )r2   r   r   r!   rg   ri   rj   )rT   modeZmode_outr   r   ru   r   r   r   _expand_dims_transposeC  s   


r|   c                    sz  t  }t }t|t ksJ dt|tks J dt dks,tdkr4| |   fS t |}||@ }||@ }	||	 }
t |	|
\}}}t|	|
\}}} fdd|D }|fdd|D kslJ  fdd|D }fdd|D }|| | }t ||ksJ d	t|
dkrt|t|kr|}t|  |} t||}| | |fS t D ]P}|tjkrtd urt| j	 |j	|	|
rt|t|kr|}t
| j |j|}t|| j	}t| } t|}td
|  |d||}||f  S qt| |||g\}}t||||g\}}|d |d  |d  }|d |d ks0J t|||}||fS )Nz%operand 0 should be reduced: diagonalz%operand 1 should be reduced: diagonalr   c                    rt   r   r   r   ro   r   r   rE   x  rF   z)reduced_binary_einsum.<locals>.<listcomp>c                    rt   r   r   r   rp   r   r   rE   y  rF   c                    rt   r   r   r   r}   r   r   rE   z  rF   c                    rt   r   r   r   r~   r   r   rE   {  rF   z%operands should be reduced: unary sumg      ?g        r
   r*   )rW   r.   _make_transpose_axesr|   r   Zget_routine_acceleratorsZACCELERATOR_CUTENSORr   rs   dtyperz   r   rg   emptyZascontiguousarrayrn   rm   matmulrj   )arr0ro   arr1rp   
sub_othersZset0Zset1Z
set_othersZsharedrq   rr   Zbs0Zcs0Zts0Zbs1Zcs1Zts1Zsub_bZsub_lZsub_rrw   Zacceleratorry   arr_outZtmp0Zshapes0Ztmp1Zshapes1Z
shapes_outr   )ro   rp   r   reduced_binary_einsumg  sj   


r   c                 C   sr   g }g }g }t | D ]#\}}||v r|||f q
||v r&|||f q
|||f q
t|t|t|fS r   )rX   r   _tuple_sorted_by_0)rO   Zb_dimsZc_dimsbscstsr   r?   r   r   r   r     s   r   c                 C   s   t dd t| D S )Nc                 s   s    | ]\}}|V  qd S r   r   )r   _ru   r   r   r   r     rH   z%_tuple_sorted_by_0.<locals>.<genexpr>)rk   rc   )Zzsr   r   r   r     s   r   c            !   	      s  t | i |}|dur|S t| \}}} t|tsJ t| ts"J |dd}i  |dd}|du r6d}|rBtdt|  |du rKtj|  n|dd	 | D } d
d	 t	t
|| D }i t	|D ]D\}}| | j}	t	|D ]6\}
}|	|
 }| v r| dkr||< qs|d| fvr| }tdt||||f qs||< qsqf|du rttj|fdd	ttD }nZtd sd|vrdv rtdt|dtdd  D d}ttj||D ]}|vrtdt| qt|tt|kr|D ]}||dkrtdt| qt||  t| dkrtdd | D rEtjtfdd|D dS tt| D ]O}| | }d|jv rg }g }t	|| D ]\}
}|j|
 dkrt||
 qa|| qa|||< tj|t|d| |< | | jt|| ksJ ~qKt| dk}t	|D ]@\}}t  |||< ttjtfddt	|D rd}fdd	t	|D ||< | | jd| |< q|rdd	 | D } n
 fd d	| D } t!t"d!}|du rttt| g}not|r"|d" d#kr"|dd }n\z&t|dkrAt|d t#t$frA||d"  }t#|d }n|| }d$}W n tt%fyZ   td%t&| w d&d	 |D }t|}||||}td'd |D r~t'(d(t)j* t+|D ];\}}| |}||}| |}||}tt|tj|}t,|||||\}}| | || ~~q| \}|\}g } |D ]}||v r| |-| q|.| /fd)d	|D }|s|j0ksJ |S )*a  einsum(subscripts, *operands, dtype=None, optimize=False)

    Evaluates the Einstein summation convention on the operands.
    Using the Einstein summation convention, many common multi-dimensional
    array operations can be represented in a simple fashion. This function
    provides a way to compute such summations.

    .. note::

       - Memory contiguity of the returned array is not always compatible with
         that of :func:`numpy.einsum`.
       - ``out``, ``order``, and ``casting`` options are not supported.
       - If :envvar:`CUPY_ACCELERATORS` includes ``cutensornet``, the `einsum`
         calculation will be performed by the cuTensorNet backend if possible.

           - The support of the ``optimize`` option is limited (currently, only
             `False`, 'cutensornet', or a custom path for pairwise contraction
             is supported, and the maximum intermediate size is ignored). If
             you need finer control for path optimization, consider replacing
             :func:`cupy.einsum` by :func:`cuquantum.contract` instead.
           - Requires `cuQuantum Python`_ (v22.03+).

       - If :envvar:`CUPY_ACCELERATORS` includes ``cutensor``, `einsum` will be
         accelerated by the cuTENSOR backend whenever possible.

    Args:
        subscripts (str): Specifies the subscripts for summation.
        operands (sequence of arrays): These are the arrays for the operation.
        dtype: If provided, forces the calculation to use the data type
            specified. Default is None.
        optimize: Valid options include {`False`, `True`, 'greedy', 'optimal'}.
            Controls if intermediate optimization should occur. No optimization
            will occur if `False`, and `True` will default to the 'greedy'
            algorithm. Also accepts an explicit contraction list from
            :func:`numpy.einsum_path`. Defaults to `False`. If a pair is
            supplied, the second argument is assumed to be the maximum
            intermediate size created.

    Returns:
        cupy.ndarray:
            The calculation based on the Einstein summation convention.

    .. seealso:: :func:`numpy.einsum`
    .. _cuQuantum Python: https://docs.nvidia.com/cuda/cuquantum/python/
    Nr   optimizeFTgreedyz+Did not understand the following kwargs: %sc                 S   s   g | ]}t |qS r   )rg   Z
asanyarrayr   rT   r   r   r   rE      s    zeinsum.<locals>.<listcomp>c                 S   s$   g | ]\}\}}t |||jd qS ))rL   )rQ   rL   )r   rK   rO   rT   r   r   r   rE     s    
r
   zJSize of label '%s' for operand %d (%d) does not match previous terms (%d).c                    s&   g | ]}|d k s  |dkr|qS )r   r
   )r4   rD   )tmp_subscriptsr   r   rE      s
    r   r   ra   zoutput has more dimensions than subscripts given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.c                 s   s    | ]}|d k V  qdS r_   r   rD   r   r   r   r   .  rH   zeinsum.<locals>.<genexpr>)rM   z^einstein sum subscripts string included output subscript '%s' which never appeared in an inputr*   zLeinstein sum subscripts string includes output subscript '%s' multiple timesc                 s   s    | ]}|j d kV  qdS r_   )rx   r   r   r   r   r   D  r   c                 3   s    | ]} | V  qd S r   r   rD   dimension_dictr   r   r   F  rH   )r   )r   c                 3   s     | ]\}}| vr|V  qd S r   r   r   r   r?   )other_subscriptsr   r   r   c  s    c                    s   g | ]
\}}| vr|qS r   r   r   )sum_axesr   r   rE   j  s
    )r   r   c                 S   s   g | ]}|  qS r   )r   r   r   r   r   r   rE   t  rF   c                    s"   g | ]}|j fd di qS )copyF)Zastyper   )casting_kwargsresult_dtyper   r   rE   v  s    )r   Zoptimalr   Zeinsum_pathl        z*Did not understand the path (optimize): %sc                 S   rA   r   )rW   )r   rO   r   r   r   rE     rF   c                 s   s    | ]	}t |d kV  qdS )r*   N)r.   )r   re   r   r   r   r     s    z,memory efficient einsum is not supported yetc                    rt   r   r   rD   r   r   r   rE     s    )1r   r=   r0   r2   r6   r"   keysrg   Zresult_typerX   r\   r   r/   r@   	itertoolschainfrom_iterablerc   rW   r[   rQ   r   r.   r4   r^   anyZzerosrk   rJ   r   ZsqueezerL   r   r   r   intfloatKeyErrorr1   warningswarnr   ZPerformanceWarningrf   r   r!   ri   rj   r   )!r8   kwargsoutr:   r;   r   r   rK   rO   shr   r?   dimZdim_oldrT   Zsqueeze_indicesZreturns_viewZoptimize_algorithmsrd   algoZmemory_limitZ
input_setsZ
output_setZidx0Zidx1r   ro   r   rp   r   r   rw   rl   r   )r   r   r   r   r   r   r   einsum  sR  .











"








r   )NN)&r   r   r    stringr   rg   Z
cupy._corer   r   Zcupy.linalg._einsum_optr   r   Zcupy.linalg._einsum_cutnr   Z cupy_backends.cuda.libs.cutensorZcupy_backendsZcupyxr   ImportErrorr[   ascii_uppercaseascii_lowercaser#   r   r&   r=   r@   rQ   r^   rf   rm   rs   rz   r|   r   r   r   r   r   r   r   r   <module>   sH    a
2"$?