o
    0 ir2                     @   sD  d dl Z d dlZd dlmZmZ d dlZd dlmZ d dlmZ d dlmZ e	e
e
e
f Ze	eef Zeeee
e
f f Zee	eee
f  Ze	edf Ze jG dd dZd	ed
edefddZded	ed
edefddZde	edf de	e
df de	edf fddZde	edf de	edf fddZde	e
df dee
ee	edf  f deeef fddZdddee	e
df ge	e
df f dee	edf ge	edf f ddfdd Zd3d!d"Zd3d#d$Zd3d%d&Zd3d'd(Z	d4d)dd*dd+ed ddfd,d-Zd.ee
 d/ee
 d0eee e
   dee
ee	edf  f fd1d2Z!dS )5    N)CallableOptional)_array)_chunk)_modes.c                   @   s2   e Zd ZU ee ed< ee ed< ee ed< dS )	_Blockingi_partitionsj_partitionsk_partitionsN)__name__
__module____qualname__listint__annotations__ r   r   k/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/cupyx/distributed/array/_linalg.pyr      s   
 	r   location_map_alocation_map_breturnc                 C   s   g }g }g }dd }|   D ]\}}||| ||| q|  D ]\}}||| ||| q!dd }	|	|}|	|}|	|}dd }
|   D ]\}}|
|| |
|| qH|  D ]\}}|
|| |
|| q[t|||S )Nc                 S   s2   | \}}}|dkrt d|| || d S )N   z"Step other than 1 is not supported)RuntimeErrorappend)indices
partitionsstartstopstepr   r   r   add_to_partitions6   s
   

z)_find_blocking.<locals>.add_to_partitionsc                 S   sX   t | dkr
td|   | d g}t| | dd  D ]\}}||kr)|| q|S )Nr   zArray has no chunkr   )lenr   sortzipr   )r   resxyr   r   r   to_unique_sortedG   s   

z(_find_blocking.<locals>.to_unique_sortedc                 S   s.   | \}}}| |d | |krtdd S )Nr   zInconsistent index mapping)indexr   )r   r   r   r   _r   r   r   check_indicesX   s   
z%_find_blocking.<locals>.check_indices)keysr   )r   r   r   r	   r
   r   Z	i_indicesZ	k_indicesZ	j_indicesr%   r(   r   r   r   _find_blocking-   s,   	



r*   blockingc                 C   s   | j }| j}| j}g }t||dd  D ]V}t||dd  D ]J}t||dd  D ]>}	|d |	d f}
|	d |d f}t||
  }t||  }||@ }|r_| }||
||f q*td|
 d| qq|S )Nr   r   zAThere is no device that can perform multiplication between block z and )	r   r	   r
   r!   setr)   popr   r   )r+   r   r   r   r	   r
   planZi_rangeZj_rangeZk_rangeblock_ablock_bZ	devices_aZ	devices_bintersectiondevr   r   r   _make_execution_planh   s2   r4   slicesshapec                 C   s,   t | t |ks
J tdd t| |D S )Nc                 s   s    | ]
\}}| |V  qd S N)r   ).0slengthr   r   r   	<genexpr>   s    z%_convert_to_tuples.<locals>.<genexpr>)r   tupler!   )r5   r6   r   r   r   _convert_to_tuples   s   r=   tuplesc                 C   s   t dd | D S )Nc                 s   s    | ]}t | V  qd S r7   slice)r8   tr   r   r   r;      s    z%_convert_to_slices.<locals>.<genexpr>)r<   )r>   r   r   r   _convert_to_slices   s   rB   	index_mapc                 C   s|   i }|  D ]5\}}t|D ],\}}t|| }|d d |dd  }}	tt|	}	||i }
|
|	i }|||< qq|S )N)items	enumerater=   typingcast	_BlockIdx
setdefault)r6   rC   Zlocation_mapsr3   ZidxsZchunk_iidxZ
idx_tuples	batch_idxZ	block_idxZlocation_maplocationr   r   r   _group_by_batch   s   

	rN   arr_array.DistributedArrayf_shapef_idxc                    sl   dt jdt jf fddi }| j D ]\}}fdd|D ||< q| j}t|| j|| j| j	S )Nchunkr   c                    sH   | j | j j} | j} fdd| jD }t|| j||| jS )Nc                    s   g | ]
\}}| |fqS r   r   )r8   datarK   )rR   r   r   
<listcomp>   s    z>_reshape_array_with.<locals>.reshape_chunk.<locals>.<listcomp>)	arrayZreshaper6   r&   updatesr   _Chunkready
prevent_gc)rS   rT   r&   rW   )rR   rQ   r   r   reshape_chunk   s   
z*_reshape_array_with.<locals>.reshape_chunkc                    s   g | ]} |qS r   r   )r8   rS   )r[   r   r   rU      s    z'_reshape_array_with.<locals>.<listcomp>)
r   rX   _chunks_maprE   r6   r   DistributedArraydtype_mode_comms)rO   rQ   rR   
chunks_mapr3   chunksr6   r   )rR   rQ   r[   r   _reshape_array_with   s   
rc   c                 C      t | dd dd S )Nc                 S   s   d|  S Nr,   r   r6   r   r   r   <lambda>       z'_prepend_one_to_shape.<locals>.<lambda>c                 S   s   t d f|  S r7   r?   rK   r   r   r   rg          rc   rO   r   r   r   _prepend_one_to_shape   
   rm   c                 C   rd   )Nc                 S   s   | d S re   r   rf   r   r   r   rg      rh   z&_append_one_to_shape.<locals>.<lambda>c                 S   s   | t d f S r7   r?   ri   r   r   r   rg      rj   rk   rl   r   r   r   _append_one_to_shape   rn   ro   c                 C   &   | j d dks	J t| dd dd S )Nr   c                 S      | d d S Nrq   r   rf   r   r   r   rg          z!_pop_from_shape.<locals>.<lambda>c                 S   rr   rs   r   ri   r   r   r   rg      rt   r6   rc   rl   r   r   r   _pop_from_shape      rv   c                 C   rp   )Nr   r   c                 S      | dd  S Nr   r   rf   r   r   r   rg      rt   z'_pop_front_from_shape.<locals>.<lambda>c                 S   rx   ry   r   ri   r   r   r   rg      rt   ru   rl   r   r   r   _pop_front_from_shape   rw   rz   aboutc           "   
   K   s  |durt ddD ]}||v rt d| dq
t| tjr%t|tjs)t d| tj} |tj}d }}| jdkrDd	}t| } |jdkrOd	}t	|}| j
d
d \}}|j
d
d \}	}
||	kss| j
dd
 |j
dd
 krwtdt| j
| j}t|j
|j}| | krt ddd | jD }d}| D ]}|| }|| }t||}t|||}t|}|D ]y\}}}|| }|| }| j| ||  }|j| ||  }|tj |tj |t|d  t|d  f }| 3}||j tjjj|j|jfi |}tj||  |||fd}|| !| |j"}W d   n	1 s.w   Y  qq| j
dd
 ||
f } t| ||tj#| j$}!|rRt%|!}!|rYt&|!}!|!S )ak  Matrix multiplication between distributed arrays.

    The arguments must have compatible :attr:`~DistributedArray.shape` and
    :attr:`~DistributedArray.index_map`.

    This operation converts its operands into the replica mode, and compute
    their product in the sum mode.

    Args:
        a, b: Input distributed arrays.
        out (optional): A location into which the result is stored. This option
            is currently not supported.
    Returns:
        The matrix product of the inputs.

    Example:
        >>> A = distributed_array(
        ...     cupy.arange(6).reshape(2, 3),
        ...     make_2d_index_map([0, 2], [0, 1, 3],
        ...                       [[{0}, {1, 2}]]))
        >>> B = distributed_array(
        ...     cupy.arange(12).reshape(3, 4),
        ...     make_2d_index_map([0, 1, 3], [0, 2, 4],
        ...                       [[{0}, {0}],
        ...                        [{1}, {2}]]))
        >>> C = A @ B
        >>> C.mode
        'sum'
        >>> C.all_chunks()
        {0: [array([[0, 0],
                    [0, 3]]),
             array([[0, 0],
                    [6, 9]])],
         1: [array([[20, 23],
                    [56, 65]])],
         2: [array([[26, 29],
                    [74, 83]])]}
        >>> C
        array([[20, 23, 26, 29],
               [56, 68, 80, 92]])

    .. seealso:: :obj:`numpy.matmul`
    NzArgument `out` is not supported)ZsubokZaxesZaxisz
Argument `z` is not supportedzHMixing a distributed array with a non-distributed array is not supportedFr   TrD   zShapes are incompatiblezMismatched batch shapesc                 S   s   i | ]}|g qS r   r   )r8   r3   r   r   r   
<dictcomp>,  s    zmatmul.<locals>.<dictcomp>r   )rZ   )'r   
isinstancer   r]   Z_to_op_moder   ZREPLICAndimrm   ro   r6   
ValueErrorrN   rC   r)   devicesr*   r4   rB   r\   flushr@   Zon_readyZ
wait_eventrY   cupyZlinalgZ_productmatmulrV   r   rX   recordr   r^   ZSUMr`   rz   rv   )"r{   r|   r}   kwargsparamZone_prependedZone_appendednmm2pZlocation_maps_aZlocation_maps_bra   r^   rL   r   r   r+   r/   Zindex_prefixr0   r1   r3   Zloc_aZloc_bZchunk_aZchunk_br&   streamZchunk_ab_arrayZchunk_abr6   r"   r   r   r   r      s   /


$


r   r   r	   r   c                 C   s  | d dksJ t t| | ksJ |d dksJ t t||ks$J i }t|t| d ks2J tt|D ]K}t|| t|d ksHJ tt|| D ]2}| | }| |d  }|| }||d  }	t||t||	f}
|| | D ]}||g |
 qvqPq8|S )a  Create an ``index_map`` for a 2D matrix with a specified blocking.

    Args:
        i_partitions (list of ints): boundaries of blocks on the `i` axis
        j_partitions (list of ints): boundaries of blocks on the `j` axis
        devices (2D list of sets of ints): devices owning each block

    Returns:
        dict from int to array indices: index_map
            Indices for the chunks that devices with designated IDs are going
            to own.

    Example:
        >>> index_map = make_2d_index_map(
        ...     [0, 2, 4], [0, 3, 5],
        ...     [[{0}, {1}],
        ...      [{2}, {0, 1}]])
        >>> pprint(index_map)
        {0: [(slice(0, 2, None), slice(0, 3, None)),
             (slice(2, 4, None), slice(3, 5, None))],
         1: [(slice(0, 2, None), slice(3, 5, None)),
             (slice(2, 4, None), slice(3, 5, None))],
         2: [(slice(2, 4, None), slice(0, 3, None))]}
    r   r   )sortedr-   r   ranger@   rJ   r   )r   r	   r   rC   ijZi_startZi_stopZj_startZj_stoprK   r3   r   r   r   make_2d_index_mapX  s&   r   )r   rP   r7   )"dataclassesrG   r   r   r   Zcupyx.distributed.arrayr   r   r   r<   r   Z_SliceIndicesrI   dictZ_BlockLocationMapr   Z_ExecutionPlanZ	_BatchIdx	dataclassr   r*   r4   r@   r=   rB   rN   rc   rm   ro   rv   rz   r   r-   r   r   r   r   r   <module>   s    
;
#
















{