o
    0 ix                     @   s\  d dl Z d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 dadd	 Zd
d Zdd ZdFddZdFddZdd ZdFddZdd ZdFddZdFddZdFddZdFddZd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Z dGd3d4Z!d5d6 Z"d7d8 Z#d9d: Z$dHd=d>Z%dFd?d@Z&dIdBdCZ'dJdDdEZ(dS )K    N)linalg)_core)cublas)device)_util   c                   C   s   t S N_batched_gesv_limit r   r   W/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/cupy/cublas.pyget_batched_gesv_limit   s   r   c                 C   s   | a d S r   r	   )limitr   r   r   set_batched_gesv_limit   s   r   c                 C   s8  t | | t |  t |  | j|jks| j|jd kr/| jdd |jd| jd  ks3tdt | |\}}|jdkrGt	
|j|S |dkrNd}n|dkrUd}n|d	kr\d
}n|dkrcd}ntdtt|d }tt|d }| jdkrt| jdd nd}| jd }| j|jkr|jd nd}	|j}
| jj}|jj}t	j| |||ddd|d} t	j||||	ddd|d}| jj|kr|  } |jj|kr| }|t krtd|t  t }|}|| | j }t	j| jj| jj||  |t	jd}|}||	 |j }t	j|jj|jj||  |t	jd}t	j
||ftjd}t	j
|ftjd}tj
dtjd}||||jj||jj|jj| t  || ||tj!||	|jj||jj|jj||j"j| |d dkrd|j#}|d dk r|d|d  7 }t$%||ddd|
j&|ddS )a  Solves multiple linear matrix equations using cublas<t>getr[fs]Batched().

    Computes the solution to system of linear equation ``ax = b``.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.
       NzEa must have (..., M, M) shape and b must have (..., M) or (..., M, K)r   fsdFcDzinvalid dtypeZgetrfBatchedZgetrsBatched   dtypez/The matrix size ({}) exceeds the set limit ({}))r   z Error reported by {} in cuBLAS. z)The {}-th parameter had an illegal value.F)copy)'r   Z_assert_cupy_arrayZ_assert_stacked_2dZ_assert_stacked_squarendimshape
ValueErrorZlinalg_common_typesizecupyempty	TypeErrorgetattrr   numpyproddataptrZascontiguousarrayZreshapeZ	transposer   r   warningswarnformatr   get_cublas_handleitemsizeZarangeZuintpZint32Z3_check_cublas_info_array_if_synchronization_allowedCUBLAS_OP_Nctypes__name__r   ZLinAlgErrorZastype)abr   Z	out_dtypetZgetrfZgetrsbsnZnrhsZb_shapeZ
a_data_ptrZ
b_data_ptrhandleldaZa_stepZa_arrayldbZb_stepZb_arrayZpivotZdinfoinfomsgr   r   r   batched_gesv   s   

"
"


 
r=   c                 C      t | |dS )zFinds the (smallest) index of the element with the maximum magnitude.

    Note: The result index is 1-based index (not 0-based index).
    Zamax	_iamaxminxoutr   r   r   iamaxs      rD   c                 C   r>   )zFinds the (smallest) index of the element with the minimum magnitude.

    Note: The result index is 1-based index (not 0-based index).
    Zaminr?   rA   r   r   r   iamin{   rE   rF   c              
   C   s   | j dkrtd| j | jj}|dkrd}n|dkrd}n|dkr&d}n|dkr-d	}ntd
ttd| | }t	 }d}t
|||\}}	}
z||| j| jjd| W t||
 nt||
 w |d u rl|	}|S |j|krwt|	| |S )Nr   !x must be a 1D array (actual: {})r   r   r   r   r   r   r   r   i)r   r!   r-   r   charr%   r&   r   r   r.   _setup_result_ptrr"   r)   r*   setPointerModer   elementwise_copy)rB   rC   namer   r5   funcr8   result_dtype
result_ptrresult	orig_moder   r   r   r@      s6   


r@   c           	   
   C      | j dkrtd| j | jj}|dkrtj}n|dkr!tj}n|dkr)tj}n|dkr1tj	}nt
dt }| }t|||\}}}z||| j| jjd| W t|| nt|| w |du ri|}|S |j|krtt|| |S )	z&Computes the sum of the absolute of x.r   rG   r   r   r   r   r   N)r   r!   r-   r   rI   r   ZsasumZdasumZscasumZdzasumr%   r   r.   lowerrJ   r"   r)   r*   rK   r   rL   	rB   rC   r   rN   r8   rO   rP   rQ   rR   r   r   r   asum   4   


rV   c              
   C   s   t || |jj}|dkrtj}n|dkrtj}n|dkr!tj}n|dkr)tj}ntdt	
 }t|| |\} }}z|||j||jjd|jjd W t|| dS t|| w )z5Computes y += a * x.

    (*) y will be updated.
    r   r   r   r   r   r   N)_check_two_vectorsr   rI   r   ZsaxpyZdaxpyZcaxpyZzaxpyr%   r   r.   _setup_scalar_ptrr"   r)   r*   rK   )r3   rB   yr   rN   r8   a_ptrrR   r   r   r   axpy   s    
 r\   c           
   
   C   s   | j j}|dkrtj}n|dkrtj}n|dv rtdtdt| | t }|}t	|||\}}}	z||| j
| jjd|jjd| W t||	 nt||	 w |du r[|}|S |j |krft|| |S )$Computes the dot product of x and y.r   r   FDz&Use dotu() or dotc() for complex dtyper   r   N)r   rI   r   sdotZddotr%   rX   r   r.   rJ   r"   r)   r*   rK   r   rL   
rB   rZ   rC   r   rN   r8   rO   rP   rQ   rR   r   r   r   dot   s.   

 
ra   c           
   
   C      | j j}|dv rt| ||dS |dkrtj}n|dkrtj}ntdt| | t	 }|}t
|||\}}}	z||| j| jjd|jjd| W t||	 nt||	 w |du r^|}|S |j |krit|| |S )r]   fdrC   r   r   r   r   N)r   rI   ra   r   ZcdotuZzdotur%   rX   r   r.   rJ   r"   r)   r*   rK   r   rL   r`   r   r   r   dotu   .   

 
re   c           
   
   C   rb   )z+Computes the dot product of x.conj() and y.rc   rd   r   r   r   r   N)r   rI   ra   r   ZcdotcZzdotcr%   rX   r   r.   rJ   r"   r)   r*   rK   r   rL   r`   r   r   r   dotc  rf   rg   c           	   
   C   rS   )	z(Computes the Euclidean norm of vector x.r   rG   r   r   r   r   r   N)r   r!   r-   r   rI   r   Zsnrm2Zdnrm2Zscnrm2Zdznrm2r%   r   r.   rT   rJ   r"   r)   r*   rK   r   rL   rU   r   r   r   nrm27  rW   rh   c              
   C   s   |j dkrtd|j |jj}|dkrtj}n|dkr!tj}n|dkr)tj}n|dkr1tj	}nt
dt }t|| |\} }}z|||j||jjd W t|| dS t|| w )	z1Computes x *= a.

    (*) x will be updated.
    r   rG   r   r   r   r   r   N)r   r!   r-   r   rI   r   ZsscalZdscalZcscalZzscalr%   r   r.   rY   r"   r)   r*   rK   )r3   rB   r   rN   r8   r[   rR   r   r   r   scalX  s"   
ri   c                 C   sx   | j dkrtd| j |j dkrtd|j | j|jkr*td| j|j| j|jkr:td| j|jd S )Nr   rG   z!y must be a 1D array (actual: {})z1x and y must be the same size (actual: {} and {})z2x and y must be the same dtype (actual: {} and {}))r   r!   r-   r"   r   r%   )rB   rZ   r   r   r   rX   t  s   

rX   c                 C   s   t | }|d u st|tjr.|d u s|j|kr tjg |d}n|}|jj}t 	| t j
 n%t|tjrO|j|krAtjg |d}n|}|jj}t 	| t j ntd|||fS )Nr   z(out must be either cupy or numpy ndarray)r   getPointerMode
isinstancer#   ndarrayr   r$   r)   r*   rK   CUBLAS_POINTER_MODE_DEVICEr'   r1   CUBLAS_POINTER_MODE_HOSTr%   )r8   rC   r   moderQ   rP   r   r   r   rJ     s   


rJ   c                 C   sL   t ||\}}t| }t|tjrt| tj nt| tj |||fS r   )	_get_scalar_ptrr   rj   rk   r#   rl   rK   rm   rn   )r8   r3   r   r[   ro   r   r   r   rY     s   

rY   c                 C   sh   t | tjr| j|krtj| |d} | jj}| |fS t | tjr%| j|ks,tj| |d} | jj}| |fS )Nr   )	rk   r#   rl   r   arrayr)   r*   r'   r1   )r3   r   r[   r   r   r   rp     s   
rp   c                 C   s  |j j}|dkrtj}n|dkrtj}n|dkrtj}n|dkr$tj}ntd|jdks/J |j|j  kr<dks?J  J |j |j   krM|j ksPJ  J |j	\}}	t
| } | tjkrd|	|}
}n||	}
}|j	d |
ksrJ |j	d |ks{J t||j \}}t||j \}}t }t|}t|tjst|tjrt|tjst|}|jj}t|tjst|}|jj}t|tj nt|tj z{|jr||| ||	||jj||jjd||jjd nP|jr| tjkr| tjkrtj} ntj} ||| |	|||jj|	|jjd||jjd n-|jdd	}||| ||	||jj||jjd||jjd W t|| d
S W t|| d
S W t|| d
S t|| w )zComputes y = alpha * op(a) @ x + beta * y

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.

    Note: ''y'' will be updated.
    r   r   r   r   r   r   r   r   orderN)r   rI   r   ZsgemvZdgemvZcgemvZzgemvr%   r   r    _trans_to_cublas_opr0   rp   r   r.   rj   rk   r#   rl   rq   r)   r*   rK   rm   rn   _f_contiguous_c_contiguousCUBLAS_OP_CCUBLAS_OP_Tr   )transaalphar3   rB   betarZ   r   rN   mr7   xlenZylen	alpha_ptrbeta_ptrr8   rR   r   r   r   gemv  sl    "









r   c                 C   s  |j j}|dkrtj}n|dkrtj}n|dv rtdtd|jdks'J |j|j  kr4dks7J  J |j |j   krE|j ksHJ  J |j\}}|jd |ksVJ |jd |ks_J t	 }t
|| |\} }	}
|jj|jj}}z\|jr|||||	|d|d|jj|
 n7|jr|||||	|d|d|jj|
 n-|jd	d
}|||||	|d|d|jj|
 t|| W t||
 dS W t||
 dS W t||
 dS t||
 w )DComputes a += alpha * x @ y.T

    Note: ''a'' will be updated.
    r   r   r^   z#Use geru or gerc for complex dtypesr   r   r   r   r   rr   N)r   rI   r   ZsgerZdgerr%   r   r    r   r.   rY   r)   r*   ru   rv   r   r   rL   rK   rz   rB   rZ   r3   r   rN   r|   r7   r8   r~   rR   Zx_ptrZy_ptrZaar   r   r   ger  s<    "
  r   c                 C   s  |j j}|dv rt| |||S |dkrtj}n|dkrtj}ntd|jdks*J |j|j  kr7dks:J  J |j |j   krH|j ksKJ  J |j\}}|jd |ksYJ |jd |ksbJ t	
 }t|| |\} }	}
|jj|jj}}z\|jr|||||	|d|d|jj|
 n7|jr|||||	|d|d|jj|
 n-|jdd}|||||	|d|d|jj|
 t|| W t||
 d	S W t||
 d	S W t||
 d	S t||
 w )
r   rc   r   r   r   r   r   r   rr   N)r   rI   r   r   ZcgeruZzgerur%   r   r    r   r.   rY   r)   r*   ru   rv   r   r   rL   rK   r   r   r   r   geru  s<    "
  r   c                 C   s  |j j}|dv rt| |||S |dkrtj}n|dkrtj}ntd|jdks*J |j|j  kr7dks:J  J |j |j   krH|j ksKJ  J |j\}}|jd |ksYJ |jd |ksbJ t	
 }t|| |\} }	}
|jj|jj}}z@|jr|||||	|d|d|jj|
 n$|jdd}|||||	|d|d|jj|
 t|| W t||
 d	S W t||
 d	S t||
 w )
zKComputes a += alpha * x @ y.T.conj()

    Note: ''a'' will be updated.
    rc   r   r   r   r   r   r   rr   N)r   rI   r   r   ZcgercZzgercr%   r   r    r   r.   rY   r)   r*   ru   r   r   rL   rK   r   r   r   r   gerc8  s4    "
 r   Fc                 C   s  |j j}|dkrtj}n|dkrtj}ntd|jdksJ |j|j  kr,dks/J  J |j |j   kr=|j ks@J  J |j\}	}
|jd |
ksNJ |jd |
ksWJ |js`|j	dd}t
||j \}}t
||j \}}t }t|}t|tjst|tjrt|tjst|}|jj}t|tjst|}|jj}t|tj nt|tj |rtj}ntj}t }z||||
| ||jj|	|jjd||jjd W t|| |S t|| w )	z)Computes y = alpha*A @ x + beta * y

    r   r   zComplex dtypes not supportedr   r   r   r   rr   )r   rI   r   ZssbmvZdsbmvr%   r   r    ru   r   rp   r   r.   rj   rk   r#   rl   rq   r)   r*   rK   rm   rn   CUBLAS_FILL_MODE_LOWERCUBLAS_FILL_MODE_UPPER)krz   r3   rB   r{   rZ   rT   r   rN   r|   r7   r~   r   r8   rR   uplor   r   r   sbmv[  sN    "





r   c                 C   sb   | dks	| t jkrt j} | S | dks| t jkrt j} | S | dks%| t jkr*t j} | S td| )NNTHzinvalid trans (actual: {}))r   r0   rx   rw   r%   r-   )transr   r   r   rt     s   rt   c                 C   sL   d }|t jt jfv r"| jr| jd }||fS | jr"| jd }d| }||fS )Nr   r   )r   r0   rx   ru   r    rv   )r3   r   ldr   r   r   _decide_ld_and_trans  s   

r   c                 C   s,   |d u r| j d }| js| jdd} | |fS )Nr   r   rr   )r    ru   r   )r3   r9   r   r   r   _change_order_if_necessary  s
   
r         ?        c                 C   sj  |j |j   krdksJ  J |j|jksJ |jj}|dkr$tj}n|dkr,tj}n|dkr4tj}n|dkr<tj}ntdt	| } t	|}| tj
krS|j\}	}
n|j\}
}	|tj
krl|jd }|jd |
kskJ n|jd }|jd |
kszJ |d	u rtj|	|f|dd
}d}n|j dksJ |j|	|fksJ |j|ksJ t||j\}}t||j\}}t }t|}t|tjst|tjrt|tjst|}|jj}t|tjst|}|jj}t|tj nt|tj t|| \}} t||\}}|d	u sh|d	u sh|jr8z ||| ||	||
||jj||jj|||jj|	 W t|| |S t|| w |jrhz$||d| d|  ||	|
||jj||jj|||jj| W t|| |S t|| w t||\}}t||\}}|}|js|jdd}z||| ||	||
||jj||jj|||jj|	 W t|| nt|| w |jst || |S )a  Computes out = alpha * op(a) @ op(b) + beta * out

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.
    op(b) = b if transb is 'N', op(b) = b.T if transb is 'T',
    op(b) = b.T.conj() if transb is 'H'.
    r   r   r   r   r   r   r   r   Nr   rs   r   rr   )!r   r   rI   r   ZsgemmZdgemmZcgemmZzgemmr%   rt   r0   r    r#   r$   rp   r   r.   rj   rk   rl   rq   r)   r*   rK   rm   rn   r   ru   rv   r   r   r   rL   )ry   transbr3   r4   rC   rz   r{   r   rN   r|   r   r7   r~   r   r8   rR   r9   r:   r   r   r   r   gemm  s    







r   c                 C   sL  |j |j   krdksJ  J |j|jksJ |jj}|dkr$tj}n|dkr,tj}n|dkr4tj}n|dkr<tj}ntdt	| } t	|}| tj
krS|j\}	}
n|j\}
}	|tj
krg|j|	|
fksfJ n	|j|
|	fkspJ |du rtj|	|
f|dd}n|j dksJ |j|	|
fksJ |j|ksJ t||j\}}t||j\}}t }t|}t|tjst|tjrt|tjst|}|jj}t|tjst|}|jj}t|tj nt|tj t|| \}} t||\}}|du sZ|du sZ|jr+z||| ||	|
||jj|||jj||jj|	 W t|| |S t|| w |jrZz#||d	|  d	| |
|	||jj|||jj||jj|
 W t|| |S t|| w t||\}}t||\}}|}|jst|jdd
}z||| ||	|
||jj|||jj||jj|	 W t|| nt|| w |jst || |S )zComputes alpha * op(a) + beta * op(b)

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.
    op(b) = b if transb is 'N', op(b) = b.T if transb is 'T',
    op(b) = b.T.conj() if transb is 'H'.
    r   r   r   r   r   r   Nr   r   rr   )!r   r   rI   r   ZsgeamZdgeamZcgeamZzgeamr%   rt   r0   r    r#   r$   rp   r   r.   rj   rk   rl   rq   r)   r*   rK   rm   rn   r   ru   rv   r   r   r   rL   )ry   r   rz   r3   r{   r4   rC   r   rN   r|   r7   r~   r   r8   rR   r9   r:   r   r   r   r   geam  s    





r   r   c                 C   s0  |j dksJ d|j   krdksJ  J |j|jksJ |jj}|dkr*tj}n|dkr2tj}n|dkr:tj}n|dkrBtj}ntd| dksO| tj	krStj	} n| d	ks\| tj
kr`tj
} ntd
| |j\}}| tj	kr|j|d t| d ksJ n|j|d t| d ksJ |du r|jrd}	nd}	tj||f||	d}n|j dksJ |j|jksJ |j|jksJ t }
|jr|js|jdd}||
d|  |||jj||jj||jj|
 |S |js|jdd}|}|js|jdd}||
| |||jj||jj||jj|
 |jst|| |S )znComputes diag(x) @ a or a @ diag(x)

    Computes diag(x) @ a if side is 'L', a @ diag(x) if side is 'R'.
    r   r   r   r   r   r   r   LRzinvalid side (actual: {})r   NCr   rr   )r   r   rI   r   ZsdgmmZddgmmZcdgmmZzdgmmr%   ZCUBLAS_SIDE_LEFTZCUBLAS_SIDE_RIGHTr!   r-   r    r"   absrv   r#   r$   r   r.   r   r)   r*   ru   r   rL   )Zsider3   rB   rC   Zincxr   rN   r|   r7   rs   r8   r   r   r   r   dgmmd  sb   

 r   c                 C   s  |j dksJ |jj}|dkrtj}n|dkrtj}n|dkr#tj}n|dkr+tj}ntdt	| } | tj
kr>|j\}}	n|j\}	}|du rTtj||f|dd}d	}n|j dks[J |j||fksdJ |j|kskJ |rqtj}
ntj}
t||j\}}t||j\}}t }t|}t|tjst|tjrt|tjst|}|jj}t|tjst|}|jj}t|tj nt|tj t|| \}} t|| \}}|jr|js|jd
d}d|  } |jd }z||d|
 | ||	||jj|||jj| W t|| |S t|| w |js"|jdd}|jd }d|  } |}|js.|jdd}z|||
| ||	||jj|||jj| W t|| nt|| w |jsX||d< |S )a"  Computes out := alpha*op1(a)*op2(a) + beta*out

    op1(a) = a if trans is 'N', op2(a) = a.T if transa is 'N'
    op1(a) = a.T if trans is 'T', op2(a) = a if transa is 'T'
    lower specifies  whether  the  upper  or  lower triangular
    part  of the  array  out  is to be  referenced
    r   r   r   r   r   r   Nr   r   r   rr   r   r   .) r   r   rI   r   ZssyrkZdsyrkZcsyrkZzsyrkr%   rt   r0   r    r#   Zzerosr   r   rp   r   r.   rj   rk   rl   rq   r)   r*   rK   rm   rn   r   rv   r   ru   )r   r3   rC   rz   r{   rT   r   rN   r7   r   r   r~   r   r8   rR   r9   Zldo_r   r   r   r   syrk  s   










r   r   )F)Nr   r   )Nr   )Nr   r   F))r'   r   r+   r#   r   Zcupy_backends.cuda.libsr   Z	cupy.cudar   Zcupy.linalgr   r
   r   r   r=   rD   rF   r@   rV   r\   ra   re   rg   rh   ri   rX   rJ   rY   rp   r   r   r   r   r   rt   r   r   r   r   r   r   r   r   r   r   <module>   sL    
Z

!!



!
A&%
#5

]
X<