o
    0 iZ                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ e ZejZejZejZee G d	d
 d
eZeG dd deZ eG dd deZ!eG dd deZ"eG dd deZ#eG dd deZ$eG dd deZ%eG dd deZ&eG dd deZ'eG dd deZ(eG dd deZ)eG dd  d eZ*eG d!d" d"eZ+eG d#d$ d$eZ,eG d%d& d&eZ-eG d'd( d(eZ.eG d)d* d*eZ/eG d+d, d,eZ0eG d-d. d.eZ1eG d/d0 d0eZ2eG d1d2 d2eZ3eG d3d4 d4eZ4d5d6 Z5d7d8 Z6d9d: Z7ee8G d;d< d<eZ9d=d> Z:d?d@ Z;dAdB Z<dCdD Z=e7ej>j?Z@e=e jAZBe=e jCZDe7ej>jEZFe=e jGZHe=e jIZJe7ej>jKZLe=e jMZNe=e jOZPe7ej>jQZRe7ej>jSZTe5ej>jUZVe6e jWZXe5ej>jYZZe6e[Z\e:ej>j]Z^e<e j_ e:ej>j`Zae<e jb e:ej>jcZde<e je e:ej>jfZge<e jh e:ej>jiZje<e jk e:ej>jlZme<e jn e=e jo e=e jp dEdF ZqdGdH ZreqdIZseqdJZteqdKZueqdLZveqdMZweqdNZxeqdOZyeqdPZzeqdQZ{eqdRZ|eqdSZ}eqdTZ~eqdUZeqdVZeqdWZerdXZdYdZ ZejejejejejejfZejejejejfZejejfZeejjAeZeejjGeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeG d[d\ d\eZeG d]d^ d^eZeG d_d` d`eZeG dadb dbeZeG dcdd ddeZeG dedf dfeZeG dgdh dheZeG didj djeZeG dkdl dleZeG dmdn dneZeeee eD ]Zeee q!e	D ]Zeee q,eD ]Zeee q7e
D ]Zedov rNeee qBdS )p    N)types)parse_dtypeparse_shaperegister_number_classesregister_numpy_ufunctrigonometric_functionscomparison_functionsmath_operationsbit_twiddling_functions)AttributeTemplateConcreteTemplateAbstractTemplateCallableTemplate	signatureRegistrydim3)
Conversion)cuda) declare_device_function_templatec                   @      e Zd Zdd ZdS )Cuda_array_declc                 C      dd }|S )Nc                 S   s   t | tjrt | tjsd S nt | tjtjfr$tdd | D r#d S nd S t| }t|}|d ur>|d ur@tj	||ddS d S d S )Nc                 S   s   g | ]	}t |tj qS  )
isinstancer   IntegerLiteral).0sr   r   _/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/numba/cuda/cudadecl.py
<listcomp>$   s    z:Cuda_array_decl.generic.<locals>.typer.<locals>.<listcomp>C)dtypendimZlayout)
r   r   Integerr   TupleZUniTupleanyr   r   ZArray)shaper!   r"   Znb_dtyper   r   r   typer   s    z&Cuda_array_decl.generic.<locals>.typerr   selfr'   r   r   r   generic   s   zCuda_array_decl.genericN__name__
__module____qualname__r*   r   r   r   r   r      s    r   c                   @      e Zd ZejjZdS )Cuda_shared_arrayN)r,   r-   r.   r   sharedarraykeyr   r   r   r   r0   2       r0   c                   @   r/   )Cuda_local_arrayN)r,   r-   r.   r   localr2   r3   r   r   r   r   r5   7   r4   r5   c                   @      e Zd ZejjZdd ZdS )Cuda_const_array_likec                 C   r   )Nc                 S   s   | S Nr   )Zndarrayr   r   r   r'   A      z,Cuda_const_array_like.generic.<locals>.typerr   r(   r   r   r   r*   @   s   zCuda_const_array_like.genericN)r,   r-   r.   r   constZ
array_liker3   r*   r   r   r   r   r8   <       r8   c                   @      e Zd ZejZeejgZ	dS )Cuda_threadfence_deviceN)
r,   r-   r.   r   Zthreadfencer3   r   r   nonecasesr   r   r   r   r>   F       r>   c                   @   r=   )Cuda_threadfence_blockN)
r,   r-   r.   r   Zthreadfence_blockr3   r   r   r?   r@   r   r   r   r   rB   L   rA   rB   c                   @   r=   )Cuda_threadfence_systemN)
r,   r-   r.   r   Zthreadfence_systemr3   r   r   r?   r@   r   r   r   r   rC   R   rA   rC   c                   @   s*   e Zd ZejZeejeejej	gZ
dS )Cuda_syncwarpN)r,   r-   r.   r   Zsyncwarpr3   r   r   r?   i4r@   r   r   r   r   rD   X   s    rD   c                
   @   s   e Zd ZejZeeej	ej
fej	ej	ej	ej	ej	eeejej
fej	ej	ejej	ej	eeejej
fej	ej	ejej	ej	eeejej
fej	ej	ejej	ej	gZdS )Cuda_shfl_sync_intrinsicN)r,   r-   r.   r   Zshfl_sync_intrinsicr3   r   r   r$   rE   b1i8f4f8r@   r   r   r   r   rF   ^   s    rF   c                   @   s6   e Zd ZejZeeej	ej
fej	ej	ej
gZdS )Cuda_vote_sync_intrinsicN)r,   r-   r.   r   Zvote_sync_intrinsicr3   r   r   r$   rE   rG   r@   r   r   r   r   rK   m   s
    
rK   c                   @   sV   e Zd ZejZeejejejeejejej	eejejej
eejejejgZdS )Cuda_match_any_syncN)r,   r-   r.   r   Zmatch_any_syncr3   r   r   rE   rH   rI   rJ   r@   r   r   r   r   rL   t   s    rL   c                   @   s   e Zd ZejZeeej	ej
fej	ej	eeej	ej
fej	ejeeej	ej
fej	ejeeej	ej
fej	ejgZdS )Cuda_match_all_syncN)r,   r-   r.   r   Zmatch_all_syncr3   r   r   r$   rE   rG   rH   rI   rJ   r@   r   r   r   r   rM      s    rM   c                   @   r=   )Cuda_activemaskN)
r,   r-   r.   r   Z
activemaskr3   r   r   uint32r@   r   r   r   r   rN      rA   rN   c                   @   r=   )Cuda_lanemask_ltN)
r,   r-   r.   r   Zlanemask_ltr3   r   r   rO   r@   r   r   r   r   rP      rA   rP   c                
   @   z   e Zd ZdZejZeej	ej	eej
ej
eejejeejejeejejeejejeejejeejejgZdS )	Cuda_popcz
    Supported types from `llvm.popc`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r,   r-   r.   __doc__r   Zpopcr3   r   r   int8int16int32int64uint8uint16rO   uint64r@   r   r   r   r   rR          rR   c                   @   sB   e Zd ZdZejZeej	ej	ej	ej	eej
ej
ej
ej
gZdS )Cuda_fmaz
    Supported types from `llvm.fma`
    [here](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#standard-c-library-intrinics)
    N)r,   r-   r.   rS   r   fmar3   r   r   float32float64r@   r   r   r   r   r\      s    r\   c                   @   s,   e Zd ZejjZeej	ej	ej	ej	gZ
dS )	Cuda_hfmaN)r,   r-   r.   r   fp16Zhfmar3   r   r   float16r@   r   r   r   r   r`      s    r`   c                   @   .   e Zd ZejZeejejeej	ej	gZ
dS )	Cuda_cbrtN)r,   r-   r.   r   Zcbrtr3   r   r   r^   r_   r@   r   r   r   r   rd      s
    rd   c                   @   rc   )	Cuda_brevN)r,   r-   r.   r   Zbrevr3   r   r   rO   rZ   r@   r   r   r   r   re      s
    re   c                
   @   rQ   )Cuda_clzz
    Supported types from `llvm.ctlz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r,   r-   r.   rS   r   Zclzr3   r   r   rT   rU   rV   rW   rX   rY   rO   rZ   r@   r   r   r   r   rf      r[   rf   c                
   @   sz   e Zd ZdZejZeej	ej
eej	ejeej	ejeej	ejeej	ejeej	ejeej	ej	eej	ejgZdS )Cuda_ffsz
    Supported types from `llvm.cttz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r,   r-   r.   rS   r   Zffsr3   r   r   rO   rT   rU   rV   rW   rX   rY   rZ   r@   r   r   r   r   rg      r[   rg   c                   @   s   e Zd ZejZdd ZdS )	Cuda_selpc                 C   sX   |rJ |\}}}t jt jt jt jt jt jt jt jf}||ks#||vr%d S t	||||S r9   )
r   r_   r^   rU   rY   rV   rO   rW   rZ   r   )r)   argskwstestabsupported_typesr   r   r   r*      s   
zCuda_selp.genericN)r,   r-   r.   r   Zselpr3   r*   r   r   r   r   rh      s    rh   c                       t G  fdddt}|S )Nc                       s    e Zd Z ZeejejgZdS )z'_genfp16_unary.<locals>.Cuda_fp16_unaryNr,   r-   r.   r3   r   r   rb   r@   r   l_keyr   r   Cuda_fp16_unary  s    rs   registerr   rr   rs   r   rq   r   _genfp16_unary     rw   c                    s    t  G  fdddt}|S )Nc                       s   e Zd Z Zdd ZdS )z0_genfp16_unary_operator.<locals>.Cuda_fp16_unaryc                 S   s8   |rJ t |dkr|d tjkrttjtjS d S d S )N   r   )lenr   rb   r   )r)   ri   rj   r   r   r   r*     s   z8_genfp16_unary_operator.<locals>.Cuda_fp16_unary.genericNr,   r-   r.   r3   r*   r   rq   r   r   rs     s    rs   register_globalr   rv   r   rq   r   _genfp16_unary_operator  s   r~   c                    ro   )Nc                       s$   e Zd Z ZeejejejgZdS )z)_genfp16_binary.<locals>.Cuda_fp16_binaryNrp   r   rq   r   r   Cuda_fp16_binary#  s    r   rt   )rr   r   r   rq   r   _genfp16_binary"  rx   r   c                   @   r   )Floatc                 C   s&   |rJ |\}|t jkrt||S d S r9   )r   rb   r   )r)   ri   rj   argr   r   r   r*   .  s
   

zFloat.genericNr+   r   r   r   r   r   +  s    r   c                    ro   )Nc                       s$   e Zd Z ZeejejejgZdS )z1_genfp16_binary_comparison.<locals>.Cuda_fp16_cmpN)	r,   r-   r.   r3   r   r   rG   rb   r@   r   rq   r   r   Cuda_fp16_cmp8  s    r   rt   )rr   r   r   rq   r   _genfp16_binary_comparison7  s   r   c                    s"   t  G  fdddt}|S )Nc                          e Zd Z ZfddZdS )z1_fp16_binary_operator.<locals>.Cuda_fp16_operatorc                    s   |rJ t |dkrM|d tjks|d tjkrO|d tjkr+| j|d |d }n| j|d |d }|tjksE|tjksE|tjkrQt	 tjtjS d S d S d S )N   r   ry   )
rz   r   rb   contextZcan_convertr   exactZpromotesafer   )r)   ri   rj   Zconvertible)rettyr   r   r*   T  s   



z9_fp16_binary_operator.<locals>.Cuda_fp16_operator.genericNr{   r   rr   r   r   r   Cuda_fp16_operatorP      r   r|   )rr   r   r   r   r   r   _fp16_binary_operatorO  s   r   c                 C      t | tjS r9   )r   r   rG   opr   r   r   _genfp16_comparison_operatorn     r   c                 C   r   r9   )r   r   rb   r   r   r   r   _genfp16_binary_operatorr  r   r   c                 C   s"   t d|  tjtjf}t|S NZ__numba_wrapper_r   r   rb   Functionfnamedeclr   r   r   _resolve_wrapped_unary  s
   

r   c                 C   s&   t d|  tjtjtjf}t|S r   r   r   r   r   r   _resolve_wrapped_binary  s
   


r   ZhsinZhcosZhlogZhlog10Zhlog2ZhexpZhexp10Zhexp2ZhsqrtZhrsqrtZhfloorZhceilZhrcpZhrintZhtruncZhdivc                    s   t G  fdddt}|S )Nc                       r   )z_gen.<locals>.Cuda_atomicc                    s^   |rJ |\}}}|j  vrd S |jdkrt|j |tj|j S |jdkr-t|j |||j S d S Nry   )r!   r"   r   r   intp)r)   ri   rj   aryidxval)rn   r   r   r*     s   



z!_gen.<locals>.Cuda_atomic.genericNr{   r   rr   rn   r   r   Cuda_atomic  r   r   )ru   r   )rr   rn   r   r   r   r   _gen  s   r   c                   @   r7   )Cuda_atomic_compare_and_swapc                 C   s@   |rJ |\}}}|j }|tv r|jdkrt||||S d S d S r   )r!   integer_numba_typesr"   r   )r)   ri   rj   r   oldr   dtyr   r   r   r*     s   
z$Cuda_atomic_compare_and_swap.genericN)r,   r-   r.   r   atomicZcompare_and_swapr3   r*   r   r   r   r   r     r<   r   c                   @   r7   )Cuda_atomic_casc                 C   s`   |rJ |\}}}}|j }|tvrd S |jdkr!t||tj||S |jdkr.t|||||S d S r   )r!   r   r"   r   r   r   )r)   ri   rj   r   r   r   r   r   r   r   r   r*     s   

zCuda_atomic_cas.genericN)r,   r-   r.   r   r   Zcasr3   r*   r   r   r   r   r     r<   r   c                   @   s"   e Zd ZejZeejej	gZ
dS )Cuda_nanosleepN)r,   r-   r.   r   Z	nanosleepr3   r   r   voidrO   r@   r   r   r   r   r     s    r   c                   @   s(   e Zd ZeZdd Zdd Zdd ZdS )
Dim3_attrsc                 C      t jS r9   r   rV   r)   modr   r   r   	resolve_x
     zDim3_attrs.resolve_xc                 C   r   r9   r   r   r   r   r   	resolve_y  r   zDim3_attrs.resolve_yc                 C   r   r9   r   r   r   r   r   	resolve_z  r   zDim3_attrs.resolve_zN)r,   r-   r.   r   r3   r   r   r   r   r   r   r   r     s
    r   c                   @       e Zd ZeejZdd ZdS )CudaSharedModuleTemplatec                 C   
   t tS r9   )r   r   r0   r   r   r   r   resolve_array     
z&CudaSharedModuleTemplate.resolve_arrayN)	r,   r-   r.   r   Moduler   r1   r3   r   r   r   r   r   r         r   c                   @   r   )CudaConstModuleTemplatec                 C   r   r9   )r   r   r8   r   r   r   r   resolve_array_like   r   z*CudaConstModuleTemplate.resolve_array_likeN)	r,   r-   r.   r   r   r   r;   r3   r   r   r   r   r   r     r   r   c                   @   r   )CudaLocalModuleTemplatec                 C   r   r9   )r   r   r5   r   r   r   r   r   (  r   z%CudaLocalModuleTemplate.resolve_arrayN)	r,   r-   r.   r   r   r   r6   r3   r   r   r   r   r   r   $  r   r   c                   @   s   e Zd ZeejZdd Zdd Z	dd Z
dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )CudaAtomicTemplatec                 C   r   r9   )r   r   Cuda_atomic_addr   r   r   r   resolve_add0  r   zCudaAtomicTemplate.resolve_addc                 C   r   r9   )r   r   Cuda_atomic_subr   r   r   r   resolve_sub3  r   zCudaAtomicTemplate.resolve_subc                 C   r   r9   )r   r   Cuda_atomic_andr   r   r   r   resolve_and_6  r   zCudaAtomicTemplate.resolve_and_c                 C   r   r9   )r   r   Cuda_atomic_orr   r   r   r   resolve_or_9  r   zCudaAtomicTemplate.resolve_or_c                 C   r   r9   )r   r   Cuda_atomic_xorr   r   r   r   resolve_xor<  r   zCudaAtomicTemplate.resolve_xorc                 C   r   r9   )r   r   Cuda_atomic_incr   r   r   r   resolve_inc?  r   zCudaAtomicTemplate.resolve_incc                 C   r   r9   )r   r   Cuda_atomic_decr   r   r   r   resolve_decB  r   zCudaAtomicTemplate.resolve_decc                 C   r   r9   )r   r   Cuda_atomic_exchr   r   r   r   resolve_exchE  r   zCudaAtomicTemplate.resolve_exchc                 C   r   r9   )r   r   Cuda_atomic_maxr   r   r   r   resolve_maxH  r   zCudaAtomicTemplate.resolve_maxc                 C   r   r9   )r   r   Cuda_atomic_minr   r   r   r   resolve_minK  r   zCudaAtomicTemplate.resolve_minc                 C   r   r9   )r   r   Cuda_atomic_nanminr   r   r   r   resolve_nanminN  r   z!CudaAtomicTemplate.resolve_nanminc                 C   r   r9   )r   r   Cuda_atomic_nanmaxr   r   r   r   resolve_nanmaxQ  r   z!CudaAtomicTemplate.resolve_nanmaxc                 C   r   r9   )r   r   r   r   r   r   r   resolve_compare_and_swapT  r   z+CudaAtomicTemplate.resolve_compare_and_swapc                 C   r   r9   )r   r   r   r   r   r   r   resolve_casW  r   zCudaAtomicTemplate.resolve_casN)r,   r-   r.   r   r   r   r   r3   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ,  s     r   c                   @   s  e Zd ZeejZdd Zdd Z	dd Z
dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Z d3d4 Z!d5d6 Z"d7d8 Z#d9d: Z$d;d< Z%d=S )>CudaFp16Templatec                 C   r   r9   )r   r   	Cuda_haddr   r   r   r   resolve_hadd_  r   zCudaFp16Template.resolve_haddc                 C   r   r9   )r   r   	Cuda_hsubr   r   r   r   resolve_hsubb  r   zCudaFp16Template.resolve_hsubc                 C   r   r9   )r   r   	Cuda_hmulr   r   r   r   resolve_hmule  r   zCudaFp16Template.resolve_hmulc                 C      t S r9   )hdiv_devicer   r   r   r   resolve_hdivh  r:   zCudaFp16Template.resolve_hdivc                 C   r   r9   )r   r   	Cuda_hnegr   r   r   r   resolve_hnegk  r   zCudaFp16Template.resolve_hnegc                 C   r   r9   )r   r   	Cuda_habsr   r   r   r   resolve_habsn  r   zCudaFp16Template.resolve_habsc                 C   r   r9   )r   r   r`   r   r   r   r   resolve_hfmaq  r   zCudaFp16Template.resolve_hfmac                 C   r   r9   )hsin_devicer   r   r   r   resolve_hsint  r:   zCudaFp16Template.resolve_hsinc                 C   r   r9   )hcos_devicer   r   r   r   resolve_hcosw  r:   zCudaFp16Template.resolve_hcosc                 C   r   r9   )hlog_devicer   r   r   r   resolve_hlogz  r:   zCudaFp16Template.resolve_hlogc                 C   r   r9   )hlog10_devicer   r   r   r   resolve_hlog10}  r:   zCudaFp16Template.resolve_hlog10c                 C   r   r9   )hlog2_devicer   r   r   r   resolve_hlog2  r:   zCudaFp16Template.resolve_hlog2c                 C   r   r9   )hexp_devicer   r   r   r   resolve_hexp  r:   zCudaFp16Template.resolve_hexpc                 C   r   r9   )hexp10_devicer   r   r   r   resolve_hexp10  r:   zCudaFp16Template.resolve_hexp10c                 C   r   r9   )hexp2_devicer   r   r   r   resolve_hexp2  r:   zCudaFp16Template.resolve_hexp2c                 C   r   r9   )hfloor_devicer   r   r   r   resolve_hfloor  r:   zCudaFp16Template.resolve_hfloorc                 C   r   r9   )hceil_devicer   r   r   r   resolve_hceil  r:   zCudaFp16Template.resolve_hceilc                 C   r   r9   )hsqrt_devicer   r   r   r   resolve_hsqrt  r:   zCudaFp16Template.resolve_hsqrtc                 C   r   r9   )hrsqrt_devicer   r   r   r   resolve_hrsqrt  r:   zCudaFp16Template.resolve_hrsqrtc                 C   r   r9   )hrcp_devicer   r   r   r   resolve_hrcp  r:   zCudaFp16Template.resolve_hrcpc                 C   r   r9   )hrint_devicer   r   r   r   resolve_hrint  r:   zCudaFp16Template.resolve_hrintc                 C   r   r9   )htrunc_devicer   r   r   r   resolve_htrunc  r:   zCudaFp16Template.resolve_htruncc                 C   r   r9   )r   r   Cuda_heqr   r   r   r   resolve_heq  r   zCudaFp16Template.resolve_heqc                 C   r   r9   )r   r   Cuda_hner   r   r   r   resolve_hne  r   zCudaFp16Template.resolve_hnec                 C   r   r9   )r   r   Cuda_hger   r   r   r   resolve_hge  r   zCudaFp16Template.resolve_hgec                 C   r   r9   )r   r   Cuda_hgtr   r   r   r   resolve_hgt  r   zCudaFp16Template.resolve_hgtc                 C   r   r9   )r   r   Cuda_hler   r   r   r   resolve_hle  r   zCudaFp16Template.resolve_hlec                 C   r   r9   )r   r   Cuda_hltr   r   r   r   resolve_hlt  r   zCudaFp16Template.resolve_hltc                 C   r   r9   )r   r   	Cuda_hmaxr   r   r   r   resolve_hmax  r   zCudaFp16Template.resolve_hmaxc                 C   r   r9   )r   r   	Cuda_hminr   r   r   r   resolve_hmin  r   zCudaFp16Template.resolve_hminN)&r,   r-   r.   r   r   r   ra   r3   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r	  r  r  r  r  r  r  r  r   r   r   r   r   [  s@    r   c                   @   s   e Zd ZeeZdd Zdd Zdd Z	dd Z
d	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Z d5d6 Z!d7d8 Z"d9d: Z#d;S )<CudaModuleTemplatec                 C      t tjS r9   )r   r   r   Zcgr   r   r   r   
resolve_cg  r   zCudaModuleTemplate.resolve_cgc                 C   r   r9   r   r   r   r   r   resolve_threadIdx  r:   z$CudaModuleTemplate.resolve_threadIdxc                 C   r   r9   r   r   r   r   r   resolve_blockIdx  r:   z#CudaModuleTemplate.resolve_blockIdxc                 C   r   r9   r   r   r   r   r   resolve_blockDim  r:   z#CudaModuleTemplate.resolve_blockDimc                 C   r   r9   r   r   r   r   r   resolve_gridDim  r:   z"CudaModuleTemplate.resolve_gridDimc                 C   r   r9   r   r   r   r   r   resolve_laneid  r   z!CudaModuleTemplate.resolve_laneidc                 C   r  r9   )r   r   r   r1   r   r   r   r   resolve_shared  r   z!CudaModuleTemplate.resolve_sharedc                 C   r   r9   )r   r   rR   r   r   r   r   resolve_popc  r   zCudaModuleTemplate.resolve_popcc                 C   r   r9   )r   r   re   r   r   r   r   resolve_brev  r   zCudaModuleTemplate.resolve_brevc                 C   r   r9   )r   r   rf   r   r   r   r   resolve_clz  r   zCudaModuleTemplate.resolve_clzc                 C   r   r9   )r   r   rg   r   r   r   r   resolve_ffs  r   zCudaModuleTemplate.resolve_ffsc                 C   r   r9   )r   r   r\   r   r   r   r   resolve_fma  r   zCudaModuleTemplate.resolve_fmac                 C   r   r9   )r   r   rd   r   r   r   r   resolve_cbrt  r   zCudaModuleTemplate.resolve_cbrtc                 C   r   r9   )r   r   r>   r   r   r   r   resolve_threadfence  r   z&CudaModuleTemplate.resolve_threadfencec                 C   r   r9   )r   r   rB   r   r   r   r   resolve_threadfence_block  r   z,CudaModuleTemplate.resolve_threadfence_blockc                 C   r   r9   )r   r   rC   r   r   r   r   resolve_threadfence_system  r   z-CudaModuleTemplate.resolve_threadfence_systemc                 C   r   r9   )r   r   rD   r   r   r   r   resolve_syncwarp  r   z#CudaModuleTemplate.resolve_syncwarpc                 C   r   r9   )r   r   rF   r   r   r   r   resolve_shfl_sync_intrinsic  r   z.CudaModuleTemplate.resolve_shfl_sync_intrinsicc                 C   r   r9   )r   r   rK   r   r   r   r   resolve_vote_sync_intrinsic  r   z.CudaModuleTemplate.resolve_vote_sync_intrinsicc                 C   r   r9   )r   r   rL   r   r   r   r   resolve_match_any_sync  r   z)CudaModuleTemplate.resolve_match_any_syncc                 C   r   r9   )r   r   rM   r   r   r   r   resolve_match_all_sync  r   z)CudaModuleTemplate.resolve_match_all_syncc                 C   r   r9   )r   r   rN   r   r   r   r   resolve_activemask  r   z%CudaModuleTemplate.resolve_activemaskc                 C   r   r9   )r   r   rP   r   r   r   r   resolve_lanemask_lt   r   z&CudaModuleTemplate.resolve_lanemask_ltc                 C   r   r9   )r   r   rh   r   r   r   r   resolve_selp  r   zCudaModuleTemplate.resolve_selpc                 C   r   r9   )r   r   r   r   r   r   r   resolve_nanosleep  r   z$CudaModuleTemplate.resolve_nanosleepc                 C   r  r9   )r   r   r   r   r   r   r   r   resolve_atomic	  r   z!CudaModuleTemplate.resolve_atomicc                 C   r  r9   )r   r   r   ra   r   r   r   r   resolve_fp16  r   zCudaModuleTemplate.resolve_fp16c                 C   r  r9   )r   r   r   r;   r   r   r   r   resolve_const  r   z CudaModuleTemplate.resolve_constc                 C   r  r9   )r   r   r   r6   r   r   r   r   resolve_local  r   z CudaModuleTemplate.resolve_localN)$r,   r-   r.   r   r   r   r3   r  r  r  r  r  r  r   r!  r"  r#  r$  r%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r2  r3  r4  r5  r6  r   r   r   r   r    s>    
r  )loglog2log10)operatorZ
numba.corer   Znumba.core.typing.npydeclr   r   r   r   r   r   r	   r
   Znumba.core.typing.templatesr   r   r   r   r   r   Znumba.cuda.typesr   Znumba.core.typeconvr   Znumbar   Znumba.cuda.compilerr   registryru   Zregister_attrr}   r   r0   r5   r8   r>   rB   rC   rD   rF   rK   rL   rM   rN   rP   rR   r\   r`   rd   re   rf   rg   rh   rw   r~   r   floatr   r   r   r   r   ra   Zhaddr   addZCuda_addiaddZ	Cuda_iaddZhsubr   subZCuda_subisubZ	Cuda_isubZhmulr   mulZCuda_mulimulZ	Cuda_imulZhmaxr  Zhminr  Zhnegr   negZCuda_negZhabsr   absZCuda_absZheqr  eqhner
  neZhger  geZhgtr  gthler  leZhltr  lttruedivitruedivr   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r   r_   r^   rV   rO   rW   rZ   Zall_numba_typesr   Zunsigned_int_numba_typesr   r   r   maxr   minr   Znanmaxr   Znanminr   and_r   or_r   xorr   incr   decr   Zexchr   r   r   r   r   r   r   r   r   r   r  r   funcr   r   r   r   <module>   sJ   ( 	

			














.^[

