o
    0 i                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ e ZdaejdadZdaG dd de Z!G dd de Z"G dd de Z#d^ddZ$e% dd Z&dee' fddZ(dee' fddZ)dd Z*e% dd Z+d Z,e% d!d" Z-e% d#d$ Z.ej%d%d&d'd( Z/ej%d%d&d^d)d*Z0d+d, Z1d-d. Z2d/d0 Z3d1d2 Z4e4d3d4Z5d4a6d5d6 Z7d7d8 Z8e9e8d9Z:	;		4d_d<d=Z;		>	d`d?d@Z<dAdB Z=ej>?dCZ@dDdE ZAi aBeCeDdF< 		Gdad4ddd4dHdIdJZE	G		4dbdKdLZFG dMdN dNe ZGG dOdP dPeHZIdQdR ZJd^dSdTZKdUdV ZLdWdX ZMdaNdYdZ ZO		4	%dcd\d]ZPdS )d    N)Optional)device)function)get_rocm_path)driver)runtimenvrtc)_environment)_utilwin32)
--device-cz-dcz	-rdc=truez--relocatable-device-code=truec                   @      e Zd ZdS )NVCCExceptionN__name__
__module____qualname__ r   r   ^/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/cupy/cuda/compiler.pyr   !       r   c                   @   r   )HIPCCExceptionNr   r   r   r   r   r   %   r   r   c                   @   r   )JitifyExceptionNr   r   r   r   r   r   )   r   r   c           
   
   C   s  z>t j}tr"t }|d ur"|t j t jdd }t|}||d< tj	| ||tj
dtr/tjndd}|d ur<|| |W S  tjyj } zd||j|j|j}	|dkrZt|	|dkrbt|	t|	d }~w ty } zd	t| }	t|	|d }~ww )
NPATH Tr   )cwdenvstderruniversal_newlinescreationflagsz^`{0}` command returns non-zero exit status. 
command: {1}
return-code: {2}
stdout/stderr: 
{3}nvcchipccz>Failed to run `{0}` command. Check PATH environment variable: )osenviron_win32_get_extra_path_for_msvcpathsepgetcopydeepcopy
subprocesscheck_outputSTDOUTZCREATE_NO_WINDOWwriteCalledProcessErrorformatcmd
returncodeoutputr   r   RuntimeErrorOSErrorstr)
r0   r   backend
log_streamr   Z
extra_pathpathlogemsgr   r   r   _run_cc-   sL   

r<   c                  C   s2   t d} | r	d S t }|r|S t }|r|S d S )Ncl.exe)shutilwhich_get_cl_exe_dir_get_cl_exe_dir_fallback)cl_exeZ
cl_exe_dirr   r   r   r%   ^   s   
r%   returnc               
   C   s   z=zdd l } W n ty   Y W d S w | jt j}|D ]}tj	|d}tj
|r2|  W S qtd|  W d S  ty] } ztdt| d|  W Y d }~d S d }~ww )Nr   r=   zcl.exe could not be found in z,Failed to find cl.exe with setuptools.msvc: : )Zsetuptools.msvc	ExceptionZmsvcZEnvironmentInfoplatformmachineZVCToolsr"   r8   joinexistswarningswarntype)
setuptoolsZvctoolsr8   rB   r:   r   r   r   r@   p   s*   
r@   c               
   C   s   z&ddl m}  ddlm} || ddi}|  |j  tj	|jj
W S  tyF } ztdt| d|  W Y d }~d S d }~ww )Nr   )Distribution)	build_extnameZcupy_cl_exe_discoverz'Failed to find cl.exe with setuptools: rD   )rM   rN   Zsetuptools.command.build_extrO   Zsetup_shlib_compilerZshlib_compilerZ
initializer"   r8   dirnameccrE   rJ   rK   rL   )rN   rO   extr:   r   r   r   rA      s   
rA   c                   C   s   t d u rt a t S N)_nvrtc_versionr	   Z
getVersionr   r   r   r   _get_nvrtc_version   s   rV   c                  C   s   ddl m}  | jS )Nr   core)
cupy._corerX   ZCUPY_CACHE_KEYrW   r   r   r   _get_cupy_cache_key   s   rZ   )Z3253Z62Z72Z87c                  C   sz   t  \} }| dk rd}|S | dkr|dkrd}|S | dkr%|dk r%d}|S | dkr-|dks5| dkr9|dk r9d}|S d	}|S )
N   Z75r   80   Z86   Z90Z120)rV   )majorminornvrtc_max_compute_capabilityr   r   r   _get_max_compute_capability   s   
 rc   c                  C   s$   t  \} }tdd t| |D S )Nc                 s   s    | ]}d | V  qdS )-INr   ).0dr   r   r   	<genexpr>   s
    
z._get_extra_include_dir_opts.<locals>.<genexpr>)rV   tupler
   Z$_get_include_dir_from_conda_or_wheel)r`   ra   r   r   r   _get_extra_include_dir_opts   s   
ri   T)Zfor_each_devicec                  C   s*   t  } t j}|tv r|S t|| tdS )N)key)rc   r   Devicecompute_capability_tegra_archsminint)rb   archr   r   r   	_get_arch   s
   
rq   c                 C   s@   | d u rt  } tst| tt krd|  dfS d|  dfS )Nz	-arch=sm_cubinz-arch=compute_ptx)rq   _use_ptxro   rc   rp   r   r   r   _get_arch_for_options_for_nvrtc   s   rv   c                 C      t dd | D S )Nc                 s   s    | ]	}|t v r|V  qd S rT   
_rdc_flagsre   or   r   r   rg          z'_is_cudadevrt_needed.<locals>.<genexpr>)anyoptionsr   r   r   _is_cudadevrt_needed      r   c                  C   sv   t d urt S ddlm}  |  }|d u rtdtr|d7 }n|d }tj|s-|d7 }n|}tj|s9td|S )Nr   )get_cuda_pathzCUDA is not found.z/lib/x64/cudadevrt.libz/lib64/libcudadevrt.az/lib/libcudadevrt.az>Relocatable PTX code is requested, but cudadevrt is not found.)
_cudadevrt	cupy.cudar   r3   r$   r"   r8   isfile)r   Z	cudadevrtZcudadevrt64r   r   r   _get_cudadevrt_path   s"   

r   c                 C   rw   )Nc                 s   s    | ]	}|t vr|V  qd S rT   rx   rz   r   r   r   rg     r|   z%_remove_rdc_option.<locals>.<genexpr>)rh   r~   r   r   r   _remove_rdc_option
  r   r   c                 C   sH   t j| }|d u st|dkr|S zt|dkW S  ty#   Y dS w )Nr      F)r"   r#   r'   lenro   
ValueError)rP   defaultvalr   r   r   _get_bool_env_variable  s   r   ZCUPY_COMPILE_WITH_PTXFc              
   C   s   ddl m} tsddlm} |  ||  da| }|d |  } z|| |\}}}}W n* tyY }	 zt	t
|	|||d}
tdd}|rN|
tj tt
|
|	d }	~	ww ||ks`J |||fS )	Nr   )jitifyrW   T
r   CUPY_DUMP_CUDA_SOURCE_ON_ERRORF)r   r   #_jitify_header_source_map_populatedrY   rX   Z_init_moduleZ_add_sourcesZ_get_header_source_maprE   CompileExceptionr5   r   dumpsysr   r   )sourcer   cu_pathr   rX   Z
old_sourcerP   headersinclude_namesr:   cexr   r   r   r   _jitify_prep  s,   

r   c                 C   s   t j| dd S )NF)usedforsecurity)hashlibsha1	hexdigest)valuer   r   r   _hash_hexdigestA  r   r       r   kern.cuc              	      s    fdd}|sHt  2}	tj|	|}
t|
d}||  W d    n1 s)w   Y  || ||
|||W  d    S 1 sAw   Y  d S |sLdn|}
|| ||
|||S )Nc              
      s   t jst \}}||f7 }nd}|rt| ||\}}}	nd }}	t \}
}|
dkr.|d7 }t| |||	||d}z|||\}}W ||fS  ty_ } ztdd}|rZ|	t
j  d }~ww )Nrs   r   r_   )z#--device-as-default-execution-space)name_expressionsmethodr   F)r   is_hiprv   r   rV   _NVRTCProgramcompiler   r   r   r   r   )r   r   r   r   r7   r   Zarch_optr   r   r   major_versionminor_versionprogZcompiled_objmappingr:   r   ru   r   r   _compileK  s6   

z%compile_using_nvrtc.<locals>._compilewr   )tempfileTemporaryDirectoryr"   r8   rH   openr-   )r   r   rp   filenamer   r7   cache_in_memoryr   r   root_dirr   cu_filer   ru   r   compile_using_nvrtcH  s    
$	
r   rr   c                 C   s.  ddl m} |st }|dvrtd|dkr|rJ dj|d}| }	|	 }
|
| t Z}|dd }t	j
||}d	| }d
||f }t|d}||  W d    n1 s`w   Y  |s|
d|  |
t|7 }
|
| z	t|
|d| W n ty } ztt|| ||d}tdd}|r|tj |d }~ww |
 }|d |d }|
t|d|f 7 }
|
| z	t|
|d| W n% ty } ztt|| ||d}tdd}|r|tj |d }~ww t|}|d|d|d f7 }|t| }
z	t|
|d| W n ty) } ztt|dd|d}|d }~ww |dkrQt|d}| W  d    W  d    S 1 sKw   Y  n+|dkrxt|d}| W  d    W  d    S 1 srw   Y  nJ |W d    d S W d    d S 1 sw   Y  d S )Nr   )get_nvcc_path)rr   rs   z,Invalid code_type %s. Should be cubin or ptxrs   z'-gencode=arch=compute_{cc},code=sm_{cc})rR   .z%s.cuz%s.%sr   z--%sr    r   Fz--cubinz.o-oz--device-link.cubinr   rbrr   )r   r   rq   r   r/   splitappendr   r   r"   r8   rH   r   r-   listr<   r   r   r5   r   r   r   r   r(   r   read)r   r   rp   r   	code_typeseparate_compilationr7   r   Zarch_strZ_nvccr0   r   Z
first_partr8   r   result_pathr   r:   r   r   Zcmd_partialobjZptx_fileZbin_filer   r   r   compile_using_nvccz  s   




;
>>$r   c           	   
   C   s   |dkr4|d |f7 }t| }z	||\}}W nN ty3 } ztdd}|r.|tj  d }~ww |dkrbz|d }t| ||ddd	}W n  tya } ztdd}|r\|tj  d }~ww t	d
| t
|tsoJ ddd |  D S )Nr	   z-arch=compute_{}r   Fr    )r   zpreprocess.ptxzpreprocess.curs   )r   Invalid backend %sr   c                 s   s    | ]
}| d r|V  qdS )z//N
startswith)re   xr   r   r   rg     s    

z_preprocess.<locals>.<genexpr>)r/   r   r   r   r   r   r   r   r   r   
isinstancebytesrH   decode
splitlines)	r   r   rp   r6   r   result_r:   r   r   r   r   _preprocess  sD   



r   z~/.cupy/kernel_cachec                   C   s   t jdtS )NZCUPY_CACHE_DIR)r"   r#   r'   _default_cache_dirr   r   r   r   get_cache_dir  s   r   _empty_file_preprocess_cacher	   )enable_cooperative_groupsr   r7   r   c                C   s   |r	t jr	td|d ur|dkrttddo|dk}
t jr3|dkr%dnd}t| ||||||||
	S t| |||||||||
|	S )Nz+Cooperative groups is not supported in HIP.r	   ZCUPY_CACHE_IN_MEMORYFhiprtcr!   )r   r   r   NotImplementedErrorr   _compile_with_cache_hip_compile_with_cache_cuda)r   r   rp   	cache_dirextra_sourcer6   r   r   r7   r   r   r   r   r   _compile_module_with_cache  s*   
r   c               	   C   s*  |d u rt  }|d u rt }|d7 }|r|d7 }tddr!|d7 }d|v }|
r.|s.|d7 }n|r4|
s4d}
|
r>|d	kr>td
|t 7 }||t |ft| }t|d }|d u rct	d|||}|t|< d||| |t
 f }|d}t|d }t }|	stj|stj|dd tj||}tj|r|st|d}| }W d    n1 sw   Y  t|tkr|d t }|td  }t|d}||kr|| |S n	 |d	kr|	rdn|d }t| ||||||	|
\}}t|rt }||d t }|| |  }n|}|!| n|dkr.t|}t"| |||d d||d}ntd| |	st|d}t#j$|dd}|%| |%| |j&}W d    n	1 s^w   Y  t'(|| tddrt|d d}|%|  W d    n	1 sw   Y  n	 || |S )N)z	-ftz=true)r   ZCUPY_CUDA_COMPILE_WITH_DEBUGF)z--device-debugz--generate-line-info-DCUPY_USE_JITIFY)r   Tr	   zjitify only works with NVRTCr   z%s %s %s %s %sutf-8r   exist_okr   ascii.cuzcupy.ptxr    rr   )r   r   r7   r   dirdeleteCUPY_CACHE_SAVE_CUDA_SOURCEr   ))r   rq   r   r   ri   rV   rv   r   r'   r   rZ   encoder   r   Moduler"   r8   isdirmakedirsrH   rI   r   r   r   _hash_lengthloadr   r   Z	LinkStateZadd_ptr_datar   Zadd_ptr_fileZcomplete_set_mappingr   r   NamedTemporaryFiler-   rP   r>   move) r   r   rp   r   r   r6   r   r   r7   r   r   Zis_jitify_requestedr   basekey_srcrP   modr8   filedatahashrr   Z
cubin_hashZcu_namers   r   Zlsr   Zrdctf	temp_pathfr   r   r   r     s   














r   c                       sF   e Zd Zd fdd	Zdd Zdd Zdd	 Zd
d Zdd Z  Z	S )r   r	   c                    s0   || _ || _|| _|| _|| _tt|   d S rT   )_msgr   rP   r   r6   superr   __init__)selfr;   r   rP   r   r6   	__class__r   r   r     s   zCompileException.__init__c                 C   s    t | | j| j| j| j| jffS rT   )rL   r   r   rP   r   r6   r   r   r   r   
__reduce__  s   zCompileException.__reduce__c                 C   s   t | S rT   )r5   r   r   r   r   __repr__     zCompileException.__repr__c                 C   s   |   S rT   )get_messager   r   r   r   __str__  r   zCompileException.__str__c                 C   s   | j S rT   )r   r   r   r   r   r     s   zCompileException.get_messagec                 C   s   | j d}tttt|d }d|}|d| j	
  |d|  |d |d| j |dd	| j |d
 t|D ]\}}|||d |  d  qO|d |  d S )Nr   r   z
{{:0{}d}} z{} zcompilation error: {}
z-----
z	Name: {}
zOptions: {}
 zCUDA source:
)r   r   ro   mathfloorlog10r   r/   r-   r6   upperrP   rH   r   	enumeraterstripflush)r   r   linesdigitsZ	linum_fmtiliner   r   r   r     s   


"
zCompileException.dumpr   )
r   r   r   r   r   r   r  r   r   __classcell__r   r   r   r   r     s    r   c                   @   s2   e Zd Z		dddZejfddZdd	d
ZdS )r   default_programr   Nrs   c                 C   s\   d | _ t|tr|d}t|tr|d}|| _|| _t||||| _ || _|| _	d S )NzUTF-8)
ptrr   r   r   srcrP   r	   ZcreateProgramr   r   )r   r  rP   r   r   r   r   r   r   r   r     s   




z_NVRTCProgram.__init__c                 C   s$   | rd S | j rt| j  d S d S rT   )r  r	   ZdestroyProgram)r   is_shutting_downr   r   r   __del__  s
   z_NVRTCProgram.__del__c              	   C   s   zZ| j r| j D ]	}t| j| qt| j| d }| j r.i }| j D ]}t| j|||< q"|d ur;|t| j | jdkrIt	| j|fW S | jdkrWt
| j|fW S td tjyx   t| j}t|| j| j|tjsuddw )Nrr   rs   zUnknown NVRTC compile methodr	   r   )r   r	   ZaddNameExpressionr  ZcompileProgramZgetLoweredNamer-   ZgetProgramLogr   ZgetCUBINZgetPTXr3   Z
NVRTCErrorr   r  rP   r   r   )r   r   r7   Zkerr   r9   r   r   r   r     s2   



z_NVRTCProgram.compile)r  r   r   Nrs   )r   N)r   r   r   r   r   r  r  r   r   r   r   r   r     s    
r   c                 C   s   t d| d uS )Nz^[a-zA-Z_][a-zA-Z_0-9]*$)rematch)rP   r   r   r   is_valid_kernel_name  s   r  c                 C   sX  ddgt | }t }tj|d}|d }|d }t|d}	|	|  W d    n1 s1w   Y  ||d|g7 }z	t||d|}
W n% t	yk } zt
t|| ||d}tdd	}|re|tj |d }~ww tj|szt	d
||
t|d}	|	 W  d    W  d    S 1 sw   Y  W d    d S 1 sw   Y  d S )Nr!   z--gencokern.cpp.hsacor   r   r   FzP`hipcc` command does not generate output file. 
command: {0}
stdout/stderr: 
{1}r   )r   r   r   r"   r8   rH   r   r-   r<   r   r   r5   r   r   r   r   r   r/   r   )r   r   rp   r7   r0   r   r8   Zin_pathZout_pathr   r2   r:   r   r   r   r   r   compile_using_hipcc  sD   

"r  c              	   C   s   ddgt | }t F}tj|d}d| }t|d}||  W d    n1 s-w   Y  || t	||d}t
|tsDJ tdd|W  d    S 1 sUw   Y  d S )Nr!   z--preprocessr  z%s.cppr   z	(?m)^#.*$r   )r   r   r   r"   r8   rH   r   r-   r   r<   r   r5   r  sub)r   r   r0   r   r8   r   r   Zpp_srcr   r   r   _preprocess_hipcc  s   

$r  c              
   C   st   t dkrd}nd}t|}z	||\}}W n ty0 } ztdd}|r+|tj  d }~ww t|t	s8J |S )Nthz}
        // hiprtc segfaults if the input code is empty
        __global__ void _cupy_preprocess_dummy_kernel_() { }
        z
        // hiprtc segfaults if the input code is empty
        #include <hip/hip_runtime.h>
        __global__ void _cupy_preprocess_dummy_kernel_() { }
        r   F)
_cuda_hip_versionr   r   r   r   r   r   r   r   r   )r   r   coder   r   r   r:   r   r   r   r   _preprocess_hiprtc-  s"   r   c                 C   s   |sd|  S t dkr| S t dkrd|  S td u r/|d ur/|d}dd |D }d| a}| d} dd | D } dt d|  } | S )	Nz#include <hip/hip_runtime.h>
r  i  r   c                 S   s$   g | ]}| d s| ds|qS )#includez#pragma oncer   re   r  r   r   r   
<listcomp>\  s
    
z*_convert_to_hip_source.<locals>.<listcomp>c                 S   s   g | ]	}| d s|qS )r!  r   r"  r   r   r   r#  b  s    z7#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
)r  _hip_extra_sourcer   rH   )r   r   	is_hiprtcr   r   r   _convert_to_hip_sourceL  s&   

r&  r   c
                 C   s  t |rtd|d7 }t }
|
dkr"|
dk r"|dt  d f7 }|d u r)t }|d u r2t j}|	r=t	| ||dkd} ||t
 |f}t|d }|d u ra|dkrXtd	|}ntd	|}|t|< d
||| |f }|d}t|d }t }|stj|stj|dd tj||}tj|r|st|d}| }W d    n1 sw   Y  t|tkr|d t }|td  }t|d}||kr|| |S n	 |dkrt| |||d |||\}}|| nt | |||}|sMt|d}t!j"|dd}|#| |#| |j$}W d    n	1 sw   Y  t%&|| t'ddrLt|d d}|#|  W d    n	1 sGw   Y  n	 || |S )Nz,separate compilation is not supported in HIP)z-fcuda-flush-denormals-to-zeroifi ird   z/llvm/lib/clang/13.0.0/include/r   )r%  r   z%s %s %s %sr   r  Tr   r   r   r   Fr   r   r  r   )(r   r   r   get_build_versionr   r   r   rk   rl   r&  rV   r   r'   r   r  r   r   r   r   r"   r8   r   r   rH   rI   r   r   r   r   r   r   r   r  r   r   r-   rP   r>   r   r   )r   r   rp   r   r   r6   r   r7   r   Zuse_converterZrocm_build_versionr   r   r   rP   r   r8   r   r   Z
hash_valuebinaryZbinary_hashr   r   r   r   r   r   r   j  s   	







r   rT   )r   Nr   NNFF)r   Nr   rr   FN)r   NNNr	   )Nr	   FNNFF)r   NNFT)Qr(   r   r  r"   rF   r  r>   r*   r   r   typingr   rJ   r   r   r   r   Zcupy_backends.cuda.apir   r   Zcupy_backends.cuda.libsr	   Zcupyr
   r   r'  r  rU   r   r$   ry   r   rE   r   r   r   r<   memoizer%   r5   r@   rA   rV   rZ   rm   rc   ri   rq   rv   r   r   r   r   rt   r   r   r   r   r   r   r   r   r8   
expanduserr   r   r   dict__annotations__r   r   r   objectr   r  r  r  r   r$  r&  r   r   r   r   r   <module>   s   
 
1









%
2
X# 
z'3
(