B
    a                 @   sP  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ eejZd
ZdZdZdZdZdZdZdZeeeeefZeeefZeeefZeeeZ e j!ej"#   e j!ej$#   e j!ej%#   e j!ej&#   ee Z'e(ee( dddZ)e(ee( dddZ*e(ee( dddZ+e	e, e-dddZ.e(e-dddZ/e(e
e( dd d!Z0e	e, ee(d"f ee(d"f d#d$d%Z1e	e, ee(d"f dd&d'Z2e(ee(d"f dd(d)Z3e4d*ej5ej6B Z7e4d+Z8e(e(d,d-d.Z9e(ee( d/d0d1Z:dS )2    N)IO)List)Optional)Set)Tuple)
extensions)interpreters)licenses	directorysymlinksocketfile
executableznon-executabletextbinary)pathreturnc          	   C   s>  yt | }W n& ttfk
r4   t|  dY nX |j}t|rLthS t|r\t	hS t
|rlthS th}t | t j}|r|t n
|t tt j| }t|dkr|| n*|rt| }t|dkr|t|d  t|@ st| r|t n
|t t|@ s(t|t|@ s:t||S )Nz does not exist.r   )oslstatOSError
ValueErrorst_modestatS_ISDIR	DIRECTORYS_ISLNKSYMLINKS_ISSOCKSOCKETFILEaccessX_OKadd
EXECUTABLENON_EXECUTABLEtags_from_filenamer   basenamelenupdateparse_shebang_from_filetags_from_interpreterENCODING_TAGSfile_is_textTEXTBINARYAssertionError	MODE_TAGS)r   srmodetagsr   tshebang r6   5/tmp/pip-unpacked-wheel-jv6j9gop/identify/identify.pytags_from_path)   s<    






r8   c             C   s   t j| \}}t j|\}}t }x4|g|d D ] }|tjkr8|tj|  P q8W t|dkr|dd  	 }|tj
kr|tj
|  n|tjkr|tj|  |S )N.r      )r   r   splitsplitextsetr   NAMESr(   r'   lower
EXTENSIONSEXTENSIONS_NEED_BINARY_CHECK)r   _filenameextretpartr6   r6   r7   r%   W   s    


r%   )interpreterr   c             C   sD   |  d\}}} x,| r<| tjkr*tj|  S |  d\} }}qW t S )N/r9   )
rpartitionr   INTERPRETERSr=   )rG   rB   r6   r6   r7   r*   m   s    

r*   )bytesior   c          	   C   sL   t ddddddddgt td	d
 t tdd }t| dd| S )zReturn whether the first KB of contents seems to be binary.

    This is roughly based on libmagic's binary/text detection:
    https://github.com/file/file/blob/df74b09b9027676088c797528edcaae5a9ce9ad0/src/encoding.c#L203-L228
          	   
                            i   N)	bytearrayrangeboolread	translate)rK   Z
text_charsr6   r6   r7   is_textz   s    $r]   c          	   C   s8   t j| st|  dt| d
}t|S Q R X d S )Nz does not exist.rb)r   r   lexistsr   openr]   )r   fr6   r6   r7   r,      s    r,   )liner   c             C   s(   y
t | S  tk
r"   |  S X d S )N)shlexr;   r   )rb   r6   r6   r7   _shebang_split   s    
rd   .)rK   cmdr   c             C   s   x|  ddkr|  }y|d}W n tk
r:   |S X x|D ]}|tkrB|S qBW tt| }x4t|d d D ] \}}|dkrqx||d  f}qxW qW |S )N   s   #!zUTF-8z-ir:   )	r[   readlinedecodeUnicodeDecodeError	printabletuplerd   strip	enumerate)rK   re   Znext_line_bZ	next_linecZline_tokensitokenr6   r6   r7   _parse_nix_shebang   s    
rr   c             C   s   |  ddkrdS |  }y|d}W n tk
r<   dS X x|D ]}|tkrDdS qDW tt| }|r|d dkr|dd }|d	krt| |S |S )
z8Parse the shebang from a file opened for reading binary.rf   s   #!r6   zUTF-8r   z/usr/bin/envr:   N)z	nix-shell)	r[   rh   ri   rj   rk   rl   rd   rm   rr   )rK   Zfirst_line_b
first_linero   re   r6   r6   r7   parse_shebang   s     

rt   c          
   C   s   t j| st|  dt | t js,dS yt| d
}t|S Q R X W n4 tk
r~ } z|j	t	j
krldS  W dd}~X Y nX dS )z$Parse the shebang given a file path.z does not exist.r6   r^   N)r   r   r_   r   r    r!   r`   rt   r   errnoEINVAL)r   ra   er6   r6   r7   r)      s    r)   z^\s*(Copyright|\(C\)) .*$z\s+)sr   c             C   s    t d| } td| } |  S )N  )COPYRIGHT_REsubWS_RErm   )rx   r6   r6   r7   _norm_license   s    r~   )rC   r   c          	   C   s   ddl }t| dd}| }W dQ R X t|}tj}d}xhtjD ]^\}}t|}	||	kr^|S |rtt	|t	|	 t	| dkrqB|
||	}
|
|k rB|
}|}qBW |r|t	| dk r|S dS dS )a  Return the spdx id for the license contained in `filename`.  If no
    license is detected, returns `None`.

    spdx: https://spdx.org/licenses/
    licenses from choosealicense.com: https://github.com/choosealicense.com

    Approximate algorithm:

    1. strip copyright line
    2. normalize whitespace (replace all whitespace with a single space)
    3. check exact text match with existing licenses
    4. failing that use edit distance
    r   NzUTF-8)encodingry   g?)editdistance_sr`   r[   r~   sysmaxsizer	   ZLICENSESabsr'   Zdistance)rC   r   ra   contentsZnormZmin_edit_distZmin_edit_dist_spdxZspdxr   Znorm_licenseZ	edit_distr6   r6   r7   
license_id   s&    $r   );ru   os.pathr   rerc   r   stringr   typingr   r   r   r   r   identifyr   r   Zidentify.vendorr	   	frozensetrk   r   r   r   r   r#   r$   r-   r.   Z	TYPE_TAGSr0   r+   Z	_ALL_TAGSr(   r@   valuesrA   r>   rJ   ZALL_TAGSstrr8   r%   r*   bytesrZ   r]   r,   rd   rr   rt   r)   compileI	MULTILINEr{   r}   r~   r   r6   r6   r6   r7   <module>   s^   

.

