B
    qnd7*                 @   s  d dl Z d dlmZ d dlmZmZmZmZ G dd dZG dd de j	Z
ee
jZG dd	 d	e j	ZG d
d dZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd dZG dd dZd!eee  eeddd ZdS )"    N)chain)ListSetTupleTypec               @   sF   e Zd ZeeeedddZdd Zd dddZd	d
 Zdd Z	dS )Entity)sent_idstartendtagc             C   s   || _ || _|| _|| _d S )N)r   r	   r
   r   )selfr   r	   r
   r    r   2/tmp/pip-unpacked-wheel-a5vokgmp/seqeval/scheme.py__init__   s    zEntity.__init__c             C   s   d | j| j| j| jS )Nz({}, {}, {}, {}))formatr   r   r	   r
   )r   r   r   r   __repr__   s    zEntity.__repr__)otherc             C   s   |   |  kS )N)to_tuple)r   r   r   r   r   __eq__   s    zEntity.__eq__c             C   s   t |  S )N)hashr   )r   r   r   r   __hash__   s    zEntity.__hash__c             C   s   | j | j| j| jfS )N)r   r   r	   r
   )r   r   r   r   r      s    zEntity.to_tupleN)
__name__
__module____qualname__intstrr   r   r   r   r   r   r   r   r   r      s
   r   c               @   s`   e Zd Ze Ze Ze Ze Ze Z	e Z
e ZeeB eB eB e	B e
B eB ZdS )PrefixN)r   r   r   enumautoIOBESULANYr   r   r   r   r      s   r   c               @   s$   e Zd Ze Ze ZeeB ZdS )TagN)r   r   r   r   r   SAMEDIFFr&   r   r   r   r   r'   )   s   r'   c               @   s   e Zd ZdZdZdZdZdeeedddZ	dd Z
d	d
 Zd dddZd dddZd dddZd edddZd eeeeef  dddZdS )TokenNF-)tokensuffix	delimiterc             C   sT   || _ |rt|d  n
t|d  | _|r4|d d n
|dd  }||pLd| _d S )Nr      _)r,   Prefixesprefixstripr   )r   r,   r-   r.   r   r   r   r   r   5   s    zToken.__init__c             C   s   | j S )N)r,   )r   r   r   r   r   ;   s    zToken.__repr__c             C   s8   | j | jkr4t| jdd}d}t|| j|dS )z+Check whether the prefix is allowed or not.zPrefix. z5Invalid token is found: {}. Allowed prefixes are: {}.T)r3   allowed_prefixr   replace
ValueErrorr   r,   )r   Zallowed_prefixesmessager   r   r   is_valid>   s
    zToken.is_valid)prevc             C   s   |  || jS )z6Check whether the current token is the start of chunk.)check_patternsstart_patterns)r   r;   r   r   r   is_startF   s    zToken.is_startc             C   s   |  || jS )z3Check whether the current token is inside of chunk.)r<   inside_patterns)r   r;   r   r   r   	is_insideJ   s    zToken.is_insidec             C   s   |  || jS )z5Check whether the previous token is the end of chunk.)r<   end_patterns)r   r;   r   r   r   is_endN   s    zToken.is_end)r;   condc             C   sF   |t jkrdS |j| jkr(|t jkr(dS |j| jkrB|t jkrBdS dS )z)Check whether the tag pattern is matched.TF)r'   r&   r   r(   r)   )r   r;   rC   r   r   r   	check_tagR   s    
zToken.check_tag)r;   patternsc             C   s<   x6|D ].\}}}|j |kr| j |kr| ||rdS qW dS )z.Check whether the prefix patterns are matched.TF)r3   rD   )r   r;   rE   Zprev_prefixZcurrent_prefixZtag_condr   r   r   r<   \   s     zToken.check_patterns)Fr+   )r   r   r   r6   r=   r?   rA   r   boolr   r   r:   r>   r@   rB   r'   rD   r   r   r   r<   r   r   r   r   r*   /   s   
r*   c               @   s   e Zd ZejejB ejB Zejejej	fejejej
fejejej	fejejejfejejejfhZejejejfejejejfhZejejej
fejejej	fejejej	fejejej	fejejej
fejejejfhZdS )IOB1N)r   r   r   r   r   r    r!   r6   r'   r&   r)   r(   r=   r?   rA   r   r   r   r   rG   d   s   rG   c               @   s   e Zd ZejejB ejB Zejejej	fejejej
fejejej	fejejejfhZejejejfejejejfhZejejej
fejejej	fejejej
fejejejfejejejfhZdS )IOE1N)r   r   r   r   r   r    r"   r6   r'   r&   r)   r(   r=   r?   rA   r   r   r   r   rH   {   s   rH   c               @   s   e Zd ZejejB ejB Zejeje	jfhZ
ejeje	jfejeje	jfhZejeje	jfejeje	jfejeje	jfejeje	jfejeje	jfejeje	jfhZdS )IOB2N)r   r   r   r   r   r    r!   r6   r&   r'   r=   r(   r?   r)   rA   r   r   r   r   rI      s   rI   c               @   s   e Zd ZejejB ejB Zejejej	fejejej	fejejej	fejejej	fejejej
fejejej
fhZejejejfejejejfhZejej	ej	fhZdS )IOE2N)r   r   r   r   r   r    r"   r6   r'   r&   r)   r=   r(   r?   rA   r   r   r   r   rJ      s   rJ   c               @   s   e Zd ZejejB ejB ejB ejB Z	ej
ejej
fej
ejej
fhZejejejfejejejfejejejfejejejfhZejej
ej
fejej
ej
fhZdS )IOBESN)r   r   r   r   r   r    r!   r"   r#   r6   r&   r'   r=   r(   r?   rA   r   r   r   r   rK      s   rK   c               @   s   e Zd ZejejB ejB ejB ejB Z	ej
ejej
fej
ejej
fhZejejejfejejejfejejejfejejejfhZejej
ej
fejej
ej
fhZdS )BILOUN)r   r   r   r   r!   r   r%   r    r$   r6   r&   r'   r=   r(   r?   rA   r   r   r   r   rL      s   rL   c               @   sV   e Zd Zdee ee eeedddZ	e
dd Zeed	d
dZedddZdS )TokensFr+   N)tokensschemer-   r.   r   c                sB   d d| _  fdd|D | _| j| j g | _|| _d S )Nr    )r-   r.   c                s   g | ]}| d qS ))r-   r.   r   ).0r,   )r.   rO   r-   r   r   
<listcomp>   s    z#Tokens.__init__.<locals>.<listcomp>)outside_tokenrN   extended_tokensr   )r   rN   rO   r-   r.   r   r   )r.   rO   r-   r   r      s    zTokens.__init__c             C   s   d}g }| j }x|t| jk r| j| }|  ||rz| j|d |d}| |rtt| j|||j	d}|
| |}n|d7 }| j|d  }qW |S )zExtract entities from tokens.

        Returns:
            list: list of Entity.

        Example:
            >>> tokens = Tokens(['B-PER', 'I-PER', 'O', 'B-LOC'], IOB2)
            >>> tokens.entities
            [('PER', 0, 2), ('LOC', 3, 4)]
        r   r0   )r	   r;   )r   r	   r
   r   )rR   lenrS   r:   r>   _forward_is_endr   r   r   append)r   ientitiesr;   r,   r
   entityr   r   r   rY      s    



zTokens.entities)r	   r;   c             C   sD   x4t | j|d  |D ]\}}||r.|}q|S qW t| jd S )Nr0   )	enumeraterS   r@   rT   rN   )r   r	   r;   rX   r,   r   r   r   rU     s
    
zTokens._forward)rX   c             C   s"   | j | }| j |d  }||S )Nr0   )rS   rB   )r   rX   r,   r;   r   r   r   rV   	  s    
zTokens._is_end)Fr+   N)r   r   r   r   r   r   r*   rF   r   r   propertyrY   rU   rV   r   r   r   r   rM      s    rM   c               @   sH   e Zd Zdeee  ee eedddZedddZ	e
d	d
 ZdS )EntitiesFr+   )	sequencesrO   r-   r.   c                s     fddt |D | _d S )Nc          	      s$   g | ]\}}t | |d jqS ))rO   r-   r.   r   )rM   rY   )rP   r   seq)r.   rO   r-   r   r   rQ     s   z%Entities.__init__.<locals>.<listcomp>)r[   rY   )r   r^   rO   r-   r.   r   )r.   rO   r-   r   r     s    zEntities.__init__)tag_namec                s    fddt | j D }|S )Nc                s   h | ]}|j  kr|qS r   )r   )rP   rZ   )r`   r   r   	<setcomp>  s    z"Entities.filter.<locals>.<setcomp>)r   rY   )r   r`   rY   r   )r`   r   filter  s    zEntities.filterc             C   s   dd t | j D }|S )Nc             S   s   h | ]
}|j qS r   )r   )rP   rZ   r   r   r   ra     s   z'Entities.unique_tags.<locals>.<setcomp>)r   rY   )r   tagsr   r   r   unique_tags  s    zEntities.unique_tagsN)Fr+   )r   r   r   r   r   r   r*   rF   r   rb   r\   rd   r   r   r   r   r]     s   "r]   Fr+   )r^   r-   r.   c          
   C   s  t  }d}x\| D ]T}xN|D ]F}yt|||d}||j W q tk
r^   t||Y qX qW qW tjtj	tj
htjtj
htj
tj	htj
hg}tjtj	tjhtjtjhtjtj	htjhg}tjtj	tj
tjtjhtjtj
tjtjhtjtj	tj
tjhtj	tj
tjtjhtjtj
tjhtj
tjtjhtj	tj
tjhtj
tjhtjhg	}	tjtj	tj
tjtjhtjtj
tjtjhtjtj	tj
tjhtj	tj
tjtjhtjtj
tjhtj
tjtjhtj	tj
tjhtj
tjhtjhg	}
||krtS ||krtS ||	krtS ||
krtS t||dS )zuDetects scheme automatically.

    auto_detect supports the following schemes:
    - IOB2
    - IOE2
    - IOBES
    z This scheme is not supported: {})r-   r.   N)setr*   addr3   KeyErrorr8   r   r   r   r    r!   r"   r#   r%   r$   rI   rJ   rK   rL   )r^   r-   r.   prefixeserror_messagerN   r,   Zallowed_iob2_prefixesZallowed_ioe2_prefixesZallowed_iobes_prefixesZallowed_bilou_prefixesr   r   r   auto_detect#  sX    















rj   )Fr+   )r   	itertoolsr   typingr   r   r   r   r   Flagr   dict__members__r2   r'   r*   rG   rH   rI   rJ   rK   rL   rM   r]   r   rF   rj   r   r   r   r   <module>   s    
54