o
    W+ i                     @   sL   d dl mZmZ d dlZd dlZd dlmZ ddlm	Z	 G dd de	Z
dS )    )AnyDictN)ModeKeys   )OfaBasePreprocessorc                       s   e Zd ZdZejf fdd	Zdeee	f deee	f fddZ
deee	f deee	f fdd	Zdeee	f deee	f fd
dZ  ZS )OfaSudokuPreprocessorz+
    OFA preprocessor for sudoku tasks
    c                    sL  t t| j|||g|R i | | jjdd| _| jdd| _| jdd| _| jrg | _	g | _
tdD ]-}tdD ]&}| j	|d  | j
|d  |d	krW|d	ksc| j	d
 | j
d
 q=q7t| j
| _
t| j	| _	t| | j}t| j
|g}	t| j	|g}
t| j|	| jg| _
t| j|
| jg| _	dS dS )zpreprocess the data

        Args:
            cfg(modelscope.utils.config.ConfigDict) : model config
            model_dir (str): model path,
            mode: preprocessor mode (model mode)
        promptz solve the sudoku .seg_embeddingFmax_struct_length   	   r      r   N)superr   __init__cfgmodelgetinstruction_textr	   r
   input_puzzle_rowinput_puzzle_colrangeappendtorchZtensorZ
zeros_liketokenize_textcatbos_itemZeos_item)selfr   Z	model_dirmodeargskwargsidxZjdxZinstruct_segr   r   	__class__ o/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/preprocessors/ofa/sudoku.pyr      sH   


zOfaSudokuPreprocessor.__init__datareturnc                 C   s    | j tjkr| |S | |S )N)r   r   ZTRAIN_build_train_sample_build_infer_sample)r   r%   r#   r#   r$   __call__:   s   

zOfaSudokuPreprocessor.__call__c                 C   sl   |  |}|d }|   }d|d| j }| j|dd|d< t| j	|d dd g|d< |S )	z
        build sample for training tasks.

        step 1. execute the `_build_infer_sample` function to get a batch sample
            for inference.
        step 2. process the label data for training.
        label NF)Zadd_bostargetZprev_output_tokens)
r(   lowerstripsplitjoinZmax_tgt_lengthr   r   r   r   )r   r%   sampler,   Ztarget_token_listr#   r#   r$   r'   @   s   
z)OfaSudokuPreprocessor._build_train_samplec                 C   s   d| j v r	d|v sJ d|| j d  }d|   d| j }| || j }|d| j| j  }d|d}| j	rH| j
|d< | j|d< d	| j v r`| j d	 |v r`d
|| j d	  |d< |S )a-  
        build sample for inference tasks.

        step 1. Get the input random masked sudoku text input, which shold be
            generated like below pseudo code.
            >>> sudo = np.random.randint(1, 9, size=(9, 9)) # a pseudo sudoku
            >>> sudo_text = " | ".join(" : ".join(str(c) for c in row) \
            >>>             for row in sudo)
        step 2. Limit the length, tokenize the input text and add the bos token
            to the front of the input as source input.
        step 3. Add a pseodo ids for every input.
        textz;there must be `text` column in task key map and source datar+   Ng        )idsourceZseg_row_tokensZseg_col_tokensZsolutionz {}r*   )Z
column_mapr1   r.   r/   r0   r
   r   r   Zmax_src_lengthr	   r   r   format)r   r%   r3   Zsrc_itemr2   r#   r#   r$   r(   Q   s$    


z)OfaSudokuPreprocessor._build_infer_sample)__name__
__module____qualname____doc__r   Z	INFERENCEr   r   strr   r)   r'   r(   __classcell__r#   r#   r!   r$   r      s    "*"*r   )typingr   r   numpynpr   Zmodelscope.utils.constantr   baser   r   r#   r#   r#   r$   <module>   s   