o
    1 i)                     @   s   d dl mZmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZ e \ZZZeG d	d
 d
eZdS )    )DictListN)normc_initializer)	TFModelV2)get_activation_fnget_filter_config)OldAPIStack)try_import_tf)ModelConfigDict
TensorTypec                
       sz   e Zd ZdZdejjdejjdedede	f
 fddZ
d	ee	ef d
ee dedeee ffddZdefddZ  ZS )VisionNetworka.  Generic vision network implemented in ModelV2 API.

    An additional post-conv fully connected stack can be added and configured
    via the config keys:
    `post_fcnet_hiddens`: Dense layer sizes after the Conv2D stack.
    `post_fcnet_activation`: Activation function to use for this FC stack.
    	obs_spaceaction_spacenum_outputsmodel_confignamec                    s  | dst|j|d< tt| ||||| t| j ddd}| jd }t|dks1J d| dg }t| ddd}	| j d	}
| j d
}|j}d| _	t
jjj|dd}|}d| _t|d d dD ]%\}\}}}t
jjj||t|ttfr|n||f|ddd|d|}qi|d \}}}|
r|rt
jjj|r|n||t|ttfr|n||f|dddd|}|d d |r|gng  }|}t|D ]\}}|}t
jjj|d||	tdd|}qnt
jjj||t|ttfr|n||f|dddt|d|}|r|rTt
jjj|d ddg|	dddd| }}t|dd  |g D ]&\}}|}t
jjj|d|d |t|d k rH|	nd tdd|}q,n|}t
jjj|ddgd dddd| }}|jd dksy|jd dkrtd| jd | jt|jn1d| _t
jjjdd|}t|D ]\}}t
jjj|d||	tdd|}q|}|jd | _|}|r| jst
jjdd |}t
jjjdd d td!d|}nw|}t|d d dD ]'\}\}}}t
jjj||t|ttfr|n||f|ddd"|d|}q|d \}}}t
jjj||t|ttfr)|n||f|ddd"t|d|}t
jjjdddgd ddd#d|}t
jjd$d |}t
j|||g| _d S )%NZconv_filtersZconv_activationtf)Z	frameworkr   z0Must provide at least 1 entry in `conv_filters`!post_fcnet_hiddenspost_fcnet_activationno_final_linearvf_share_layersZchannels_lastZobservations)shaper   F   Zsamezconv{})strides
activationpaddingdata_formatr   ZvalidZconv_outzpost_fcnet_{}g      ?)r   r   Zkernel_initializer)r   r   r   r      zGiven `conv_filters` ({}) do not result in a [B, 1, 1, {} (`num_outputs`)] shape (but in {})! Please adjust your Conv2D stack such that the dims 1 and 2 are both 1.T)r   c                 S      t j| ddgdS Nr   r   Zaxisr   squeezex r&   i/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/rllib/models/tf/visionnet.py<lambda>       z(VisionNetwork.__init__.<locals>.<lambda>	value_outg{Gz?zconv_value_{}Zconv_value_outc                 S   r   r    r"   r$   r&   r&   r'   r(      r)   )getr   r   superr   __init__r   r   lenr   r   ZkerasZlayersZInputlast_layer_is_flattened	enumerateZConv2D
isinstancelisttupleformatZDenser   
ValueErrorr   ZFlattenLambdaZModel
base_model)selfr   r   r   r   r   r   filtersr   r   r   r   Zinput_shapeZinputsZ
last_layeriZout_sizeZkernelZstrideZlayer_sizesZfeature_outZlast_cnnZ
logits_outr*   	__class__r&   r'   r-      s  




 




	 	 

zVisionNetwork.__init__
input_dictstateseq_lensreturnc                 C   s`   |d }| j dkrt|g d}| t|tj\}| _| jr%||fS tj|ddgd|fS )NobsZchannels_first)r   r      r   r   r   r!   )	r   r   Z	transposer7   castZfloat32
_value_outr/   r#   )r8   r=   r>   r?   rA   Z	model_outr&   r&   r'   forward   s   
zVisionNetwork.forwardc                 C   s   t | jdgS )Nr   )r   ZreshaperD   )r8   r&   r&   r'   value_function  s   zVisionNetwork.value_function)__name__
__module____qualname____doc__gymspacesZSpaceintr
   strr-   r   r   r   rE   rF   __classcell__r&   r&   r;   r'   r      s0     ^


r   )typingr   r   Z	gymnasiumrK   Zray.rllib.models.tf.miscr   Zray.rllib.models.tf.tf_modelv2r   Zray.rllib.models.utilsr   r   Zray.rllib.utils.annotationsr   Zray.rllib.utils.frameworkr	   Zray.rllib.utils.typingr
   r   Ztf1r   Ztfvr   r&   r&   r&   r'   <module>   s    