o
    + iG                     @  s:  d dl mZ d dlmZ d dlmZmZmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ dd	lmZ d
dlmZ erJd dlmZ G dd deZg ZdddZG dd dejZG dd dZG dd dejZG dd dejZG dd deZG dd deZ		d/d0d)d*Z		d/d1d+d,Z		d/d2d-d.Z dS )3    )annotations)partial)TYPE_CHECKINGCallable	TypedDict)NotRequiredUnpackN)nn)get_weights_path_from_url   )ConvNormActivation   _make_divisible)Tensorc                   @  s   e Zd ZU ded< ded< dS )_MobileNetV3OptionszNotRequired[int]num_classeszNotRequired[bool]	with_poolN)__name__
__module____qualname____annotations__ r   r   l/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/vision/models/mobilenetv3.pyr   $   s   
 r   )zIhttps://paddle-hapi.bj.bcebos.com/models/mobilenet_v3_small_x1.0.pdparamsZ 34fe0e7c1f8b00b2b056ad6788d0590c)zIhttps://paddle-hapi.bj.bcebos.com/models/mobilenet_v3_large_x1.0.pdparamsZ 118db5792b4e183b925d8e8e334db3df)zmobilenet_v3_small_x1.0zmobilenet_v3_large_x1.0c                      s@   e Zd ZdZejejfd fd
dZdddZdddZ	  Z
S )SqueezeExcitationa  
    This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
    Parameters ``activation``, and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.
    This code is based on the torchvision code with modifications.
    You can also see at https://github.com/pytorch/vision/blob/main/torchvision/ops/misc.py#L127

    Args:
        input_channels (int): Number of channels in the input image.
        squeeze_channels (int): Number of squeeze channels.
        activation (Callable[..., paddle.nn.Layer], optional): ``delta`` activation. Default: ``paddle.nn.ReLU``.
        scale_activation (Callable[..., paddle.nn.Layer]): ``sigma`` activation. Default: ``paddle.nn.Sigmoid``.
    input_channelsintsqueeze_channels
activationCallable[..., nn.Layer]scale_activationreturnNonec                   sJ   t    td| _t||d| _t||d| _| | _| | _	d S )Nr   )
super__init__r	   AdaptiveAvgPool2DavgpoolZConv2Dfc1fc2r   r    )selfr   r   r   r    	__class__r   r   r$   E   s   
zSqueezeExcitation.__init__inputr   c                 C  s2   |  |}| |}| |}| |}| |S N)r&   r'   r   r(   r    r)   r,   scaler   r   r   _scaleS   s
   




zSqueezeExcitation._scalec                 C  s   |  |}|| S r-   )r0   r.   r   r   r   forwardZ   s   
zSqueezeExcitation.forward)
r   r   r   r   r   r   r    r   r!   r"   )r,   r   r!   r   )r   r   r   __doc__r	   ReLUZSigmoidr$   r0   r1   __classcell__r   r   r*   r   r   7   s    
r   c                   @  s(   e Zd Z	ddddZedddZdS )InvertedResidualConfig      ?in_channelsr   kernelexpanded_channelsout_channelsuse_seboolr   strstrider/   floatc	           	      C  s   | j ||d| _|| _| j ||d| _| j ||d| _|| _|d u r&d | _n|dkr/tj| _n|dkr8tj	| _nt
d| || _d S )N)r/   relu	hardswishz*The activation function is not supported: )adjust_channelsr7   r8   r9   r:   r;   activation_layerr	   r3   	HardswishRuntimeErrorr>   )	r)   r7   r8   r9   r:   r;   r   r>   r/   r   r   r   r$   `   s"   


zInvertedResidualConfig.__init__c                 C  s   t | | dS )N   r   )Zchannelsr/   r   r   r   rB   ~   s   z&InvertedResidualConfig.adjust_channelsN)r6   )r7   r   r8   r   r9   r   r:   r   r;   r<   r   r=   r>   r   r/   r?   )r   r   r   r$   staticmethodrB   r   r   r   r   r5   _   s
    
r5   c                      s(   e Zd Zd fddZdddZ  ZS )InvertedResidualr7   r   r9   r:   filter_sizer>   r;   r<   rC   r   
norm_layerr!   r"   c	           	   
     s   t    |dko||k| _|| _||k| _| jr%t||ddd||d| _t||||t|d d |||d| _| jrHt	|t
|d tjd| _t||ddd|d d| _d S )Nr   r   )r7   r:   kernel_sizer>   paddingrJ   rC   r   r7   r:   rK   r>   rL   groupsrJ   rC      )r    )r#   r$   use_res_connectr;   expandr   expand_convr   bottleneck_convr   r   r	   ZHardsigmoidmid_selinear_conv)	r)   r7   r9   r:   rI   r>   r;   rC   rJ   r*   r   r   r$      sN   



zInvertedResidual.__init__xr   c                 C  sN   |}| j r
| |}| |}| jr| |}| |}| jr%t||}|S r-   )	rQ   rR   rS   r;   rT   rU   rP   paddleadd)r)   rV   identityr   r   r   r1      s   



zInvertedResidual.forward)r7   r   r9   r   r:   r   rI   r   r>   r   r;   r<   rC   r   rJ   r   r!   r"   rV   r   r!   r   )r   r   r   r$   r1   r4   r   r   r*   r   rH      s    7rH   c                      sN   e Zd ZU dZded< ded< ded< 				
dd fddZdddZ  ZS )MobileNetV3aO  MobileNetV3 model from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        config (list[InvertedResidualConfig]): MobileNetV3 depthwise blocks config.
        last_channel (int): The number of channels on the penultimate layer.
        scale (float, optional): Scale of channels in each layer. Default: 1.0.
        num_classes (int, optional): Output dim of last fc layer. If num_classes <=0, last fc layer
            will not be defined. Default: 1000.
        with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.
    r?   r/   r   r   r<   r   r6     Tconfiglist[InvertedResidualConfig]last_channelr!   r"   c              
     s
  t    || _|| _|| _|| _|| _|d j| _|d j| _	| j	d | _
ttjddd td| jddd	d	tj d
| _tj fdd| jD  | _t| j	| j
d	d	dd	 tjd| _|rctd	| _|dkrtt| j
| jt tjddt| j|| _d S d S )Nr      gMbP?gGz?)epsilonZmomentum   r   r   )r7   r:   rK   r>   rL   rN   rC   rJ   c                   s2   g | ]}t |j|j|j|j|j|j|j d qS ))r7   r9   r:   rI   r>   r;   rC   rJ   )rH   r7   r9   r:   r8   r>   r;   rC   ).0cfgrJ   r   r   
<listcomp>   s    z(MobileNetV3.__init__.<locals>.<listcomp>rM   g?)p)r#   r$   r]   r/   r_   r   r   r7   Zfirstconv_in_channelsZlastconv_in_channelsZlastconv_out_channelsr   r	   ZBatchNorm2Dr   rD   convZ
Sequentialblockslastconvr%   r&   ZLinearZDropout
classifier)r)   r]   r_   r/   r   r   r*   rf   r   r$      sZ   



zMobileNetV3.__init__rV   r   c                 C  sR   |  |}| |}| |}| jr| |}| jdkr't|d}| |}|S )Nr   r   )	ri   rj   rk   r   r&   r   rW   flattenrl   )r)   rV   r   r   r   r1     s   





zMobileNetV3.forwardr6   r\   T)r]   r^   r_   r   r/   r?   r   r   r   r<   r!   r"   rZ   )r   r   r   r2   r   r$   r1   r4   r   r   r*   r   r[      s   
 Er[   c                      *   e Zd ZdZ			dd fddZ  ZS )MobileNetV3Smalla  MobileNetV3 Small architecture model from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        scale (float, optional): Scale of channels in each layer. Default: 1.0.
        num_classes (int, optional): Output dim of last fc layer. If num_classes <= 0, last fc layer
            will not be defined. Default: 1000.
        with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.

    Returns:
        :ref:`api_paddle_nn_Layer`. An instance of MobileNetV3 Small architecture model.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> from paddle.vision.models import MobileNetV3Small

            >>> # Build model
            >>> model = MobileNetV3Small(scale=1.0)

            >>> x = paddle.rand([1, 3, 224, 224])
            >>> out = model(x)

            >>> print(out.shape)
            [1, 1000]
    r6   r\   Tr/   r?   r   r   r   r<   r!   r"   c                   s  t ddddddd|t ddddddd|t ddd	dddd
|t ddddddd|t ddddddd
|t ddddddd
|t ddddddd
|t ddddddd
|t ddddddd|t ddddddd
|t ddddddd
|g}td| d}t j|||||d d S )N   rc   Tr@   r   H      FX   r      `   (   rA      x   0      i   i@  i   rF   r_   r/   r   r   r5   r   r#   r$   r)   r/   r   r   r]   r_   r*   r   r   r$   J  s(   
zMobileNetV3Small.__init__rn   r/   r?   r   r   r   r<   r!   r"   r   r   r   r2   r$   r4   r   r   r*   r   rp   -      rp   c                      ro   )MobileNetV3Largea  MobileNetV3 Large architecture model from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        scale (float, optional): Scale of channels in each layer. Default: 1.0.
        num_classes (int, optional): Output dim of last fc layer. If num_classes <= 0, last fc layer
            will not be defined. Default: 1000.
        with_pool (bool, optional): Use pool before the last fc layer or not. Default: True.

    Returns:
        :ref:`api_paddle_nn_Layer`. An instance of MobileNetV3 Large architecture model.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> from paddle.vision.models import MobileNetV3Large

            >>> # Build model
            >>> model = MobileNetV3Large(scale=1.0)

            >>> x = paddle.rand([1, 3, 224, 224])
            >>> out = model(x)

            >>> print(out.shape)
            [1, 1000]
    r6   r\   Tr/   r?   r   r   r   r<   r!   r"   c                   sX  t ddddddd|t ddddddd|t ddd	dddd|t dd
d	dddd|t dd
ddddd|t dd
ddddd|t ddddddd|t ddddddd|t ddddddd|t ddddddd|t ddddddd|t ddddddd|t dd
ddddd|t dd
ddddd|t dd
ddddd|g}td| d}t j|||||d d S )Nrq   rc   Fr@   r   @   rs   r   rr   ru   rw   Try   rx   P   rA         i  p   i     i  i   rF   r|   r}   r~   r*   r   r   r$     sT   #
zMobileNetV3Large.__init__rn   r   r   r   r   r*   r   r   g  r   r   Fr6   archr=   
pretrainedr<   r/   r?   kwargsUnpack[_MobileNetV3Options]r!   c                 K  s   | dkrt dd|i|}n	tdd|i|}|rB|  d| } | tv s+J |  dtt|  d t|  d }t|}|| |S )Nmobilenet_v3_larger/   Z_xzJ model do not have a pretrained model now, you should set pretrained=Falser   r   r   )r   rp   
model_urlsr
   rW   loadZset_dict)r   r   r/   r   modelZweight_pathparamr   r   r   _mobilenet_v3  s   


r   c                 K     t 	d|| d|}|S )a  MobileNetV3 Small architecture model from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained on ImageNet. Default: False.
        scale (float, optional): Scale of channels in each layer. Default: 1.0.
        **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`MobileNetV3Small <api_paddle_vision_models_MobileNetV3Small>`.

    Returns:
        :ref:`api_paddle_nn_Layer`. An instance of MobileNetV3 Small architecture model.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> from paddle.vision.models import mobilenet_v3_small

            >>> # Build model
            >>> model = mobilenet_v3_small()

            >>> # Build model and load imagenet pretrained weight
            >>> # model = mobilenet_v3_small(pretrained=True)

            >>> # Build mobilenet v3 small model with scale=0.5
            >>> model = mobilenet_v3_small(scale=0.5)

            >>> x = paddle.rand([1, 3, 224, 224])
            >>> out = model(x)

            >>> print(out.shape)
            [1, 1000]
    mobilenet_v3_smallr/   r   N)r   r   r   r/   r   r   r   r   r   r        %r   c                 K  r   )a  MobileNetV3 Large architecture model from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        pretrained (bool, optional): Whether to load pre-trained weights. If True, returns a model pre-trained on ImageNet. Default: False.
        scale (float, optional): Scale of channels in each layer. Default: 1.0.
        **kwargs (optional): Additional keyword arguments. For details, please refer to :ref:`MobileNetV3Large <api_paddle_vision_models_MobileNetV3Large>`.

    Returns:
        :ref:`api_paddle_nn_Layer`. An instance of MobileNetV3 Large architecture model.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> from paddle.vision.models import mobilenet_v3_large

            >>> # Build model
            >>> model = mobilenet_v3_large()

            >>> # Build model and load imagenet pretrained weight
            >>> # model = mobilenet_v3_large(pretrained=True)

            >>> # Build mobilenet v3 large model with scale=0.5
            >>> model = mobilenet_v3_large(scale=0.5)

            >>> x = paddle.rand([1, 3, 224, 224])
            >>> out = model(x)

            >>> print(out.shape)
            [1, 1000]
    r   r   N)r   r   r   r   r   r   r     r   r   )Fr6   )
r   r=   r   r<   r/   r?   r   r   r!   r[   )r   r<   r/   r?   r   r   r!   rp   )r   r<   r/   r?   r   r   r!   r   )!
__future__r   	functoolsr   typingr   r   r   Ztyping_extensionsr   r   rW   r	   Zpaddle.utils.downloadr
   opsr   _utilsr   r   r   __all__r   ZLayerr   r5   rH   r[   rp   r   r   r   r   r   r   r   r   <module>   s>   ($Ee:R,