o
    `)i8,                     @   s  d dl Z d dlmZmZ d dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ dd	lmZ d
dlmZmZ dd Zdd Zdd Zedddd Zejj			d$dedeeee f dee dedededefd d!ZG d"d# d#ej Z!dS )%    N)ListUnion)nnTensor)is_compile_supported)BroadcastingList2)_pair)_assert_has_ops_has_ops   )_log_api_usage_once   )check_roi_boxes_shapeconvert_boxes_to_roi_formatc                     s    fdd}|S )zkLazily wrap a function with torch.compile on the first call

    This avoids eagerly importing dynamo.
    c                    s   t   fdd}|S )Nc                     s6   t jfi  }t|t j< || i |S N)torchcompile	functoolswrapsglobals__name__)argskwargsZcompiled_fn)compile_kwargsfn e/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/torchvision/ops/roi_align.pycompile_hook   s   z7lazy_compile.<locals>.decorate_fn.<locals>.compile_hook)r   r   )r   r   r   )r   r   decorate_fn   s   z!lazy_compile.<locals>.decorate_fnr   )r   r   r   r   r   lazy_compile   s   	r    c                    s    \} }}|jdd}|jdd}| }	| }
t|	|d k|d |	d }t|	|d k|d |	}	t|	|d k|j|}t|
|d k|d |
d }t|
|d k|d |
}
t|
|d k|j|}||	 }||
 }d| }d| } fdd}||	|
}||	|}|||
}|||}dd }|||}|||}|||}|||}|| ||  ||  ||  }|S )	Nr   minr         ?c                    s   d ur*d us
J t d d d d d f | d} t d d d d d f |d}d d d d d d d f t j jdd d d d d d d f | d d d d d d d d d f |d d d d d d d d d f f S )Nr   device)r   wherearanger%   yxZchannelsinputroi_batch_indxmaskymaskr   r   masked_indexB   s     "z+_bilinear_interpolate.<locals>.masked_indexc              	   S   s@   | d d d d d d d d d f |d d d d d d d d d f  S r   r   r(   r   r   r   
outer_prodW   s   @z)_bilinear_interpolate.<locals>.outer_prod)sizeclampintr   r&   todtype)r,   r-   r)   r*   r/   r.   _heightwidthZy_lowZx_lowZy_highZx_highZlylxhyhxr0   v1v2Zv3Zv4r1   Zw1Zw2Zw3Zw4valr   r+   r   _bilinear_interpolate#   s6   







 r@   c                 C   s&   t  r| jr| jt jkr|  S | S r   )r   Zis_autocast_enabledis_cudar6   doublefloat)Ztensorr   r   r   
maybe_caste   s   rD   T)Zdynamicc           $   
   C   s6  | j }t| } t|}|  \}}}	}
tj|| jd}tj|| jd}|d d df  }|r1dnd}|d d df | | }|d d df | | }|d d df | | }|d d df | | }|| }|| }|s{tj|d	d
}tj|d	d
}|| }|| }|dk}|r|nt|| }|r|nt|| }	 |rt	|| d}tj|| jd}tj|| jd}d }d }n9tj|| dd
}tj|	| jd}tj|
| jd}|d d d f |d d d f k }|d d d f |d d d f k }dd }|||d d d d f ||  |d d d d f d 
| j |||   } |||d d d d f ||  |d d d d f d 
| j |||   }!t| || |!||}"|swt|d d d d d d d d f |"d}"t|d d d d d d d d f |"d}"|"d}#t|tjr|#|d d d d d f  }#n|#| }#|#
|}#|#S )Nr$   r   g      ?g        r   r         r#   r!   c                 S   s   | d d d d f S r   r   )tr   r   r   from_K   s   z_roi_align.<locals>.from_K))r6   rD   r2   r   r'   r%   r4   r3   ceilmaxr5   r@   r&   sum
isinstancer   )$r,   roisspatial_scaleZpooled_heightZpooled_widthsampling_ratioalignedZ
orig_dtyper7   r8   r9   phpwr-   offsetZroi_start_wZroi_start_hZ	roi_end_wZ	roi_end_hZ	roi_widthZ
roi_heightZ
bin_size_hZ
bin_size_wZexact_samplingZroi_bin_grid_hZroi_bin_grid_wcountZiyZixr/   r.   rH   r)   r*   r?   outputr   r   r   
_roi_alignr   sr     ((&&

rX   r#   rI   Fr,   boxesoutput_sizerP   rQ   rR   returnc              	   C   s   t j st j stt t| |}t|}t|t j	s"t
|}t j sKt r7t  rK| js7| js7| jrKt| jjrKt| |||d |d ||S t  t jj| |||d |d ||S )aj  
    Performs Region of Interest (RoI) Align operator with average pooling, as described in Mask R-CNN.

    Args:
        input (Tensor[N, C, H, W]): The input tensor, i.e. a batch with ``N`` elements. Each element
            contains ``C`` feature maps of dimensions ``H x W``.
            If the tensor is quantized, we expect a batch size of ``N == 1``.
        boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
            format where the regions will be taken from.
            The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
            If a single Tensor is passed, then the first column should
            contain the index of the corresponding element in the batch, i.e. a number in ``[0, N - 1]``.
            If a list of Tensors is passed, then each Tensor will correspond to the boxes for an element i
            in the batch.
        output_size (int or Tuple[int, int]): the size of the output (in bins or pixels) after the pooling
            is performed, as (height, width).
        spatial_scale (float): a scaling factor that maps the box coordinates to
            the input coordinates. For example, if your boxes are defined on the scale
            of a 224x224 image and your input is a 112x112 feature map (resulting from a 0.5x scaling of
            the original image), you'll want to set this to 0.5. Default: 1.0
        sampling_ratio (int): number of sampling points in the interpolation grid
            used to compute the output value of each pooled output bin. If > 0,
            then exactly ``sampling_ratio x sampling_ratio`` sampling points per bin are used. If
            <= 0, then an adaptive number of grid points are used (computed as
            ``ceil(roi_width / output_width)``, and likewise for height). Default: -1
        aligned (bool): If False, use the legacy implementation.
            If True, pixel shift the box coordinates it by -0.5 for a better alignment with the two
            neighboring pixel indices. This version is used in Detectron2

    Returns:
        Tensor[K, C, output_size[0], output_size[1]]: The pooled RoIs.
    r   r   )r   ZjitZis_scripting
is_tracingr   	roi_alignr   r   rN   r   r   r
   Z$are_deterministic_algorithms_enabledrA   Zis_mpsZis_xpur   r%   typerX   r	   opsZtorchvision)r,   rY   rZ   rP   rQ   rR   rO   r   r   r   r]      s2   )

r]   c                	       sj   e Zd ZdZ	ddee dededef fddZd	e	d
e
e	ee	 f de	fddZdefddZ  ZS )RoIAlignz 
    See :func:`roi_align`.
    FrZ   rP   rQ   rR   c                    s.   t    t|  || _|| _|| _|| _d S r   )super__init__r   rZ   rP   rQ   rR   )selfrZ   rP   rQ   rR   	__class__r   r   rb     s   

zRoIAlign.__init__r,   rO   r[   c                 C   s   t ||| j| j| j| jS r   )r]   rZ   rP   rQ   rR   )rc   r,   rO   r   r   r   forward  s   zRoIAlign.forwardc              
   C   s2   | j j d| j d| j d| j d| j d
}|S )Nz(output_size=z, spatial_scale=z, sampling_ratio=z
, aligned=))re   r   rZ   rP   rQ   rR   )rc   sr   r   r   __repr__  s   
zRoIAlign.__repr__)F)r   
__module____qualname____doc__r   r4   rC   boolrb   r   r   r   rf   strri   __classcell__r   r   rd   r   r`     s    	"r`   )r#   rI   F)"r   typingr   r   r   Ztorch.fxr   r   Ztorch._dynamo.utilsr   Ztorch.jit.annotationsr   Ztorch.nn.modules.utilsr   Ztorchvision.extensionr	   r
   utilsr   _utilsr   r   r    r@   rD   rX   Zfxwrapr4   rC   rm   r]   Moduler`   r   r   r   r   <module>   sH    B
X;