o
    + ik                     @  s   d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ er6d dlmZ d dlmZ ed Zg ZdZd	Zg d
ZG dd deZdS )    )annotations)TYPE_CHECKINGAnyLiteralN)_check_exists_and_download)Dataset)_DTypeLiteraltraintestz:http://paddlemodels.bj.bcebos.com/uci_housing/housing.dataZ d4accdce7a25600298819f8e28e8d593)ZCRIMZZNZINDUSZCHASZNOXZRMZAGEZDISZRADZTAXZPTRATIOBZLSTATc                   @  s\   e Zd ZU dZded< ded< ded< 				
dd ddZd!d"ddZd#ddZd$ddZdS )%
UCIHousinga)  
    Implementation of `UCI housing <https://archive.ics.uci.edu/ml/datasets/Housing>`_
    dataset

    Args:
        data_file(str|None): path to data file, can be set None if
            :attr:`download` is True. Default None.
        mode(str): 'train' or 'test' mode. Default 'train'.
        download(bool): whether to download dataset automatically if
            :attr:`data_file` is not set. Default True.

    Returns:
        Dataset: instance of UCI housing dataset.

    Examples:

        .. code-block:: python

            >>> import paddle
            >>> from paddle.text.datasets import UCIHousing

            >>> class SimpleNet(paddle.nn.Layer):
            ...     def __init__(self):
            ...         super().__init__()
            ...
            ...     def forward(self, feature, target):
            ...         return paddle.sum(feature), target

            >>> paddle.disable_static()

            >>> uci_housing = UCIHousing(mode='train')

            >>> for i in range(10):
            ...     feature, target = uci_housing[i]
            ...     feature = paddle.to_tensor(feature)
            ...     target = paddle.to_tensor(target)
            ...
            ...     model = SimpleNet()
            ...     feature, target = model(feature, target)
            ...     print(feature.shape, target.numpy())
            [] [24.]
            [] [21.6]
            [] [34.7]
            [] [33.4]
            [] [36.2]
            [] [28.7]
            [] [22.9]
            [] [27.1]
            [] [16.5]
            [] [18.9]

    _UciHousingDataSetModemode
str | None	data_filer   dtypeNr
   TdownloadboolreturnNonec                 C  sh   |  dv sJ d| |  | _|| _| jd u r)|s J dt|ttd|| _|   t | _	d S )Nr	   z*mode should be 'train' or 'test', but got z>data_file is not set and downloading automatically is disabledZuci_housing)
lowerr   r   r   URLMD5
_load_datapaddleZget_default_dtyper   )selfr   r   r    r   l/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/text/datasets/uci_housing.py__init__m   s   


zUCIHousing.__init__   皙?feature_numintratiofloatc           	      C  s   t j| jdd}||jd | |}|jdd|jdd|jdd|jd  }}}t|d D ]}|d d |f ||  || ||   |d d |f< q2t	|jd | }| j
dkrf|d | | _d S | j
dkrt||d  | _d S d S )N )sepr   )Zaxis   r
   r   )npfromfiler   Zreshapeshapemaxminsumranger#   r   data)	r   r"   r$   r0   maximumsminimumsZavgsioffsetr   r   r   r      s   


6

zUCIHousing._load_dataidx)tuple[npt.NDArray[Any], npt.NDArray[Any]]c                 C  s>   | j | }t|d d | jt|dd  | jfS )N)r0   r)   arrayZastyper   )r   r5   r0   r   r   r   __getitem__   s   

zUCIHousing.__getitem__c                 C  s
   t | jS )N)lenr0   )r   r   r   r   __len__   s   
zUCIHousing.__len__)Nr
   T)r   r   r   r   r   r   r   r   )r    r!   )r"   r#   r$   r%   r   r   )r5   r#   r   r6   )r   r#   )	__name__
__module____qualname____doc____annotations__r   r   r9   r;   r   r   r   r   r   3   s   
 5
r   )
__future__r   typingr   r   r   numpyr)   r   Zpaddle.dataset.commonr   Z	paddle.ior   Znumpy.typingZnptZpaddle._typing.dtype_liker   r   __all__r   r   Zfeature_namesr   r   r   r   r   <module>   s   