o
    * i"                     @   s  d Z ddlZddlZddlZddlZddlZddlm	Z	 g Z
g dZdZdZG dd dZG d	d
 d
Zdadadadadd Zd1ddZe	ddddddd ZejeddZejeddZe	ddddddd Zdd Ze	ddddddd Ze	dddddd d! Zd"d# Ze	dddddd$d% Ze	dddddd&d' Z e	dddddd(d) Z!e	dddddd*d+ Z"d,d- Z#e	dddddd.d/ Z$e%d0kre#  dS dS )2aD  
Movielens 1-M dataset.

Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
movies, which was collected by GroupLens Research. This module will download
Movielens 1-M dataset from
http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
set and test set into paddle reader creators.

    N)
deprecated)         #   -   2   8   z3https://dataset.bj.bcebos.com/movielens%2Fml-1m.zipZ c4d9eecfca2ab87c1945afe126590906c                   @   0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )	MovieInfozM
    Movie id, title and categories information are stored in MovieInfo.
    c                 C   s   t || _|| _|| _d S N)intindex
categoriestitle)selfr   r   r    r   d/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/dataset/movielens.py__init__0   s   

zMovieInfo.__init__c                 C   s(   | j dd | jD dd | j D gS )z/
        Get information from a movie.
        c                 S   s   g | ]}t | qS r   )CATEGORIES_DICT).0cr   r   r   
<listcomp>;   s    z#MovieInfo.value.<locals>.<listcomp>c                 S   s   g | ]}t |  qS r   )MOVIE_TITLE_DICTlower)r   wr   r   r   r   <   s    )r   r   r   splitr   r   r   r   value5   s   zMovieInfo.valuec                 C   s   d| j  d| j d| j dS )Nz<MovieInfo id(z	), title(z), categories()>)r   r   r   r   r   r   r   __str__?   s   zMovieInfo.__str__c                 C   s   |   S r   )r    r   r   r   r   __repr__B      zMovieInfo.__repr__N__name__
__module____qualname____doc__r   r   r    r!   r   r   r   r   r   +   s    
r   c                   @   r
   )UserInfozK
    User id, gender, age, and job information are stored in UserInfo.
    c                 C   s2   t || _|dk| _tt || _t || _d S )NM)r   r   is_male	age_tableagejob_id)r   r   genderr,   r-   r   r   r   r   K   s   

zUserInfo.__init__c                 C   s   | j | jrdnd| j| jgS )z.
        Get information from a user.
        r   r   )r   r*   r,   r-   r   r   r   r   r   Q   s   zUserInfo.valuec              	   C   s6   | j rdnd}d| j d| dt| j  d| j d	S )Nr)   Fz<UserInfo id(z
), gender(z), age(z), job(r   )r*   r   r+   r,   r-   )r   r.   r   r   r   r    W   s   (zUserInfo.__str__c                 C   s   t | S r   )strr   r   r   r   r!   [   r"   zUserInfo.__repr__Nr#   r   r   r   r   r(   F   s    r(   c               
   C   s  t jjtdt} td u rtd}t	j
| d}| D ]}t|t	js'J i at }t }|dQ}t|D ]D\}}|jdd}| d\}	}
}|d}|D ]}|| qT||
d	}
t|	||
d
tt|	< |
 D ]	}||  qsq9W d    n1 sw   Y  i at|D ]\}}|t|< qi at|D ]\}}|t|< qi a|d+}|D ] }|jdd}| d\}}}}}t||||dtt|< qW d    n1 sw   Y  qW d    | S 1 sw   Y  | S )N	movielensz^(.*)\((\d+)\)$filezml-1m/movies.datlatinencoding::|r   )r   r   r   zml-1m/users.dat)r   r.   r,   r-   )paddledatasetcommondownloadURLMD5
MOVIE_INFOrecompilezipfileZipFileinfolist
isinstanceZipInfosetopen	enumeratedecodestripr   addmatchgroupr   r   r   r   r   	USER_INFOr(   )fnpatternpackageinfoZtitle_word_setZcategories_setZ
movie_fileilineZmovie_idr   r   r   r   Z	user_fileuidr.   r,   Zjob_r   r   r   __initialize_meta_info__e   s^   




''rX   皙?Fc              	   c   s   t  }tj|  tj|dj}|dL}|D ]A}|jdd}tj |k |krZ| 	d\}}}}	t
|}t
|}t|d d }t| }
t| }| |
  |gg V  qW d    n1 sew   Y  W d    d S W d    d S 1 s}w   Y  d S )Nr2   zml-1m/ratings.datr4   r5   r7      g      @)rX   nprandomseedrB   rC   rH   rJ   rK   r   r   floatr?   rO   r   )Z	rand_seedZ
test_ratiois_testrP   rR   ZratingrU   rV   Zmov_idrW   Zmovusrr   r   r   
__reader__   s,   
Pra   z2.0.0zpaddle.text.datasets.Movielensr   z>Please use new dataset API which supports paddle.io.DataLoader)ZsinceZ	update_tolevelreasonc                     s    fddS )Nc                      s   t di  S )Nr   )ra   r   kwargsr   r   <lambda>   s    z$__reader_creator__.<locals>.<lambda>r   rd   r   rd   r   __reader_creator__   s   rg   )r_   Tc                   C   
   t   tS )z%
    Get movie title dictionary.
    )rX   r   r   r   r   r   get_movie_title_dict      
ri   c                 C      | j |j kr| S |S r   )r   abr   r   r   __max_index_info__      ro   c                   C      t   tttt jS )z,
    Get the maximum value of movie id.
    )rX   	functoolsreducero   listr?   valuesr   r   r   r   r   max_movie_id      
rv   c                   C   rq   )z+
    Get the maximum value of user id.
    )rX   rr   rs   ro   rt   rO   ru   r   r   r   r   r   max_user_id   rw   rx   c                 C   rk   r   )r-   rl   r   r   r   __max_job_id_impl__   rp   ry   c                   C   rq   )z*
    Get the maximum value of job id.
    )rX   rr   rs   ry   rt   rO   ru   r-   r   r   r   r   
max_job_id   s   
rz   c                   C   rh   )z*
    Get movie categories dictionary.
    )rX   r   r   r   r   r   movie_categories   rj   r{   c                   C   rh   )z#
    Get user info dictionary.
    )rX   rO   r   r   r   r   	user_info  rj   r|   c                   C   rh   )z$
    Get movie info dictionary.
    )rX   r?   r   r   r   r   
movie_info  rj   r}   c                  C   s:   t t  D ]\} }qt t  D ]\}}qt| | d S r   )rI   traintestprint)Ztrain_countrW   Z
test_countr   r   r   unittest(  s
   r   c                   C   s   t jjtdt d S )Nr1   )r9   r:   r;   r<   r=   r>   r   r   r   r   fetch1  s   r   __main__)r   rY   F)&r'   rr   r@   rB   numpyr[   Zpaddle.dataset.commonr9   Zpaddle.utilsr   __all__r+   r=   r>   r   r(   r?   r   r   rO   rX   ra   rg   partialr~   r   ri   ro   rv   rx   ry   rz   r{   r|   r}   r   r   r$   r   r   r   r   <module>   s   
/








	

