o
    hqi\                     @   s  d dl Z d dlmZ d dlmZ d dlZd dlZd dlmZ d dl	Z	d dl
mZ d dlmZmZmZ d dlmZmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZmZmZ d dl m!Z!m"Z" e#d$dZ%dd Z&ej'(dddde)ddfg ddfg ddfdde*dddfe*d ddfej+g dej,ddfej+g dej-ddfej+g dej.ddfddgdfde+ddgdfd g d!dfd"e*d#d$dfe+g d!dfej+g d!e/ddfgd%d& Z0d'd( Z1d)d* Z2eej'j(d+e ed,d-d. Z3ej'(d/g d0ej'(d1g d2d3d4 Z4ej'(d/g d5ej'(d1g d2d6d7 Z5ej'(d/g d8ej'(d1g d2ej'(d9d:dgd;d<ggd=d> Z6ej'(d?ddgej'(d@ddgej'(d/g d8ej'(d1dAdBgej'(dCd g dDg dEgfd:ddFgdGdHgdIdggfgdJdK Z7ej'(d/g d5ej'(d1g dLdMdN Z8ej'(d/g d8ej'(d1g dLej'(dOd g dDg dEgfd:ddFgdGdHgdIdggfgdPdQ Z9ej'(dRg dSdTdU Z:ej'(d/g d5dVdW Z;ej'(dRg dXej'(d9dd<gdYdZ Z<ej'(d/g d[d\d] Z=d^d_ Z>ej'(d`ddFgdadb Z?ej'(dcg dddedf Z@dgdh ZAdidj ZBej'(d9d d d:ge*d de+d d:ggdkdl ZCej'(d/g dmdndo ZDej'(dpdqdre/ dsfgdtdu ZEej'(dvdwgdxgdwdxgdwdygdxdyggdzd{ ZFd|d} ZGd~d ZHdd ZIdd ZJdd ZKdd ZLdd ZMej'(de"dd ZNdd ZOej'(de!dd ZPdS )    N)copy)SkipTest)kstest)parse)_safe_indexingresampleshuffle)_get_namespace_device_dtype_ids)yield_namespace_device_dtype_combinations)_determine_key_type_get_column_indices_safe_assign)MockDataFrame)_array_api_for_tests_convert_containerassert_allclose_dense_sparseassert_array_equal'skip_if_array_api_compat_not_configured)CSC_CONTAINERSCSR_CONTAINERS	   )   r   c                  C   s\  t jddd} | jg dg dg dddd	}d
dlm} dgddgg ddgdgg}|D ]}t||dd}||| | q.g dddgfg ddgfg}|D ]\}}t||dd}||dd|f | qPd
dgddgfdgdgfg}	|	D ]\}
}t||
dd}||dd|f | qwd
dgddgddgg}|D ]}t||d
d}||| | qdS )z,Check _safe_indexing for polars as expected.polarsz0.18.2)Z
minversion)      r      )r            )r   r   r   
   abcrow)Zorientr   )assert_frame_equalr"   r!   )r"   r!   r#   r#   r   axisTFT)FFTNr   r   )pytestimportorskip	DataFrameZpolars.testingr%   r   )pldfr%   Zstr_keyskeyoutZ	bool_keysZbool_keyZstr_keyZint_keysZint_keyZaxis_0_keys r0   h/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/sklearn/utils/tests/test_indexing.pytest_polars_indexing#   s,   r2   z
key, dtype)r   int)0str)TboolTr6   r   r   r   r3   r4   12r5   )r7   r3   )r8   r5   r   dtypeF))TFr6   )col_0r5   r=   col_1col_2)r>   r5   beginendc                 C   s   t | |ksJ d S N)r   )r.   r<   r0   r0   r1   test_determine_key_typeD   s   rD   c                   C   s<   t jtdd td W d    d S 1 sw   Y  d S )NNo valid specification of thematch      ?)r)   raises
ValueErrorr   r0   r0   r0   r1   test_determine_key_type_errorc   s   
"rK   c                   C   sH   t jtdd ttddddd W d    d S 1 sw   Y  d S )NzOnly array-like or scalar arerF   r   r   r   F)Zaccept_slice)r)   rI   	TypeErrorr   slicer0   r0   r0   r1   #test_determine_key_type_slice_errorh   s   "rN   z#array_namespace, device, dtype_name)idsc              	   C   s   t | |}tjddk |g d}t|dksJ |g d}t|dks*J z	|g d}W n ty>   d }Y nw |d urftjtdd	 t| W d    n1 sYw   Y  W d    d S W d    d S W d    d S 1 syw   Y  d S )
NT)Zarray_api_dispatchr   r   r   r3   r(   r6   )y      ?      ?y       @       @y      @      @rE   rF   )	r   sklearnZconfig_contextasarrayr   rL   r)   rI   rJ   )Zarray_namespaceZdeviceZ
dtype_nameZxpZint_array_keyZbool_array_keyZcomplex_array_keyr0   r0   r1   !test_determine_key_type_array_apim   s(   

"rS   
array_type)listarraysparse	dataframer   pyarrowindices_type)rU   tuplerV   seriesrM   c                 C   s   ddg}|dkrt |d tr|d  d7  < tg dg dg dg| }t||}t||dd}t|tg dg dg|  d S )	Nr   r   rM   rP   r   r   r      r   r   r   r&   
isinstancer3   r   r   r   rT   rZ   indicesrV   subsetr0   r0   r1   &test_safe_indexing_2d_container_axis_0   s   
re   )rU   rV   r\   polars_seriespyarrow_arrayc                 C   sl   ddg}|dkrt |d tr|d  d7  < tg d| }t||}t||dd}t|tddg|  d S )Nr   r   rM   	r   r   r   r   r   r   r_   r   r   r   r&   r   r`   rb   r0   r0   r1   test_safe_indexing_1d_container   s   
ri   )rV   rW   rX   r   rY   rc   r   r?   r@   c                 C   s   t |}|dkrt|d tr|d  d7  < g d}tg dg dg dg| |}t||}t|d tr]| dv r]d	}tjt|d
 t||dd W d    d S 1 sVw   Y  d S t||dd}t	|tddgddgddgg|  d S )NrM   r   r>   rP   r]   r^   r   rV   rW   ESpecifying the columns using strings is only supported for dataframesrF   r&   r   r   r   r   r   r   )
r   ra   r3   r   r5   r)   rI   rJ   r   r   )rT   rZ   rc   Zindices_convertedcolumns_namerV   err_msgrd   r0   r0   r1   &test_safe_indexing_2d_container_axis_1   s$   
"rn   array_read_onlyindices_read_onlyrV   r\   zaxis, expected_arrayr]   r^   r   r   r   r   c           	      C   s   t g dg dg dg}| r|jdd t||}t ddg}|r*|jdd t||}t|||d}t|t|| d S )	NrP   r]   r^   F)writer   r   r&   )nprV   Zsetflagsr   r   r   )	ro   rp   rT   rZ   r'   expected_arrayrV   rc   rd   r0   r0   r1   &test_safe_indexing_2d_read_only_axis_1   s   

rt   )rU   r[   rV   r\   c                 C   sX   dgdgd  dgd  }t g d| }t ||}t||dd}t|t ddg|  d S )	NFTr   r   rh   r   r&   r   r   r   r   rb   r0   r0   r1   $test_safe_indexing_1d_container_mask   s
   
rv   zaxis, expected_subsetc                 C   sZ   g d}t g dg dg dg| |}g d}t ||}t|||d}t|t ||  d S )Nr>   rP   r]   r^   )FTTr&   ru   )rT   rZ   r'   Zexpected_subsetrl   rV   rc   rd   r0   r0   r1   test_safe_indexing_2d_mask   s   	

rw   z array_type, expected_output_type))rU   rU   rV   rV   rW   rW   rX   r\   r   rf   rY   rg   c                 C   sJ   t g dg dg dg| }d}t||dd}t g d|}t|| d S )NrP   r]   r^   r   r   r&   ru   )rT   expected_output_typerV   rc   rd   rs   r0   r0   r1   #test_safe_indexing_2d_scalar_axis_0   s
   r~   c                 C   s0   t g d| }d}t||dd}|dksJ d S )Nrh   r   r   r&   r   )r   r   )rT   rV   rc   rd   r0   r0   r1   test_safe_indexing_1d_scalar  s   r   )rx   ry   rz   r{   r|   c           	      C   s   g d}t g dg dg dg| |}t|tr?| dv r?d}tjt|d t||dd	 W d    d S 1 s8w   Y  d S t||dd	}g d
}|dkrVdgdgdgg}t ||}t|| d S )Nr>   rP   r]   r^   rj   rk   rF   r   r&   )r   r   r   rW   r   r   r   )r   ra   r5   r)   rI   rJ   r   r   )	rT   r}   rc   rl   rV   rm   rd   Zexpected_outputrs   r0   r0   r1   #test_safe_indexing_2d_scalar_axis_1  s    "
r   )rU   rV   rW   c                 C   s8   t g dg dg dg| }t|d dd}t|| d S )NrP   r]   r^   r   r&   ru   )rT   XZX_subsetr0   r0   r1   test_safe_indexing_None_axis_0=  s   r   c                  C   s\   t d} d}| t}t jt|d t|dgdd W d    d S 1 s'w   Y  d S )Npandasz&No valid specification of the columns.rF   rH   r   r&   )r)   r*   r+   X_toyrI   rJ   r   )pdrm   r   r0   r0   r1   0test_safe_indexing_pandas_no_matching_cols_errorD  s   

"r   r'   c                 C   sF   t jtdd ttddg| d W d    d S 1 sw   Y  d S )Nz'axis' should be either 0rF   r   r   r&   )r)   rI   rJ   r   r   r&   r0   r0   r1   test_safe_indexing_error_axisL  s   "r   X_constructor)rV   r\   rf   rg   c                 C   s   t td}| dkrt|} n-| dkrtd}||} n| dkr/td}|j|d} n| dkr=td	}||} d
}tjt	|d t
| ddgdd W d    d S 1 s[w   Y  d S )Nr   rV   r\   r   rf   r   )valuesrg   rY   z='X' should be a 2D NumPy array, 2D sparse matrix or dataframerF   r   r   r&   )rU   rangerr   rR   r)   r*   ZSeriesrV   rI   rJ   r   )r   r   r   r,   parm   r0   r0   r1   !test_safe_indexing_1d_array_errorR  s    



"r   c                  C   sd   ddg} g dg dg dg}d}t jt|d t|| dd	 W d    d S 1 s+w   Y  d S )
Nr?   r@   rP   r]   r^   z.String indexing is not supported with 'axis=0'rF   r   r&   r)   rI   rJ   r   )rc   rV   rm   r0   r0   r1   4test_safe_indexing_container_axis_0_unsupported_typej  s   "r   c                  C   s   t d} t| j}t|j}|tdkrtd| g dg dd}t|ddgdd	}t| j	d
r9| j	j
}n| jjj
}t  td| d|jd< W d    n1 sXw   Y  |jd dksfJ d S )Nr   3z;SettingWithCopyWarning has been removed in pandas 3.0.0.devrP   )r   r   r   )r!   r"   r   r   r&   SettingWithCopyWarningerrorr   )r   r   )r)   r*   parse_version__version__base_versionr   r+   r   hasattrerrorsr   corecommonwarningscatch_warningssimplefilterZiloc)r   Z
pd_versionZpd_base_versionr   rd   r   r0   r0   r1   4test_safe_indexing_pandas_no_settingwithcopy_warningr  s   





r   c                 C   s\   ddgddgddgg}d}t jt|d t|| dd	 W d
   d
S 1 s'w   Y  d
S )z@Check that we raise a ValueError when axis=1 with input as list.r   r   r   r   r_   r   z!axis=1 is not supported for listsrF   r&   Nr   )rc   r   rm   r0   r0   r1   *test_safe_indexing_list_axis_1_unsupported  s
   "r   )rV   rW   rX   c                 C   s   t jd}|dd}ddg}|t||jd }t|| }t|||d t||dd}t	|t||  ddg}||jd t|}t|| }t|||d t||dd}t	|t||  d	\}}|j|j }t|| }t|||d t	|t||  d
S )z,Check that `_safe_assign` works as expected.r   r   r   r   r   )row_indexerr&   )column_indexer)NNN)
rr   randomRandomStateZrandnlenshaper   r   r   r   )rT   rngZX_arrayr   r   r   Zassigned_portionr   r0   r0   r1   test_safe_assign  s.   




r   zkey, err_msg)r   z all features must be in \[0, 2\])Zwhatever/A given column is not a column of the dataframez%No valid specification of the columnsc                 C   sZ   t d}|jtg dd}t jt|d t||  W d    d S 1 s&w   Y  d S )Nr   r>   columnsrF   )r)   r*   r+   r   rI   rJ   r   )r.   rm   r   ZX_dfr0   r0   r1   test_get_column_indices_error  s
   
	"r   r.   col1col2col3c                 C   s   t d}tjdtd}g d}|j||d}d| }t t}t	||  W d    n1 s1w   Y  t
|j|ks?J d S )Nr   )r   r   r;   )r   r   r   r   r   r   z1Selected columns, {}, are not unique in dataframe)r)   r*   rr   Zzerosr3   r+   formatrI   rJ   r   r5   value)r.   r   Ztoyr   r   rm   exc_infor0   r0   r1   6test_get_column_indices_pandas_nonunique_columns_error  s   

r   c               
   C   sR  t d} | jg dg dgg dd}tddddgftddd	dgftdddgfd
dgddgftdd
d	dgftddg dftddd	gfddgdd	gfg g fg	}|D ]\}}t|||ksdJ qWd}t jt|d t|dg W d   n1 sw   Y  d}t jt|d t|tddd W d   dS 1 sw   Y  dS )z=Check _get_column_indices for edge cases with the interchanger   rP   r]   r    )Zschemar   Nr   r   r"   r#   r!   r7   r   rF   Znot_a_columnzkey.step must be 1 or None)r)   r*   r+   rM   r   rI   rJ   NotImplementedError)r,   r-   Zkey_resultsr.   resultmsgr0   r0   r1   #test_get_column_indices_interchange  s,   
"r   c                   C   s   t  d u sJ tt t dgddg W d    n1 sw   Y  tt t ddgddgddd W d    n1 s@w   Y  tt ddgdddksSJ d S )	Nr   r   Fr   )replace	n_samplesr   r   )r   )r   r)   rI   rJ   r   r0   r0   r0   r1   test_resample  s    r   c               
   C   s   t g d} t g d}g }g }tdD ]&}|t| |d|| jd d  |t| |d|| jd d  qt |}t |}t	||}t 
|dksUJ |jdks\J d S )	N)r   r   )r   d   r   r   Tr   )r   random_stater   )sample_weightr   r   r   g?)rr   rV   rR   r   appendr   repeatr   meanr   allZpvalue)datar   Zmean_repeatedZmean_reweightedseedZtest_resultr0   r0   r1   test_resample_weighted  s:   



r   c                  C   s   t jd} d}d}| j|dfd}| jd||d}t||ddd d\}}t |dks.J t||dd|d\}}t |dkrBJ t |dksKJ d S )	Nr   r   g?r   sizer   r   r   stratifyr   )rr   r   r   normalZbinomialr   r   sum)r   r   pr   y_Zy_not_stratifiedZy_stratifiedr0   r0   r1   test_resample_stratified)  s   r   c                  C   s   t jd} d}| j|dfd}| jdd|d}t||dd| |d\}}t||d	d| |d\}}t |jd dk s<J t |jd dksHJ t||dd
| |d\}}|jd d
ks]J t |jd dksiJ d S )Nr   r   r   r   r   T2   )r   r   r   r   Fi  )rr   r   r   r   randintr   uniquer   )r   r   r   r   Z	X_replacer   ZX_no_replacer0   r0   r1    test_resample_stratified_replace9  s"   


r   c                  C   s\   t jd} d}| j|dfd}| jdd|dfd}t||d| |d\}}|jdks,J d S )Nr   r   r   r   r   r   r   )rr   r   r   r   r   r   ndim)r   r   r   r   r0   r0   r1   test_resample_stratify_2dyR  s   r   c                   C   s   t jtdd tddgddgddgdd W d    n1 s w   Y  t jtdd tddgddgddgddgd W d    d S 1 sHw   Y  d S )	NzCResampling with sample_weight is only implemented for replace=True.rF   r   r   F)r   r   zCResampling with sample_weight is only implemented for stratify=None)r   r   )r)   rI   r   r   r0   r0   r0   r1   test_notimplementederror\  s   ""r   csr_containerc                 C   s   t jd}d}|j|dfd}|jdd|d}| |dd}tjtdd t	||d	||d
\}}W d    d S 1 s>w   Y  d S )Nr   r   r   r   r   r   zSparse data was passedrF   r   r   )
rr   r   r   r   r   reshaper)   rI   rL   r   )r   r   r   r   r   r   r0   r0   r1   #test_resample_stratify_sparse_errorj  s   "r   c                  C   sZ   dd } t ddgddggddgd	d
ggg}t| |}t| t| ||ks+J d S )Nc                 S   s   t dd | D S )Nc                 s   s"    | ]}t d d |D V  qdS )c                 s   s    | ]}t |V  qd S rC   r[   ).0Cr0   r0   r1   	<genexpr>x  s    zPtest_shuffle_on_ndim_equals_three.<locals>.to_tuple.<locals>.<genexpr>.<genexpr>Nr   )r   Br0   r0   r1   r   x  s     zFtest_shuffle_on_ndim_equals_three.<locals>.to_tuple.<locals>.<genexpr>r   )Ar0   r0   r1   to_tuplew  s   z3test_shuffle_on_ndim_equals_three.<locals>.to_tupler   r   r   r   r   r   r_   r   )rr   rV   setr   )r   r   Sr0   r0   r1   !test_shuffle_on_ndim_equals_threev  s
   &r   csc_containerc                 C   s6  g d}t jg dtd}g d}tt jddgddgdd	ggtd}| t d
dd	}t|||||dd\}}}}	}
|g dksEJ t|tksMJ t	|g d |j
tks[J |g dkscJ t|tkskJ t	|	t jdd	gddgddggtd t|	tksJ t	|
 t ddgd	dgddgg d S )Nr    r;   rP   r!   r   r"   r   r#   r   r   r   )r   )r#   r"   r!   )r   r   r   r   r   )rr   rV   objectr   aranger   r   typerU   r   r<   Ztoarray)r   r!   r"   r#   deZa_sZb_sZc_sZd_sZe_sr0   r0   r1   "test_shuffle_dont_convert_to_array  s   $&*r   )Qr   r   Zunittestr   numpyrr   r)   Zscipy.statsr   rQ   Z$sklearn.externals._packaging.versionr   r   Zsklearn.utilsr   r   r   Zsklearn.utils._array_apir	   r
   Zsklearn.utils._indexingr   r   r   Zsklearn.utils._mockingr   Zsklearn.utils._testingr   r   r   r   r   Zsklearn.utils.fixesr   r   r   r   r   r2   markZparametrizeZbool_rM   rV   Zint32Zint64Zuint8r   rD   rK   rN   rS   re   ri   rn   rt   rv   rw   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r0   r0   r0   r1   <module>   s$   !





.,





(

!

(


