o
    i                     @  sH  d dl mZ d dlmZ d dlmZ d dlmZmZ d dl	m
Z
 d dlmZ erud dlZd dlmZ d d	lmZ ee d d
lmZ W d   n1 sNw   Y  d dlmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZ G dd dZ eddG dd dZ!eddG dd dZ"eG dd dZ#d ddZ$dS )!    )annotations)Mapping)	dataclass)TYPE_CHECKINGLiteral)parse_into_list_of_expressions)issue_unstable_warningN)Path)	DataFrame)PyExpr)CallableSequence)IO)StorageOptionsDictSyncOnCloseMethod)Expr)CredentialProviderBuilderc                   @  s(   e Zd ZdZdddddddddZdS )PartitionBya  
    Configuration for writing to multiple output files.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    base_path
        Base path to write to.
    file_path_provider
        Callable for custom file output paths.
    key
        Expressions to partition by.
    include_key
        Include the partition key expression outputs in the output files.
    max_rows_per_file
        Maximum number of rows to write for each file. Note that files may have
        less than this amount of rows.
    approximate_bytes_per_file
        Approximate number of bytes to write to each file. This is measured as
        the estimated size of the DataFrame in memory.

    Examples
    --------
    Split to multiple files partitioned by year:

    >>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
    ...     pl.PartitionBy("data/", key="year")
    ... )  # doctest: +SKIP

    Split to multiple files based on size:

    >>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
    ...     pl.PartitionBy(
    ...         "data/", max_rows_per_file=1000, approximate_bytes_per_file=100_000_000
    ...     )
    ... )  # doctest: +SKIP

    Split to multiple files partitioned by year, with limits on individual file sizes:

    >>> pl.LazyFrame({"year": [2026, 2027, 1970], "month": [0, 0, 0]}).sink_parquet(
    ...     pl.PartitionBy(
    ...         "data/",
    ...         key="year",
    ...         max_rows_per_file=1000,
    ...         approximate_bytes_per_file=100_000_000,
    ...     )
    ... )  # doctest: +SKIP
    Nauto)file_path_providerkeyinclude_keymax_rows_per_fileapproximate_bytes_per_file	base_path
str | Pathr   ECallable[[FileProviderArgs], str | Path | IO[bytes] | IO[str]] | Noner   =str | Expr | Sequence[str | Expr] | Mapping[str, Expr] | Noner   bool | Noner   
int | Noner   int | Literal['auto'] | NonereturnNonec                C  s   d}t | |d u r|d u r|dkrd}t||d u r&|d ur&d}t|t|}|dkr6|d u r4dnd }|d u r<d}t|||d urGt|nd |||d| _d S )Nz2`PartitionBy` functionality is considered unstabler   zlat least one of ('key', 'max_rows_per_file', 'approximate_bytes_per_file') must be specified for PartitionByz1cannot use 'include_key' without specifying 'key'l    l    )r   r   r   r   r   r   )r   
ValueErrorstr_PartitionByInner_parse_to_pyexpr_listZ_pl_partition_by)selfr   r   r   r   r   r   msg r)   J/home/app/Keep/.python/lib/python3.10/site-packages/polars/io/partition.py__init__P   s2   zPartitionBy.__init__)r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   )__name__
__module____qualname____doc__r+   r)   r)   r)   r*   r      s    ;r   T)kw_onlyc                   @  s"   e Zd ZU dZded< ded< dS )FileProviderArgsz
    Holds information on the file being sinked to.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.
    intZindex_in_partitionr
   Zpartition_keysNr,   r-   r.   r/   __annotations__r)   r)   r)   r*   r1      s   
 r1   c                   @  sB   e Zd ZU dZded< ded< ded< ded	< d
ed< ded< dS )r%   zG
    Holds parsed partitioned sink options.

    For internal use.
    r$   r   r   r   zlist[PyExpr] | Noner   r   r   r   r   r2   r   Nr3   r)   r)   r)   r*   r%      s   
 r%   c                   @  sF   e Zd ZU dZded< ded< dZded< dZded	< dZd
ed< dS )_SinkOptionsz
    Holds sink options that are generic over file / target type.

    For internal use. Most of the options will parse into `UnifiedSinkArgs`.
    boolmkdirZmaintain_orderNzSyncOnCloseMethod | Nonesync_on_closezStorageOptionsDict | Nonestorage_optionsz CredentialProviderBuilder | Nonecredential_provider)r,   r-   r.   r/   r4   r8   r9   r:   r)   r)   r)   r*   r5      s   
 r5   exprs_or_columns6str | Expr | Sequence[str | Expr] | Mapping[str, Expr]r!   list[PyExpr]c                 C  s$   t | trdd |  D S t| S )Nc                 S  s   g | ]
\}}| |jqS r)   )aliasZ_pyexpr).0ker)   r)   r*   
<listcomp>   s    z)_parse_to_pyexpr_list.<locals>.<listcomp>)
isinstancer   itemsr   )r;   r)   r)   r*   r&      s   
r&   )r;   r<   r!   r=   )%
__future__r   collections.abcr   dataclassesr   typingr   r   Zpolars._utils.parse.exprr   Zpolars._utils.unstabler   
contextlibpathlibr	   Zpolarsr
   suppressImportErrorZpolars._plrr   r   r   r   Zpolars._typingr   r   Zpolars.exprr   Z,polars.io.cloud.credential_provider._builderr   r   r1   r%   r5   r&   r)   r)   r)   r*   <module>   s4    i