B
    lnd&                 @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlmZ	 d dl
mZ d dlmZ G dd dZdd	 Zd
ddhZdddZdddZdddZdS )    N)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc               @   s6   e Zd ZdZdddZdddZdd	 Zdd
dZdS )FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    Tc             C   s   || _ || _d S )N)pathsvalidate_schema)selfZpath_or_pathsr    r   3/tmp/pip-unpacked-wheel-i2z0tf32/pyarrow/feather.py__init__)   s    zFeatherDataset.__init__Nc             C   sn   t | jd |d}|g| _|j| _x@| jdd D ].}t ||d}| jrT| || | j| q2W t| jS )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   )columns   N)
read_tabler
   Z_tablesr   r   validate_schemasappendr   )r   r   Z_filpathtabler   r   r   r   -   s    zFeatherDataset.read_tablec             C   s(   | j |j s$td|| j |j d S )Nz-Schema in {!s} was different. 
{!s}

vs

{!s})r   equals
ValueErrorformat)r   Zpiecer   r   r   r   r   F   s    zFeatherDataset.validate_schemasc             C   s   | j |dj|dS )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        )r   )use_threads)r   	to_pandas)r   r   r   r   r   r   read_pandasM   s    zFeatherDataset.read_pandas)T)N)NT)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r	      s
   


r	   c             C   sN   |j dkrd S |jt t fkr4td| ntd| t|jd S )Nr   zqColumn '{}' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurezkColumn '{}' of type {} was chunked on conversion to Arrow and cannot be currently written to Feather format)Z
num_chunkstypeextbinarystringr   r   str)namecolr   r   r   check_chunked_overflowa   s    

r(   lz4ZzstdZuncompressed   c             C   s  t jr"t jr"t| t jjr"|  } t | r|dkr:d}n|dkrHd}ntdt	j
| |d}|dkrx0t|jjD ]\}}	|| }
t|	|
 qtW n| }|dkrt|jtt|jkrtd|dk	rtd|dk	rtd	n<|dkrtd
rd}n$|dk	r|tkrtd|tytj||||||d W nL tk
r   t|tr~yt| W n tjk
r|   Y nX  Y nX dS )a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   Fr*   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize optionZ	lz4_framer)   z1compression="{}" not supported, must be one of {})compressioncompression_level	chunksizeversion)r   Zhave_pandasZ
has_sparse
isinstancepdZSparseDataFrameZto_denseZis_data_framer   r   Zfrom_pandas	enumerater   namesr(   lenZcolumn_namessetr   Zis_available_FEATHER_SUPPORTED_CODECSr   r   write_feather	Exceptionr%   osremoveerror)Zdfdestr,   r-   r.   r/   r+   r   ir&   r'   r   r   r   r7   s   sR    




r7   TFc             K   s    t | |||djf d|i|S )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.
    **kwargs
        Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

    Returns
    -------
    df : pandas.DataFrame
        The contents of the Feather file as a pandas.DataFrame
    )r   
memory_mapr   r   )r   r   )sourcer   r   r>   kwargsr   r   r   read_feather   s    rA   c             C   s   t j| ||d}|dkr | S dd |D }ttdd |rL||}n<ttdd |rj||}ndd |D }td	|||j	d
k r|S t
t||kr|S ||S dS )a  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    )Zuse_memory_mapr   Nc             S   s   g | ]}t |qS r   )r!   ).0columnr   r   r   
<listcomp>  s    zread_table.<locals>.<listcomp>c             S   s   | t kS )N)int)tr   r   r   <lambda>      zread_table.<locals>.<lambda>c             S   s   | t kS )N)r%   )rF   r   r   r   rG     rH   c             S   s   g | ]
}|j qS r   )r   )rB   rF   r   r   r   rD     s    z<Columns must be indices or names. Got columns {} of types {}   )r   ZFeatherReaderreadallmapZread_indicesZ
read_names	TypeErrorr   r/   sortedr5   select)r?   r   r>   r   readerZcolumn_typesr   Zcolumn_type_namesr   r   r   r      s"    

r   )NNNr*   )NTF)NFT)r9   Zpyarrow.pandas_compatr   Zpyarrow.libr   r   r   r   libr"   Zpyarrowr   Zpyarrow._featherr   r	   r(   r6   r7   rA   r   r   r   r   r   <module>   s   D
 
S 
