o
    `+ i                     @   s8   d dl Z d dlmZmZ d dlmZ G dd deZdS )    N)AnyOptional)RecursiveCharacterTextSplitterc                       s`   e Zd ZdZ			ddeee  dededed	df
 fd
dZ	ded	ee f fddZ
  ZS )JSFrameworkTextSplitteran  Text splitter that handles React (JSX), Vue, and Svelte code.

    This splitter extends RecursiveCharacterTextSplitter to handle
    React (JSX), Vue, and Svelte code by:

    1. Detecting and extracting custom component tags from the text
    2. Using those tags as additional separators along with standard JS syntax

    The splitter combines:

    * Custom component tags as separators (e.g. <Component, <div)
    * JavaScript syntax elements (function, const, if, etc)
    * Standard text splitting on newlines

    This allows chunks to break at natural boundaries in
    React, Vue, and Svelte component code.
    N  r   
separators
chunk_sizechunk_overlapkwargsreturnc                    s&   t  jd||d| |pg | _dS )aG  Initialize the JS Framework text splitter.

        Args:
            separators: Optional list of custom separator strings to use
            chunk_size: Maximum size of chunks to return
            chunk_overlap: Overlap in characters between chunks
            **kwargs: Additional arguments to pass to parent class
        )r   r	   N )super__init___separators)selfr   r   r	   r
   	__class__r   h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/langchain_text_splitters/jsx.pyr      s   z JSFrameworkTextSplitter.__init__textc                    sj   t d|}g }|D ]}||vr|| q
dd |D }g d}| j| | g d }|| _t |S )a  Split text into chunks.

        This method splits the text into chunks by:

        * Extracting unique opening component tags using regex
        * Creating separators list with extracted tags and JS separators
        * Splitting the text using the separators by calling the parent class method

        Args:
            text: String containing code to split

        Returns:
            List of text chunks split on component and JS boundaries
        z<\s*([a-zA-Z0-9]+)[^>]*>c                 S   s   g | ]}d | qS )<r   ).0tagr   r   r   
<listcomp>C   s    z6JSFrameworkTextSplitter.split_text.<locals>.<listcomp>)z
export z export z

function z
async function z async function z
const z
let z
var z
class z class z
if z if z
for z for z
while z while z
switch z switch z
case z case z	
default z	 default )z<>z

z&&
z||
)refindallappendr   r   
split_text)r   r   Zopening_tagsZcomponent_tagsr   Zcomponent_separatorsZjs_separatorsr   r   r   r   r   ,   s$   
z"JSFrameworkTextSplitter.split_text)Nr   r   )__name__
__module____qualname____doc__r   liststrintr   r   r   __classcell__r   r   r   r   r      s"    
"r   )r   typingr   r   Zlangchain_text_splittersr   r   r   r   r   r   <module>   s    