o
    * iG                    @  st  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZddl	m
Z
 g dZedZdKdd	Z	dLd
dZdLddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# ZdKd$d%Zd&d' Z	)dMdNd,d-Z	.dOdNd/d0Z	1dPd2d3Z dQd5d6Z!d7d8 Z"	(	)	9dRdSd;d<Z#d=d> Z$d?d@ Z%dAdB Z&dCdD Z'dEdF Z(dGdH Z)dIdJ Z*dS )T    )annotationsN   )_PRUNE_FUNC)full	full_attn	core_attnZ
auto_tunerFc                 C  st   | dkr| gS t  }d}| d d }||k r0| | dkr(|| || |  |d7 }||k st|}t||dS )z'Return the divisor of the given number.r      r   reverse)setaddlistsorted)numr
   resultsimid r   o/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/distributed/auto_tuner/utils.pydivisor    s   
r   c                   s    t | |||} fdd|D S )zZReturn the degree of different parallel modes by gpus and nodes num with customized range.c                      g | ]}| v r|qS r   r   ).0Zdegreecustomized_ranger   r   
<listcomp>5   s    z5dist_degree_with_customized_range.<locals>.<listcomp>)dist_degree)modenum_gpus	num_nodesr   	tuner_cfgZdist_degree_allr   r   r   !dist_degree_with_customized_range0   s   r    c                 C  s  | dv sJ g }g }| dkr&| dddkrt|dd}|S t|dd}|S | d	kri|d
kr?| ddr?tt|d
 dd}nt|dd}|D ]}d}|d  dd}|r]|| dkr]d}|sd|| qG|}|S | dkr| ddr|| }	| dddkrt|	dd}nt|	dd}n| dddkrt|dd}nt|dd}|D ]\}
d}|d  dd}|d  dd}|d  dd}|d  dd}| dd}|r||
 dkrd}|r||
 dkrd}|r||
 dkrd}|r||
 dkr|rd}|s||
 q|}|S | dkrt|dd}|S | dkr9| dddkr-t|d d dd}|S t|d d dd}|S | dkr]| dddkrSt|d d dd}|S t|d d dd}|S )zDReturn the degree of different parallel modes by gpus and nodes num.)	dp_degree	mp_degree	pp_degreesharding_degreemicro_batch_size
vpp_degreer!   schedule_modememoryperformanceFr	   Tr#   r   Zenable_pp_pruner   	model_cfg
num_layersNr"   Zenable_mp_prunehidden_size
vocab_sizenum_attention_heads
seq_lengthuse_sequence_parallelr$   r%   global_batch_sizer&   )getr   r   rangeappend)r   r   r   r   r   Zprune_resultsr#   Z
prune_flagr,   gpus_per_noder"   r-   r.   r/   r0   r1   r   r   r   r   8   s   YW
F




r   c                   s  i }d}d| v rd| d v r| d d }|du r| d n|}|du r&| d n|| d  }|dks2J dD ]}t | |d||}t||||| ||< q4t | d	d| d
 d d	}td	|||| |d	< t | dd| d
 d d}td|||| |d< | dd}	t | dddd  fdddD |d< |	dkrt|d dd|d< n
t|d dd|d< | dd}
t|
tr|
 dkr|	dkrddgnddg|d< nVt|
tr|
g|d< nKt|
trt	|
dkrdg|d< n9g |d< |
D ]}|dvrt
d| |d | qt	|d dkrdg|d< n|
du r!dg|d< nt
d| dd}t|trd| dkrH|	dkr?tnttt|d< n}| tv rW| g|d< nnt
dd t d!| t|trt	|dkrwdg|d< nNg |d< |D ] }| tvrt
dd t d!| |d |  q}t	|d dkrdg|d< n|du rdg|d< nt
dd t d!| | d"d}|durg |d"< | D ]\}}|d" |d#  q|S )$JReturn the default candidates of every hyper param which user defined autoNsearch_algoestimated_num_gpusr   nodesr6   r   )r!   r"   r#   r$   r&   r+   r,   r%   r2   r'   r(   sharding_stage   c                   r   r   r   )r   stageZsharding_stage_customized_ranger   r   r      s    z&default_candidates.<locals>.<listcomp>)r<   r   r   r)   Tr	   Fuse_recomputeauto)TFz5use_recompute only supports auto/True/False, but got z&use_recompute supports auto/True/Falserecompute_granularityz)recompute_granularity only supports auto//
, but got custom_search_dimvalue)_param2ranger3   r    r   
isinstancestrlowerboolr   len
ValueErrorr5   #__SUPPORTED_RECOMPUTE_GRANULARITY__reversedjoinitems)r   
candidatesr9   r   r   strategyZstrategy_customized_rangeZvpp_degree_customized_rangeZmbs_customized_ranger'   r?   Zrecompute_settingrA   granularityrD   keyrE   r   r>   r   default_candidates   s   



















rU   c           ;   
     s  | d }|d }|d }|d }|d }|d }|d }|d }|d	 }	|d
 }
d| vs2d| d vr6| d n| d d }g }|D ]i}g }|| dkrKq@| | || }|D ]R}|| dkr_qV| | || }|D ]9}|| dkrsqj| | || }|D ] }||krq~| | t|dksJ | t| |  q~|  qj|  qVq@tt||||	|
}| dd}|dur|d }ttj| }|}g }|D ]}|D ]}t|t| }| | qqg }| dd}|D ]:}|D ]4} |\}}}}t| dd \}!}"}#}$}%| d d |"| |  dkrq| d d ||#  dkr(q|dur|dks=|$r=|$r[|%dkr[t|t|  dd t	t|D  }||vrZ| | q| d d |  tt	d d }&t|}'g }(|&D ]})|)g}|
dd t	|'d D  ||(vr|( | qtd}*|*|'k r|&D ].}) fddt	|*D }|
|)g |
dd t	|'|* d D  ||(vr|( | q|*d7 }*|*|'k s| ddkr|(D ]}+t|t|  t|+ }||vr| | qq|(jd d! |(D ]}+t|t|  t|+ }||vr| | qqt|t|  }| | qqdddddddd	d
d"	},|durL| D ]\}-}.|-|,t|,< q@|dur]|D ]	}/|/|,t|,< qSg }0|D ]}i }1t|D ]\}2}3|3|1|,|2 < qi|0 |1 qat|0}4g }5|| d< |0D ] }6d#}7tD ]}8|8| |6|5}9|9rd }7 nq|7s|5 |6 qt|5}:t|4|:  d$ | d%d#rt|5| }5|5S )&-Permutate the candidates of all hyper params.rQ   r!   r"   r#   r&   r%   r;   r$   r?   rA   r8   r9   r   r      rD   Nrefined_recompute   r+   r2   r,   r   r   c                 S     g | ]}d qS r   r   r   r   r   r   r   r         zsearch_all.<locals>.<listcomp>c                 S  rZ   r[   r   r   _r   r   r   r     r]   c                   s   g | ]} qS r   r   r^   	max_valuer   r   r     r]   c                 S  rZ   r[   r   r^   r   r   r   r     r]   r'   r)   Tr	   )	r   r   r   r<   rW   rY            Fz# tasks are pruned before launching.schedule_prior)r5   rK   copydeepcopypopr   	itertoolsproductr3   r4   extendsortrP   	enumerater   loggerinfosort_by_special);r   rQ   dp_degree_candidatesmp_degree_candidatespp_degree_candidatesZvpp_degree_candidatesmbs_candidatessharding_stage_candidatessharding_degree_candidatesuse_recompute_candidates recompute_granularity_candidatesr   Zvalid_degreesr"   degreesZsharding_resr$   Zpp_resr#   Zdp_resr!   Zother_dim_cfgsrD   Zcustom_search_dim_candidatesZcustom_dim_cfgsZother_cfgs_without_cumtomZcfg_without_cumtomZ
custom_cfgcfgall_cfgsrX   Zvalid_degreeZother_dim_cfgr;   mbsvppr?   rA   Zrr_valid_valuesZop_countZrr_dim_cfgsrE   r   Z
rr_dim_cfgmappingrT   r_   dimnew_all_cfgsnew_cfgidxvalZsearch_space_size_before_pruneZpruned_all_cfgscur_cfgZprunedfuncresultZsearch_space_size_after_pruner   r`   r   
search_all4  s|  




















 


	

	

	S



r   c                 C  s   | ddsJ |d }|jdd |D ]>}d}d}|t| k rB| | }t||r8| | | d| |d7 }|d7 }|t| k s | d | }|  || d |< q| S )Nre   FTr	   r   r   )r3   rl   rK   _matchedrh   insertr
   )Zcfgsr   Zprior_strategyrR   r   Zmatched_countrz   tmpr   r   r   rp     s$   

rp   c                 C  s:   | d  | d  | d  | d  | d  | d | d  fS )Nr"   r#   r&   r$   r;   r%   r?   r   rz   r   r   r   memory_sort  s   r   c                 C  s
   | d  S )Nr%   r   r   r   r   r   performance_sort   s   
r   c              
   C  s0  ddddddddd	d
	}dddd}i }|D ]}|||| < qt |ts&J |d}d}|D ]}d }	|D ]}||r@|}	 nq5|	r|t|	 }
|	dv rr|
dkr`| ||	  dkr_|d7 }q/q/t|
}
| ||	  |
krq|d7 }q/q/|	dkr|
dkr| ||	  r|d7 }q/q/tt|
}
| ||	  |
kr|d7 }q/q/|	dkr|
dkr| |d  dkr|d7 }q/q/t|
}
| ||	  |
kr|d7 }q/q/|	dkr|
dkr|d7 }q/t|
}
| ||	  |
kr|d7 }q/q/|	d	kr|
dkr| |d  r|d7 }q/q/t|
}
||
 }| ||	  |kr|d7 }q/q/|t|krdS dS )Ndpmpppr}   r|   shardingr=   Z	recomputerS   )	r!   r"   r#   r&   r%   r$   r;   r?   rA   r   r   r   )r   r   r   r_   r   )r   r   r   r}   r   *r   r?   TF)rG   rH   split
startswithrK   intrJ   )r   rR   r~   Zgranularity_mappingZreversed_mappingrT   dimsZhas_matchedr   ZmatchedrE   rS   r   r   r   r   $  s   


r   c                 C  s   d}t | tr d|  v rttd|d }|S td| dt | trH| dd}| dd}|r5|s=td| dtt||d }|S t | trQ| }|S t | tr[| g}|S | du rddg}|S td| d	)
z3Convert a param from json file to candidates range.Nr@   r   zIllegal param found: z , only support auto in str type.minmaxz/, min and max should be specified in dict type.z,, only support str, dict, list and int type.)	rG   rH   rI   r   r4   rL   dictr3   r   )Zparam_from_json_filera   Z	param_keyZselected_rangeZcustomized_min_valueZcustomized_max_valuer   r   r   rF   x  s>   




	

rF   c                 C  s  t | }| d dd }|d usJ g }|D ]}t|d |d  |d< d|d< d|d< d|d< |d |d	  |d
< |d
 }|| d krGd}n-|| d  dkrV|| d  }ntd| d d D ]}|| dkrs|| | d krs|} nq_|| dks|J ||d< | d d |d  |d< ||vr|d | d kr|| qg }| d dd rI|D ]}t|}	| d | d  }
|
|d
  }|dkrH||	d< d|	d< d |	d< |	d |	d	  |	d  |	d
< |	d
 }|| d krd}n0|| d  dkr|| d  }n!td| d d D ]}|| dkr|| | d kr|} nq|| dks%J ||	d< |d | |	d< d|	d< ||	 t|	}d|d< || q|| |S )Nr8   r9   r!   r$   Zestimated_dp_degreer   r;   r"   r#   r   r6   r   r   r:   r+   r2   sharding_overlapFT)r   r3   r   r4   r5   rf   rg   rk   )r   r{   r9   r   taskactual_cardsnnodesr   Zsharding_all_cfgsnew_taskZgiven_num_gpusr$   Zoverlap_new_taskr   r   r   search_by_dp_estimation  s   








r   c                 C  sB  | |d d  rd}d}|D ]0}g d}d}|D ]}|| | | kr&d} nq|r>d|vr0|}q|d s>|d | d kr>|}q|dusEJ |dusKJ ||d d  }	| |d d  }
||d d  }|r|
r|	r|
|	 |	 }t t| }|D ]$}|d	r|| rd
| }t|| d|  d||< qzdS dS dS dS dS )z
    In single dp search scenario,
    the overlay acceleration ratio is obtained by automatically running overlap and non overlap tasks,
    and the estimated performance of the multi dp after overlap is obtained.
    Z
metric_cfgnameN)r!   r"   r#   r&   r%   r?   rA   r;   TFr   r$   Zbw_Zoverlap_r   rY   )rf   rg   r   keysr   round)r   r   Zhistory_cfgsZnon_overlap_cfgZraw_cfgrz   r   ZsamerT   Zbefore_overlap_performanceZoverlap_performanceZraw_performanceratioZmew_keyr   r   r   add_overlap_performance  sV   

!	r   c                 C  s  d|d vrdS t |d d }g d}|D ]}||vr(td| d| dq|d }|d	 }|d
 }d}	|dkrF|dkrF|dkrFd}	n |dkrU|dkrU|dkrUd}	n|dkrd|dkrd|dkrdd}	nd}	|	duslJ |	|v rd||	 d v rt||	 }
|
d dksJ d}||
k r||	 | t||	 |d  g}| | |d7 }||
k sdS dS d||	 d v r| ||	  dS d||	 d v rCddl}||	 d }zt|d}||}W d   n1 sw   Y  W n   td||	 d 	d}d}|dt|d  D ]}|r|| }q|| }q|r*||	 d ||d < n
||	 d ||d < |
|t||	 d d dS d||	 d v rddl}||	 d }zt|d}||}W d   n	1 smw   Y  W n   tdt||	 d }
|
d dksJ d}||
k r||	 | 	d}d}|dt|d  D ]}|r|| }q|| }q|r|d7 }||	 | ||d < n|d7 }||	 | ||d < |d7 }||
k s|
|t||	 d d dS dS dS )"Generate args of sharding overlap.r   r8   N)sharding_mpsharding_ppsharding_mp_pp
no_overlapOnly support rC   .r$   r"   r#   r   r   r   r   r   --r   r   -o.jsonr0Please check your auto tuner json whether valid.r*   w.yaml)rf   rg   rL   rK   rH   rk   jsonopenloadr   dumpyaml	safe_load)res_argsrz   r   cmdvalid_hybrid_strategyrT   r$   r"   r#   argZarg_map_lenr   Znew_argr   	file_pathfcmd_cfgr   rE   r   r   r   r   (gen_sharding_overlap_args_of_grid_search"  s   
 


#r   c                 C  sf  d|d vrdS t |d d }d|v rg d}|D ]}||vr-td| d| dq|d }|d	ks8J |d
 }|d }d}	|d	krM|d	krMd}	n|d	krX|d	krXd}	n|d	krc|d	krcd}	ndS |	duskJ |	|v rd||	 d v r| ||	  dS d||	 d v r| ||	  dS d||	 d v rddl}
||	 d }zt|d}|
|}W d   n1 sw   Y  W n   td||	 d	 d}d}|dt|d	  D ]}|r|| }q|| }q|r||	 d ||d < n
||	 d ||d < |
	|t||	 d d dS d||	 d v rddl
}||	 d }zt|d}||}W d   n	1 s<w   Y  W n   td||	 d	 d}d}|dt|d	  D ]}|rj|| }q_|| }q_|r|d r~||	 d n||	 d ||d < n|d r||	 d n||	 d ||d < |	|t||	 d d dS dS dS dS )r   r   r8   N)r   r   r   r   rC   r   r$   r   r"   r#   r   r   r   r   r   r   r   r   r   r   r*   r   r   r<   )rf   rg   rL   rk   r   r   r   r   rK   r   r   r   )r   rz   r   r   r   rT   r$   r"   r#   r   r   r   r   r   r   rE   r   r   r   r   gen_sharding_overlap_args  s   



  
r   c              	     s  t |} fdd}fdd}d|v sJ t |d  t | }g d}d|v r7|d D ]}|| q/|D ]}	||	 || ||	 ||| q9|d dd	rst |d d   D ]$}	d
 |	 d v ru| |	  qbd |	 d v r| |	  qbd |	 d v rdd	l}
 |	 d }zt|d}|
|}W d	   n1 sw   Y  W n   td |	 d 	d}d	}|d	t
|d  D ]}|r|| }q|| }q|r |	 d ||d < n
 |	 d ||d < |
|t |	 d d qbd |	 d v rdd	l} |	 d }zt|d}||}W d	   n	1 s.w   Y  W n   td |	 d 	d}d	}|d	t
|d  D ]}|r\|| }qQ|| }qQ|rp |	 d ||d < n
 |	 d ||d < ||t |	 d d qb|d dd	rЈrt |d d   D ]/}	d
 |	 d v r| |	  qd |	 d v r| |	  qd |	 d v rKdd	l}
 |	 d }zt|d}|
|}W d	   n	1 sw   Y  W n   td |	 d 	d}d	}|d	t
|d  D ]}|r|| }q|| }q|r2 |	 d ||d < n
 |	 d ||d < |
|t |	 d d qd |	 d v rdd	l} |	 d }zt|d}||}W d	   n	1 suw   Y  W n   td |	 d 	d}d	}|d	t
|d  D ]}|r|| }q|| }q|r |	 d ||d < n
 |	 d ||d < ||t |	 d d q|d d dkrt||| |S t||| |S )zGenerate new script args.c                   s8  | dkr$|  v r$d|v r|d n|d d }||d  |d  }||d< | dkrU|  v rUz"d|v r5|d n|d d }||d  |d  |d  }||d< W n   Y d S | dkrt|  v rtz|d	 d
krfd
nd}||d< W n   Y d S | dkr|  v rzd|v r|d n|d d }||d< W d S    Y d S d S d S )Nlocal_batch_sizer2   r+   r$   r!   gradient_accumulation_stepsr%   sequence_parallelr"   r   r   r   )r   Zcmgrz   r   r2   r   r   r   )r   r   r   _get_new_cfg  sR   


z"gen_new_args.<locals>._get_new_cfgc                   sl  | |v r| |v rd||  d v r+||  d t ||   ||  d< |||   d S d||  d v rN||  d d t ||   ||  d< |||   d S d||  d v rdd l}||  d }d}t||  dkrq||  d	 }zt|d
}||}	W d    n1 sw   Y  W n   td||  d d}
d }|
d t|
d  D ]}|s|	| }q|| }q|r|r|t ||   n||  ||
d < n|r|t ||   n||  |	|
d < ||	t||  d d |d 	ddr st
j||  d d d |d  d }||	t|d d S d S d S d||  d v rdd l}||  d }d}t||  dkr?||  d	 }zt|d
}||}	W d    n	1 sVw   Y  W n   td||  d d}
d }|
d t|
d  D ]}|s|	| }qy|| }qy|r|r|t ||   n||  ||
d < n|r|t ||   n||  |	|
d < ||	t||  d d |d 	ddr st
j||  d d |d  d }||	t|d d S d S d S d S | dkr| |v rd|d d v rtdd|d d v rtdd||  d v rdd l}||  d }t||  dkr4tdzt|d
}||}	W d    n	1 sKw   Y  W n   td||  d d}
d }i }|	dd }|sqd S |D ]	}|| ||< qs|
d t|
d  D ]}|s|	| }q|| }q|r|||
d < n||	|
d < ||	t||  d d |d 	ddrވ st
j||  d d |d  d }||	t|d d S d S d S d||  d v rdd l}||  d }t||  dkrtdzt|d
}||}	W d    n	1 sw   Y  W n   td||  d d}
d }i }|	dd }|s?d S |D ]	}|| ||< qA|
d t|
d  D ]}|s`|	| }qU|| }qU|rp|||
d < n||	|
d < ||	t||  d d |d 	ddr st
j||  d d |d  d }||	t|d d S d S d S d S d S d S )Nr   r   r   r   =r    r<   r   r   r   r   r*   r   run_cmdZgenerate_launch_cfgTr_   Zlog_dir_namer   rX   z;refined recompute is not supported by command in autotuner.z8refined recompute is not supported by '-o' in autotuner.z1The 3rd arg is not supported in refined_recompute)rH   rk   r   rK   r   r   rL   r   r   r3   ospathsplitextr   r   NotImplementedError)r   r   rz   r   r   r   r   prefixr   r   r   rE   rT   Znew_cmd_apthr   Z	rr_valuesrr)run_bestr   r   _gen_new_arg	  sv   $



5!z"gen_new_args.<locals>._gen_new_argr   )r!   r"   r#   r&   r%   r$   r;   r?   rA   r   r   r2   r   rX   rD   Zsearch_stageNr   r   r   r   r   r   r   r   r   r*   r   r   Zrun_best_stager8   r   grid)rf   rg   r5   r3   rk   r   r   r   rL   r   rK   r   r   r   r   r   )Zraw_argsrz   r   r   r   r   r   new_argsrT   r   r   r   r   r   r   rE   r   r   )r   r   r   gen_new_args  s   
/ =




r   c                 C  s2  t | }d|v rd|d v r|d dksJ |d dksJ |d |d  |d  }||d	 krId
dd t|D |j_|jjrGd|j_|S ||d	  dkrX||d	  }ntd|d d D ]}|| dkru|| |d	 kru|} nqa|| dks~J d
dd t|| D |j_| d| |j_|S )zGenerate new running context.r8   r9   r!   r   r;   r"   r#   r$   r6   ,c                 S     g | ]}t |qS r   rH   r\   r   r   r   r   g      zgen_new_ctx.<locals>.<listcomp>z1:1r   r   r:   c                 S  r   r   r   r\   r   r   r   r   x  r   :)rf   rg   rO   r4   argsZdevicesZmasterr   )ctxr   r   Znew_ctxr   r   r   r   r   r   gen_new_ctxW  s@   
r   workerlog.0step/sreturntuple[float, int]c              	   C  s~  	 d}| d | }t j|sdS t|d}|d | }tjdr%dnd}d}g }	| }
|
D ]4}t	||}t	||}|rad	}|d D ]}zt
|}|	| W  n   Y qG|d	usaJ |red
}q1|rpd}||d
> B }|	syd}|d
B }n,t|	dk r|	d }n!t|	dk rt|	dd	 t|	dd	  }n
t|	dd	 d }t|d}W d	   n1 sw   Y  ||f}|S )z$For extracting metric from log file.r   rB   )        r   r   !:* *(\d+(\.\d*)?)|(\d+(\.\d*)?) *Znpuzout of memoryzOut of memory error onNr   r   
   r*      	   rY   )r   r   existsr   paddleZdeviceZis_compiled_with_custom_device	readlinesrefindallfloatr5   rK   sumr   )r   filetarget_metricerr_codetarget_filer   re_metric_patternZre_out_of_memory_patternZout_of_memory_flagmetric_listlineslinemetricZout_of_memoryrE   item
metric_averesr   r   r   read_metric_log~  s\   




",r   interval_runtimec              	   C  sB  | d | }t j|sd S t|d}|d | }g }| }|D ]*}t||}	|	rLd }
|	d D ]}zt|}
||
 W  n   Y q2|
d usLJ q"|sZd }	 W d    d S t	|dk re|d }n!t	|dk r|t
|dd  t	|dd   }n
t
|d	d  d }t|d
}W d    |}|S 1 sw   Y  |}|S )NrB   r   r   r   r   r*   r   r   r   rY   )r   r   r   r   r   r   r   r   r5   rK   r   r   )r   r   r   r   r   r   r   r   r   r   rE   r   r   r   r   r   r   read_step_time_log  sJ   


"
r   max_memory_allocatedc              	   C  s   | d | }t j|sd S t|dV}|d | }g }| }|D ],}t||}	|	rNd }
|	d D ]}ztt|}
|	|
 W  n   Y q2|
d usNJ q"|sZ	 W d    d S |
  |d W  d    S 1 slw   Y  d S )NrB   r   r   r   r*   )r   r   r   r   r   r   r   r   r   r5   rl   )r   r   r   r   r   r   r   r   r   r   rE   r   r   r   r   read_allocated_memory_log  s6   

$r   tuple[float, bool]c                 C  s   t j| |}t j|sdS g }g }g }t|dK}t|}d}|s6t|}	t|	dkr4d|	v r4d}|r$|D ]%}	t|	dkr]|	\}
}}}}}|	t
|
 |	t
| |	t
| q8W d    n1 shw   Y  t|dfS )N)r   Tr   Frb   memory_usedT)r   r   rO   r   r   csvreadernextrK   r5   r   r   )r   r   log_pathr   Zutilization_gpuindicesr   r  flagrowindexZutil_gpur_   Zmem_usedr   r   r   read_memory_log  s0   
r  c              
   C  s   t | D ]U\}}}|D ]M}|dsq| d | }t j|s$  dS t|d(}d}| }|D ]}	t||	tj	}
|
rI W d     dS q2W d   n1 sTw   Y  qqdS )zi
    check if training is completed
    return:
        True: completed
        False: not completed
    	workerlogrB   Fr   zTraining completed.NT)
r   walkr   r   r   r   r   r   r   
IGNORECASE)r   rootdirsfilesr   r   r   Zre_completed_patternr   r   	completedr   r   r   read_completed"  s.   
r  	0.gpu.logtuple[float, float, int]c                 C  s   d}t | D ] \}}}|D ]}|dsqt| ||\}	}
|
r&|
d@ |B }qqt| ||\}}
|
|B }zt| |\}}|d> |B }W n
   d}d|B }Y |||fS )z
    extract metric and max memory usage from log file
    return:
        metric: average metric of last 10 steps
        memory: max memory used
        err_code: 00: no error, 01: no metric, 10: out of memory, 100: no memory log
    r   r	  r   r   rW   )r   r
  r   r   r  )r   Zmetric_filer   Zmemory_filer   r  r  r  r   r   Zmetric_flagZ
res_metricZ
res_memoryZmemory_flagr   r   r   read_log=  s&   


r  c                 C  s   g }d}t | d0}| }|dd }|D ]}t||tj}|r1d|v r,|d q|| qW d   n1 s<w   Y  tt|S )z]
    get error info from log file
    return:
        error_info: Specific error message
    Errorr   iNzOut of memory)r   r   r   r   r  r5   r   r   )filenameerror_infosZerror_patternr   r   Z
last_linesr   
error_infor   r   r   get_error_infoa  s   

r  c           	      C  s~   d}g }t | D ]\}}}|D ]}|dsqt| d | }||7 }qq	tt|}|D ]}|| d }q.|dd }|S )zp
    find error infos from log directory
    return:
        error_info: all error message on log directory
    r   r	  rB   r   Nr*   )r   r
  r   r  r   r   )	r   Zunique_error_infoZall_error_infosr  r  r  r   r  ro   r   r   r   find_error_from_logv  s   

r  c                 C  sl   g }t d| d d D ](}| | dkr3t || d d D ]}| | | dkr2|||| | | f qq|S )zAReturn the combinations of three numbers which product is target.r   r<   r   r   )r4   r5   )targetr   r   jr   r   r   three_mul_combinations  s   r  c                 C  sx   t |d }t|ddD ]+}|| dkr7|| }t |d }|| dkr.|d8 }|| dks$|||| f  S qtd)z&Return middle candidates of dp, mp, ppgUUUUUU?r   r*   g      ?r   zCannot distribute GPUs equally)r   r4   rL   )r   r   r   startr   	remainingr  r   r   r   gbs_dp_mp_pp_candidates  s   r  c                   s   i }| d }| d }|dksJ |  di  dd}|dkrft| ||\ } }dg|d< |g|d	< g|d
< |g|d< dg|d< dg|d< dg|d< dd tddD |d<  fdd|d D |d< |S )r7   r   r:   r   r+   r2   r@   r   r!   r"   r#   r$   r;   Fr?   NrA   c                 S  s   g | ]}d | qS )r   r   r\   r   r   r   r     r   z*gbs_default_candidates.<locals>.<listcomp>r   r%   c                   s   g | ]}  | qS r   r   )r   eZdp_candidateZpp_candidater   r   r     s    
)r3   r  r4   )r   rQ   r   r   r2   Zmp_candidateZsharding_degree_candidater   r!  r   gbs_default_candidates  s0   








r"  c                 C  s   | d }|d }|d }|d }|d }|d }|d }|d }|d	 }	t t||||||||	}
dddddddd	d
}g }|
D ]&}i }t|D ]
\}}|||| < qI|d |d  |d  |d< || qA|S )rV   rQ   r!   r"   r#   r%   r;   r$   r?   rA   )r   r   r   r<   rY   rW   rb   rc   r2   )r   ri   rj   rm   r5   )r   rQ   rq   rr   rs   rt   ru   rv   rw   rx   r{   r~   r   rz   r   r   r   r   r   r   gbs_search_all  sV   r#  c              
   C  s:  g }g d}ddg}t | d}t|}t|}W d   n1 s#w   Y  |D ]p}i }|D ] }	||	d}
zt|
||	< W q0 tyP   t|	 d|
 w |dd}| dv sfJ | d	| | d
k|d< |dd}|dks| tv sJ | dt d| d|dkr|nd|d< |	| q*|S )zLoad the configs from csv file.)r!   r"   r#   r&   r%   r$   r;   r?   rA   r   Nr   z must be integer, but got )truefalsez  must be true or false, but got r$  z must be one of rC   r   )
r   r   
DictReaderr   r3   r   rL   rI   rM   r5   )Zconfigs_csvZall_configsZextract_keys_integerZextract_keys_stringr   r  Zraw_configsZ
raw_configconfigZextract_keyr   r?   rA   r   r   r   load_configs_from_csv  sD   	

r(  )F)N)r   r   )r   r   )r   r   )r   r   )r   r   )r   r   r  )r   r  )+
__future__r   rf   r   ri   loggingr   r   r   Zpruner   rM   	getLoggerrn   r   r    r   rU   r   rp   r   r   r   rF   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r"  r#  r(  r   r   r   r   <module>   sh   



j  LT!S6a
S   (A)

 $3