
    t3h
                         S r SSKrSSKJr  SSKJr  / SQrSrSrSr	S	\
\   4S
 jrS\
\   S	\4S jrS r\S:X  a  \" 5         gg)zj
Summarization pipeline for codebase trace descriptions.
Uses nlp.summerizer for Markdown and code files.
    N)Path)
summerizer)zapi/srczapi/tests/unitscriptszGonk/GonkUI/viewsz#trace_description_intermediate.json   
   returnc                      / n [          Hl  n[        U5      nUR                  5       (       d  M%  U R                  UR	                  S5       Vs/ s H  o3R                  5       (       d  M  UPM     sn5        Mn     U $ s  snf )z3Recursively gather all Python files from CODE_DIRS.z*.py)	CODE_DIRSr   existsextendrglobis_file)files	directorypathfs       7/root/zotify-API/scripts/trace_description_generator.pygather_filesr      s^    E	I;;==LLTZZ%7G%799;!%7GH  L Hs   A;
(A;
r   c                 Z   0 n/ nU  H  n UR                  SS9n[        UR	                  5       5      [
        :  a  UR                  U5        MG  [        R                  " U5      nU(       a  Xa[        U5      '   Ms  [        SU S35        M     U(       am  SR                  S	 U 5       5      n[        R                  " [        S
5      SS9nU(       a  U H  n	X[        U	5      '   M     U$ U H  n	[        SU	 S35        M     U$ ! [         a  n[        SU SU 35         SnAGM  SnAff = f)z8Summarize each file and handle batching for short files.utf-8encodingz[WARN] Failed to read z: Nz[WARN] Skipping z due to summarization failurez

c              3   >   #    U  H  oR                  S S9v   M     g7f)r   r   N)	read_text).0r   s     r   	<genexpr>"summarize_files.<locals>.<genexpr>?   s     #W;aKKK$A;s   zcombined_temp.py)outputsz[WARN] Skipping short file )r   	ExceptionprintlensplitBATCH_THRESHOLDappendr   summarize_codestrjoinr   )
r   	summariesshort_batchfilecontentedesccombined_textcombined_descr   s
             r   summarize_filesr0   &   s*   IK	nngn6G
 w}}/1t$ ((.#'c$i $TF*GHI! & #W;#WW"11$7I2JTXY $1#a&! !  !3A36STU ! 5  	*4&1#67	s   D
D*D%%D*c                  d   [        5       n [        S[        U 5       S35        [        U 5      n [	        [
        SSS9 n[        R                  " XSS9  S S S 5        [        S[
         S	[        U5       S
35        g ! , (       d  f       N.= f! [         a  n[        SU 35         S nAg S nAff = f)Nz[INFO] Found z Python files to summarizewr   r      )indentz"[INFO] Trace summaries written to z (z files summarized)z[ERROR] Failed to write JSON: )	r   r    r!   r0   openOUTPUT_FILEjsondumpr   )r   r(   r   r,   s       r   mainr9   N   s    NE	M#e*%?
@A&I4+sW5IIi1- 62;-r#i.AQQcde 65  4.qc2334s.   B A<'B <
B
B 
B/B**B/__main__)__doc__r7   pathlibr   nlpr   r
   r6   r#   
MIN_TOKENSlistr   dictr0   r9   __name__     r   <module>rD      sn   
   
	 4

d4j "4: "$ "P4 zF rC   