
    nNhb                     t    d dl Z d dlZd dlZd dlmZmZ d dlmZ  e j        e	          Z
 G d d          ZdS )    N)AutoTokenizerAutoModelForCausalLM)Optionalc                   T    e Zd ZddedefdZd Zd ZdefdZ	dd
ededefdZ
d ZdS )
LLMManagerr   
model_pathgpu_idc                 t    || _         || _        d | _        d | _        d | _        |                                  d S N)r   r	   model	tokenizerdevice_initialize_model)selfr   r	   s       D:\omnillm\worker\llm_manager.py__init__zLLMManager.__init__
   s=    $
         c                 8   	 t                               d| j         d| j                    t          j                                        r| j        t          j                                        k     rat	          j        d| j                   | _        t                               dt          j        	                    | j                              n3t	          j        d          | _        t                               d           t          j                            | j                  rt                               d| j                    t          j        | j        d	          | _        t!          j        | j        d| j        j        d
k    rt          j        nt          j        | j        j        d
k    rdndd          | _        nt                               d| j                    t          j        | j                  | _        t!          j        | j        | j        j        d
k    rt          j        nt          j        | j        j        d
k    rdndd          | _        | j        j        | j        j        | j        _        | j        j        dk    r$| j                            | j                  | _        t                               d           dS # t0          $ r<}t                               d|            |                                  Y d}~dS d}~ww xY w)zInizializza il modello LLMzLoading model from z on GPU zcuda:zUsing GPU: cpuz	Using CPUzLoading from local path: T)local_files_onlycudaautoN)r   torch_dtype
device_maplow_cpu_mem_usagezLoading from HuggingFace Hub: )r   r   r   zModel loaded successfullyzError loading model: )loggerinfor   r	   torchr   is_availabledevice_countr   get_device_nameospathexistsr   from_pretrainedr   r   typefloat16float32r   	pad_token	eos_tokento	Exceptionerror_load_fallback_model)r   es     r   r   zLLMManager._initialize_model   s   .	(KKTdoTTt{TTUUU z&&(( )T[5:;R;R;T;T-T-T#l+@4;+@+@AAS%**D*DT[*Q*QSSTTTT#l511K((( w~~do.. IIIJJJ!.!>tae!f!f!f1AO%)151AV1K1KQVQ^)-)9V)C)Cvv&*  

 NT_NNOOO!.!>t!O!O1AO151AV1K1KQVQ^)-)9V)C)Cvv&*	  
 ~'/+/>+C({5((!Z]]4;77
KK344444 	( 	( 	(LL444555%%'''''''''	(s   KK 
L1LLc                 B   	 t                               d           d}t          j        |          | _        | j        j        | j        _        t          j        || j        j	        dk    rt          j        nt          j        | j        j	        dk    rdnd          | _        | j        j	        dk    r$| j                            | j                  | _        t                               d           dS # t          $ r#}t                               d	|             d}~ww xY w)
z4Carica un modello di fallback molto piccolo per testz0Attempting to load fallback model for testing...zmicrosoft/DialoGPT-smallr   r   N)r   r   r   z"Fallback model loaded successfullyzError loading fallback model: )r   r   r   r%   r   r*   r)   r   r   r&   r   r'   r(   r   r+   r,   r-   )r   fallback_modelr/   s      r   r.   zLLMManager._load_fallback_modelD   s   	KKJKKK 8N*:>JJDN'+~'?DN$-=-1[-=-G-GEMMU]%)[%5%?%?66T  DJ {5((!Z]]4;77
KK<===== 	 	 	LL=!==>>>	s   C-C1 1
D;DDreturnc                     | j         ddiS | j        t          | j                   j        t	          | j                  t          d | j                                         D                       ddS )z$Restituisce informazioni sul modelloNr-   zModel not loadedc              3   >   K   | ]}|                                 V  d S r   )numel).0ps     r   	<genexpr>z,LLMManager.get_model_info.<locals>.<genexpr>g   s*      IIAaggiiIIIIIIr   T)r   
model_typer   
parametersloaded)r   r   r&   __name__strr   sumr:   r   s    r   get_model_infozLLMManager.get_model_info^   st    :/00 /tz**3$+&&II1F1F1H1HIIIII
 
 	
r   d   prompt
max_lengthc                 n   | j         | j        dS 	 | j                            |d                              | j                  }t          j                    5  | j                             |t          |d                   |z   ddd| j        j	        |
                    | j        j                  	          }ddd           n# 1 swxY w Y   | j                            |d         d
          }|                    |          r*|t          |          d                                         }n|                                }t                              dt          |           d           |S # t"          $ r9}t                              d|            dt'          |           cY d}~S d}~ww xY w)zGenera testo dal promptNzError: Model not loadedpt)return_tensorsr      gffffff?T)rC   num_return_sequencestemperature	do_samplepad_token_idattention_mask)skip_special_tokensz
Generated z characterszError during generation: zError during text generation: )r   r   encoder+   r   r   no_gradgenerateleneos_token_idnerK   decode
startswithstripr   r   r,   r-   r=   )r   rB   rC   inputsoutputsgenerated_textresultr/   s           r   rP   zLLMManager.generatek   s   :!7,,	=^**6$*GGJJ4;WWF  	 	*--"6!9~~
:)* #"!%!<#)99T^-H#I#I .  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 "^2271:SW2XXN ((00 0'F5;;=='--//KK=S[[===>>>M 	= 	= 	=LL8Q88999<CFF<<<<<<<<	=sD   AE1 AC8E1 CE1 CB$E1 1
F4;.F/)F4/F4c                     | j         r| ` t          j                                        r t          j                                         dS dS )zPulizia delle risorseN)r   r   r   r   empty_cacher?   s    r   cleanupzLLMManager.cleanup   sK    : 	
:""$$ 	%J""$$$$$	% 	%r   N)r   )rA   )r<   
__module____qualname__r=   intr   r   r.   dictr@   rP   r]    r   r   r   r   	   s        ! !3 ! ! ! ! !0( 0( 0(d  4
 
 
 
 
#= #=s #= #=c #= #= #= #=J% % % % %r   r   )loggingr   r"   transformersr   r   typingr   	getLoggerr<   r   r   rb   r   r   <module>rg      s      				 < < < < < < < <      		8	$	$L% L% L% L% L% L% L% L% L% L%r   