@ -3,33 +3,36 @@ import os.path 
			
		
	
		
			
				
					import  re  
			
		
	
		
			
				
					import  string  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					import  textract  
			
		
	
		
			
				
					from  django . db . models  import  Q ,  F ,  Value  
			
		
	
		
			
				
					from  django . db . models . fields  import  TextField  
			
		
	
		
			
				
					from  django . db . models . fields . files  import  FieldFile  
			
		
	
		
			
				
					from  django . db . models . functions  import  Concat  
			
		
	
		
			
				
					from  django . template  import  loader  
			
		
	
		
			
				
					from  haystack  import  indexes  
			
		
	
		
			
				
					from  haystack . constants  import  Indexable  
			
		
	
		
			
				
					from  haystack . fields  import  CharField  
			
		
	
		
			
				
					from  haystack . indexes  import  SearchIndex  
			
		
	
		
			
				
					from  haystack . utils  import  get_model_ct_tuple  
			
		
	
		
			
				
					from  textract . exceptions  import  ExtensionNotSupported  
			
		
	
		
			
				
					import  textract  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					from  sapl . compilacao . models  import  TextoArticulado ,  Dispositivo , \ 
			
		
	
		
			
				
					    STATUS_TA_PUBLIC ,  STATUS_TA_IMMUTABLE_PUBLIC  
			
		
	
		
			
				
					from  sapl . materia . models  import  DocumentoAcessorio ,  MateriaLegislativa  
			
		
	
		
			
				
					from  sapl . norma . models  import  NormaJuridica  
			
		
	
		
			
				
					from  sapl . settings  import  BASE_DIR ,  SOLR_URL  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					logger  =  logging . getLogger ( BASE_DIR . name )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					class  DocumentoAcessorioIndex ( indexes . SearchIndex ,  indexes . Indexable ) :  
			
		
	
		
			
				
					    text  =  indexes . CharField ( document = True ,  use_template = True )  
			
		
	
		
			
				
					class  TextExtractField ( CharField ) :  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    filename  =  ' arquivo '  
			
		
	
		
			
				
					    model  =  DocumentoAcessorio  
			
		
	
		
			
				
					    template_name  =  ' materia/documentoacessorio_text.txt '  
			
		
	
		
			
				
					    def  __init__ ( self ,  * * kwargs ) :  
			
		
	
		
			
				
					        super ( ) . __init__ ( * * kwargs )   
			
		
	
		
			
				
					        assert  self . model_attr   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  get_model ( self ) :  
			
		
	
		
			
				
					        return  self . model  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  index_queryset ( self ,  using = None ) :  
			
		
	
		
			
				
					        return  self . get_model ( ) . objects . all ( )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  get_updated_field ( self ) :  
			
		
	
		
			
				
					        return  ' data_ultima_atualizacao '  
			
		
	
		
			
				
					        if  not  isinstance ( self . model_attr ,  ( list ,  tuple ) ) :  
			
		
	
		
			
				
					            self . model_attr  =  ( self . model_attr ,  )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  solr_extraction ( self ,  arquivo ) :  
			
		
	
		
			
				
					        extracted_data  =  self . _get_backend ( None ) . extract_file_contents (  
			
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
				@ -59,71 +62,109 @@ class DocumentoAcessorioIndex(indexes.SearchIndex, indexes.Indexable): 
			
		
	
		
			
				
					        print ( msg )  
			
		
	
		
			
				
					        logger . error ( msg )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  prepare ( self ,  obj ) :  
			
		
	
		
			
				
					        if  not  self . filename  or  not  self . model  or  not  self . template_name :  
			
		
	
		
			
				
					            raise  Exception  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        data  =  super ( DocumentoAcessorioIndex ,  self ) . prepare ( obj )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        arquivo  =  getattr ( obj ,  self . filename )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        if  arquivo :  
			
		
	
		
			
				
					            if  not  os . path . exists ( arquivo . path ) :  
			
		
	
		
			
				
					                return  self . prepared_data  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					            if  not  os . path . splitext ( arquivo . path ) [ 1 ] [ : 1 ] :  
			
		
	
		
			
				
					                return  self . prepared_data  
			
		
	
		
			
				
					    def  file_extractor ( self ,  arquivo ) :  
			
		
	
		
			
				
					        if  not  os . path . exists ( arquivo . path )  or  \ 
			
		
	
		
			
				
					                not  os . path . splitext ( arquivo . path ) [ 1 ] [ : 1 ] :  
			
		
	
		
			
				
					            return  ' '  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        # Em ambiente de produção utiliza-se o SOLR  
			
		
	
		
			
				
					        if  SOLR_URL :  
			
		
	
		
			
				
					            try :  
			
		
	
		
			
				
					                    extracted_data  =   self . solr_extraction ( arquivo )  
			
		
	
		
			
				
					                return  self . solr_extraction ( arquivo )  
			
		
	
		
			
				
					            except  Exception :  
			
		
	
		
			
				
					                self . print_error ( arquivo )  
			
		
	
		
			
				
					                    return  self . prepared_data  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        # Em ambiente de DEV utiliza-se o Whoosh  
			
		
	
		
			
				
					        # Como ele não possui extração, faz-se uso do textract  
			
		
	
		
			
				
					        else :  
			
		
	
		
			
				
					            try :  
			
		
	
		
			
				
					                    extracted_data  =   self . whoosh_extraction ( arquivo )  
			
		
	
		
			
				
					                return self . whoosh_extraction ( arquivo )  
			
		
	
		
			
				
					            except  ExtensionNotSupported  as  e :  
			
		
	
		
			
				
					                print ( str ( e ) )  
			
		
	
		
			
				
					                logger . error ( str ( e ) )  
			
		
	
		
			
				
					                    return  self . prepared_data  
			
		
	
		
			
				
					            except  Exception :  
			
		
	
		
			
				
					                self . print_error ( arquivo )  
			
		
	
		
			
				
					                    return  self . prepared_data  
			
		
	
		
			
				
					        return  ' '  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  ta_extractor ( self ,  value ) :  
			
		
	
		
			
				
					        r  =  [ ]  
			
		
	
		
			
				
					        for  ta  in  value . filter ( privacidade__in = [  
			
		
	
		
			
				
					                STATUS_TA_PUBLIC ,  
			
		
	
		
			
				
					                STATUS_TA_IMMUTABLE_PUBLIC ] ) :  
			
		
	
		
			
				
					            dispositivos  =  Dispositivo . objects . filter (  
			
		
	
		
			
				
					                Q ( ta = ta )  |  Q ( ta_publicado = ta )  
			
		
	
		
			
				
					            ) . order_by (  
			
		
	
		
			
				
					                ' ordem '  
			
		
	
		
			
				
					            ) . annotate (  
			
		
	
		
			
				
					                rotulo_texto = Concat (  
			
		
	
		
			
				
					                    F ( ' rotulo ' ) ,  Value ( '   ' ) ,  F ( ' texto ' ) ,  
			
		
	
		
			
				
					                    output_field = TextField ( ) ,  
			
		
	
		
			
				
					                )  
			
		
	
		
			
				
					            ) . values_list (  
			
		
	
		
			
				
					                ' rotulo_texto ' ,  flat = True )  
			
		
	
		
			
				
					            r  + =  list ( filter ( lambda  x :  x . strip ( ) ,  dispositivos ) )  
			
		
	
		
			
				
					        return  '   ' . join ( r )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  extract_data ( self ,  obj ) :  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        data  =  ' '  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        for  attr ,  func  in  self . model_attr :  
			
		
	
		
			
				
					            if  not  hasattr ( obj ,  attr )  or  not  hasattr ( self ,  func ) :  
			
		
	
		
			
				
					                raise  Exception  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					            # Now we'll finally perform the template processing to render the  
			
		
	
		
			
				
					            # text field with *all* of our metadata visible for templating:  
			
		
	
		
			
				
					            t  =  loader . select_template ( (  
			
		
	
		
			
				
					                ' search/indexes/ '  +  self . template_name ,  ) )  
			
		
	
		
			
				
					            data [ ' text ' ]  =  t . render ( { ' object ' :  obj ,  
			
		
	
		
			
				
					                                     ' extracted ' :  extracted_data } )  
			
		
	
		
			
				
					            value  =  getattr ( obj ,  attr )  
			
		
	
		
			
				
					            if  not  value :  
			
		
	
		
			
				
					                continue  
			
		
	
		
			
				
					            data  + =  getattr ( self ,  func ) ( value )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        return  data  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        return  self . prepared_data  
			
		
	
		
			
				
					    def  prepare_template ( self ,  obj ) :  
			
		
	
		
			
				
					        app_label ,  model_name  =  get_model_ct_tuple ( obj )  
			
		
	
		
			
				
					        template_names  =  [ ' search/indexes/ %s / %s _ %s .txt '  %  
			
		
	
		
			
				
					                          ( app_label ,  model_name ,  self . instance_name ) ]  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        t  =  loader . select_template ( template_names )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					class  MateriaLegislativaIndex ( DocumentoAcessorioIndex ) :  
			
		
	
		
			
				
					    text  =  indexes . CharField ( document = True ,  use_template = True )  
			
		
	
		
			
				
					        return  t . render ( { ' object ' :  obj ,   
			
		
	
		
			
				
					                         ' extracted ' :  self . extract_data ( obj ) }  )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    filename  =  ' texto_original '  
			
		
	
		
			
				
					    model  =  MateriaLegislativa  
			
		
	
		
			
				
					    template_name  =  ' materia/materialegislativa_text.txt '  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					class  DocumentoAcessorioIndex ( SearchIndex ,  Indexable ) :  
			
		
	
		
			
				
					    model  =  DocumentoAcessorio  
			
		
	
		
			
				
					    text  =  TextExtractField (  
			
		
	
		
			
				
					        document = True ,  use_template = True ,  
			
		
	
		
			
				
					        model_attr = ( ( ' arquivo ' ,  ' file_extractor ' ) ,  )  
			
		
	
		
			
				
					    )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  get_model ( self ) :  
			
		
	
		
			
				
					        return  self . model  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  index_queryset ( self ,  using = None ) :  
			
		
	
		
			
				
					        return  self . get_model ( ) . objects . all ( )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  get_updated_field ( self ) :  
			
		
	
		
			
				
					        return  ' data_ultima_atualizacao '  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					class  NormaJuridicaIndex ( DocumentoAcessorioIndex ) :  
			
		
	
		
			
				
					    text  =  indexes . CharField ( document = True ,  use_template = True )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    filename  =  ' texto_integral '  
			
		
	
		
			
				
					    model  =  NormaJuridica  
			
		
	
		
			
				
					    template_name  =  ' norma/normajuridica_text.txt '  
			
		
	
		
			
				
					    text  =  TextExtractField (  
			
		
	
		
			
				
					        document = True ,  use_template = True ,  
			
		
	
		
			
				
					        model_attr = (  
			
		
	
		
			
				
					            ( ' texto_integral ' ,  ' file_extractor ' ) ,  
			
		
	
		
			
				
					            ( ' texto_articulado ' ,  ' ta_extractor ' )  
			
		
	
		
			
				
					        )  
			
		
	
		
			
				
					    )  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    def  get_updated_field ( self ) :  
			
		
	
		
			
				
					        return  ' data_ultima_atualizacao '  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					class  MateriaLegislativaIndex ( DocumentoAcessorioIndex ) :  
			
		
	
		
			
				
					    model  =  MateriaLegislativa  
			
		
	
		
			
				
					    text  =  TextExtractField (  
			
		
	
		
			
				
					        document = True ,  use_template = True ,  
			
		
	
		
			
				
					        model_attr = (  
			
		
	
		
			
				
					            ( ' texto_original ' ,  ' file_extractor ' ) ,  
			
		
	
		
			
				
					            ( ' texto_articulado ' ,  ' ta_extractor ' )  
			
		
	
		
			
				
					        )  
			
		
	
		
			
				
					    )