Sistema de Apoio ao Processo Legislativo
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

71 lines
2.2 KiB

import os.path
import textract
from django.template import Context, loader
from haystack import indexes
from sapl.materia.models import DocumentoAcessorio, MateriaLegislativa
from sapl.norma.models import NormaJuridica
class DocumentoAcessorioIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index whose ``text`` field includes text extracted from a file.

    The three class attributes ``filename``, ``model`` and ``template_name``
    drive everything below, so a subclass can index a different model by
    overriding only those attributes.
    """

    text = indexes.CharField(document=True, use_template=True)

    # Name of the attribute on an indexed object that holds the file.
    filename = 'arquivo'
    # Model class whose instances are indexed.
    model = DocumentoAcessorio
    # Template path, looked up under ``search/indexes/``, that renders ``text``.
    template_name = 'materia/documentoacessorio_text.txt'

    def get_model(self):
        """Return the model class configured in ``model``."""
        return self.model

    def index_queryset(self, using=None):
        """Return all objects of the configured model.

        ``using`` is accepted but not consulted by this implementation.
        """
        return self.get_model().objects.all()

    def prepare(self, obj):
        """Prepare the index data for ``obj``, adding the file's text.

        The base ``prepare`` result is returned untouched when ``obj`` has no
        file, when the file cannot be opened (``OSError``), or when its path
        has no extension. Otherwise the file is run through
        ``textract.process``, newlines and tabs in the result are replaced by
        spaces, and ``data['text']`` is re-rendered from ``template_name``
        with ``object`` and ``extracted`` in the template context.

        Raises:
            Exception: if ``filename``, ``model`` or ``template_name`` is
                empty. Errors raised by ``textract.process`` or by the UTF-8
                decode are not caught here and propagate to the caller.
        """
        if not self.filename or not self.model or not self.template_name:
            raise Exception(
                '%s must define filename, model and template_name'
                % type(self).__name__)

        data = super(DocumentoAcessorioIndex, self).prepare(obj)

        arquivo = getattr(obj, self.filename)
        if not arquivo:
            return self.prepared_data

        # Opening is only a probe that the stored file is readable; the
        # extraction below works from the path, so release the handle right
        # away instead of leaking one per indexed object.
        try:
            arquivo.open()
        except OSError:
            return self.prepared_data
        arquivo.close()

        # Skip files without an extension.
        # NOTE(review): presumably because textract picks its parser from the
        # extension -- confirm against the textract documentation.
        if not os.path.splitext(arquivo.path)[1]:
            return self.prepared_data

        extracted_data = textract.process(arquivo.path).decode('utf-8')
        extracted_data = extracted_data.replace('\n', ' ').replace('\t', ' ')

        # Render the text field again so the template sees both the object's
        # metadata and the extracted file contents.
        t = loader.select_template(
            ('search/indexes/' + self.template_name, ))
        data['text'] = t.render(Context({'object': obj,
                                         'extracted': extracted_data}))
        return data
class MateriaLegislativaIndex(DocumentoAcessorioIndex):
    """Index for ``MateriaLegislativa``, built on ``DocumentoAcessorioIndex``.

    Only the configuration attributes differ from the parent class: the file
    is read from ``texto_original`` and the text field is rendered with this
    model's own template.
    """

    # Configuration attributes inherited code reads via ``self``.
    model = MateriaLegislativa
    filename = 'texto_original'
    template_name = 'materia/materialegislativa_text.txt'

    # Document field, declared again on this class as in the parent.
    text = indexes.CharField(document=True, use_template=True)
class NormaJuridicaIndex(DocumentoAcessorioIndex):
    """Index for ``NormaJuridica``, built on ``DocumentoAcessorioIndex``.

    Only the configuration attributes differ from the parent class: the file
    is read from ``texto_integral`` and the text field is rendered with this
    model's own template.
    """

    # Configuration attributes inherited code reads via ``self``.
    model = NormaJuridica
    filename = 'texto_integral'
    template_name = 'norma/normajuridica_text.txt'

    # Document field, declared again on this class as in the parent.
    text = indexes.CharField(document=True, use_template=True)