mirror of https://github.com/interlegis/sapl.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
71 lines
2.2 KiB
71 lines
2.2 KiB
import os.path
|
|
import textract
|
|
|
|
from django.template import Context, loader
|
|
from haystack import indexes
|
|
from sapl.materia.models import DocumentoAcessorio, MateriaLegislativa
|
|
from sapl.norma.models import NormaJuridica
|
|
|
|
|
|
class DocumentoAcessorioIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``DocumentoAcessorio`` that also indexes file text.

    In addition to what the parent ``prepare`` produces, the text extracted
    by ``textract`` from the file attached to each object is handed to the
    index template as ``extracted``.

    Subclasses reuse the whole machinery by overriding the three class
    attributes ``filename``, ``model`` and ``template_name``.
    """

    text = indexes.CharField(document=True, use_template=True)

    # Name of the model attribute that holds the file to extract text from.
    filename = 'arquivo'
    # Model class whose instances are indexed.
    model = DocumentoAcessorio
    # Template path, relative to 'search/indexes/', used to render ``text``.
    template_name = 'materia/documentoacessorio_text.txt'

    def get_model(self):
        """Return the model class handled by this index."""
        return self.model

    def index_queryset(self, using=None):
        """Return all instances of the model; ``using`` is not consulted."""
        return self.get_model().objects.all()

    def prepare(self, obj):
        """Prepare the index data for ``obj``, including its file's text.

        The data built by the parent class is returned untouched when the
        object has no file, when the file cannot be opened, or when the file
        path has no extension. Otherwise the ``text`` entry is replaced by
        the template rendered with ``object`` and ``extracted`` (the file's
        text with newlines and tabs turned into spaces).

        Raises:
            Exception: if ``filename``, ``model`` or ``template_name`` is
                not set on the index class.
        """
        if not (self.filename and self.model and self.template_name):
            raise Exception(
                '%s must define filename, model and template_name'
                % type(self).__name__)

        data = super(DocumentoAcessorioIndex, self).prepare(obj)

        arquivo = getattr(obj, self.filename)
        if not arquivo:
            # presumably the same dict the parent prepare() returned;
            # kept as in the original -- verify against haystack.
            return self.prepared_data

        # Opening is only a probe that the file is readable; the content is
        # read through ``arquivo.path`` below, so release the handle right
        # away instead of leaking one per indexed object.
        try:
            arquivo.open()
        except OSError:
            return self.prepared_data
        else:
            arquivo.close()

        # Files without an extension are skipped.
        # NOTE(review): presumably because textract picks its parser from
        # the extension -- confirm against the textract documentation.
        extension = os.path.splitext(arquivo.path)[1]
        if not extension:
            return self.prepared_data

        raw_text = textract.process(arquivo.path).decode('utf-8')
        extracted_data = raw_text.replace('\n', ' ').replace('\t', ' ')

        # Now we'll finally perform the template processing to render the
        # text field with *all* of our metadata visible for templating.
        # NOTE(review): newer Django releases expect a plain dict instead of
        # a Context in render() for templates obtained through ``loader`` --
        # confirm against the Django version this project runs on.
        t = loader.select_template(
            ('search/indexes/' + self.template_name, ))
        data['text'] = t.render(Context({'object': obj,
                                         'extracted': extracted_data}))

        return data
|
|
|
|
|
|
class MateriaLegislativaIndex(DocumentoAcessorioIndex):
    """Search index for ``MateriaLegislativa``.

    Only overrides the class-level configuration; all behaviour comes from
    ``DocumentoAcessorioIndex``.
    """

    # Configuration read by the inherited ``prepare`` / ``get_model``.
    model = MateriaLegislativa
    filename = 'texto_original'
    template_name = 'materia/materialegislativa_text.txt'

    # Main document field, rendered from the template.
    text = indexes.CharField(document=True, use_template=True)
|
|
|
|
|
|
class NormaJuridicaIndex(DocumentoAcessorioIndex):
    """Search index for ``NormaJuridica``.

    Only overrides the class-level configuration; all behaviour comes from
    ``DocumentoAcessorioIndex``.
    """

    # Configuration read by the inherited ``prepare`` / ``get_model``.
    model = NormaJuridica
    filename = 'texto_integral'
    template_name = 'norma/normajuridica_text.txt'

    # Main document field, rendered from the template.
    text = indexes.CharField(document=True, use_template=True)
|
|
|