|
@ -7,6 +7,8 @@ from haystack import indexes |
|
|
from sapl.materia.models import DocumentoAcessorio, MateriaLegislativa |
|
|
from sapl.materia.models import DocumentoAcessorio, MateriaLegislativa |
|
|
from sapl.norma.models import NormaJuridica |
|
|
from sapl.norma.models import NormaJuridica |
|
|
|
|
|
|
|
|
|
|
|
from textract.exceptions import ExtensionNotSupported |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentoAcessorioIndex(indexes.SearchIndex, indexes.Indexable): |
|
|
class DocumentoAcessorioIndex(indexes.SearchIndex, indexes.Indexable): |
|
|
text = indexes.CharField(document=True, use_template=True) |
|
|
text = indexes.CharField(document=True, use_template=True) |
|
@ -32,15 +34,19 @@ class DocumentoAcessorioIndex(indexes.SearchIndex, indexes.Indexable): |
|
|
if arquivo: |
|
|
if arquivo: |
|
|
try: |
|
|
try: |
|
|
arquivo.open() |
|
|
arquivo.open() |
|
|
|
|
|
arquivo.close() |
|
|
except OSError: |
|
|
except OSError: |
|
|
return self.prepared_data |
|
|
return self.prepared_data |
|
|
|
|
|
|
|
|
if not os.path.splitext(arquivo.path)[1][:1]: |
|
|
if not os.path.splitext(arquivo.path)[1][:1]: |
|
|
return self.prepared_data |
|
|
return self.prepared_data |
|
|
|
|
|
|
|
|
extracted_data = textract.process( |
|
|
try: |
|
|
arquivo.path).decode( |
|
|
extracted_data = textract.process( |
|
|
'utf-8').replace('\n', ' ') |
|
|
arquivo.path).decode( |
|
|
|
|
|
'utf-8').replace('\n', ' ') |
|
|
|
|
|
except ExtensionNotSupported: |
|
|
|
|
|
return self.prepared_data |
|
|
|
|
|
|
|
|
extracted_data = extracted_data.replace('\t', ' ') |
|
|
extracted_data = extracted_data.replace('\t', ' ') |
|
|
|
|
|
|
|
|