Browse Source

Fix #2642 (#2643)

pull/2648/head
Ulysses Lara 6 years ago
committed by Edward
parent
commit
baf4ad99ec
  1. 2
      requirements/requirements.txt
  2. 29
      sapl/base/search_indexes.py
  3. 5
      sapl/settings.py
  4. 3
      setup.py

2
requirements/requirements.txt

@ -27,9 +27,7 @@ WeasyPrint==44
Pillow==5.1.0 Pillow==5.1.0
gunicorn==19.9.0 gunicorn==19.9.0
textract==1.5.0
pysolr==3.6.0 pysolr==3.6.0
whoosh==2.7.4
pyoai==2.5.0 pyoai==2.5.0

29
sapl/base/search_indexes.py

@ -1,5 +1,4 @@
import os.path import os.path
import textract
import logging import logging
from django.db.models import F, Q, Value from django.db.models import F, Q, Value
@ -11,7 +10,6 @@ from haystack.constants import Indexable
from haystack.fields import CharField from haystack.fields import CharField
from haystack.indexes import SearchIndex from haystack.indexes import SearchIndex
from haystack.utils import get_model_ct_tuple from haystack.utils import get_model_ct_tuple
from textract.exceptions import ExtensionNotSupported
from sapl.compilacao.models import (STATUS_TA_IMMUTABLE_PUBLIC, from sapl.compilacao.models import (STATUS_TA_IMMUTABLE_PUBLIC,
STATUS_TA_PUBLIC, Dispositivo) STATUS_TA_PUBLIC, Dispositivo)
@ -49,19 +47,6 @@ class TextExtractField(CharField):
data = '' data = ''
return data return data
def whoosh_extraction(self, arquivo):
if arquivo.path.endswith('html') or arquivo.path.endswith('xml'):
with open(arquivo.path, 'r', encoding="utf8", errors='ignore') as f:
content = ' '.join(f.read())
return RemoveTag(content)
else:
return textract.process(
arquivo.path,
language='pt-br').decode('utf-8').replace('\n', ' ').replace(
'\t', ' ')
def print_error(self, arquivo, error): def print_error(self, arquivo, error):
msg = 'Erro inesperado processando arquivo %s erro: %s' % ( msg = 'Erro inesperado processando arquivo %s erro: %s' % (
arquivo.path, error) arquivo.path, error)
@ -80,20 +65,6 @@ class TextExtractField(CharField):
except Exception as err: except Exception as err:
print(str(err)) print(str(err))
self.print_error(arquivo, err) self.print_error(arquivo, err)
# Em ambiente de DEV utiliza-se o Whoosh
# Como ele não possui extração, faz-se uso do textract
else:
try:
self.logger.debug("Tentando whoosh_extraction no arquivo {}".format(arquivo.path))
return self.whoosh_extraction(arquivo)
self.print_error(arquivo)
except ExtensionNotSupported as err:
print(str(err))
self.logger.error(str(err))
except Exception as err:
print(str(err))
self.print_error(arquivo, str(err))
return '' return ''
def ta_extractor(self, value): def ta_extractor(self, value):

5
sapl/settings.py

@ -95,7 +95,6 @@ INSTALLED_APPS = (
'reversion_compare', 'reversion_compare',
'haystack', 'haystack',
'whoosh',
'speedinfo', 'speedinfo',
'webpack_loader', 'webpack_loader',
@ -106,8 +105,8 @@ INSTALLED_APPS = (
# Desabilita a indexação textual até encontramos uma solução para a issue # Desabilita a indexação textual até encontramos uma solução para a issue
# https://github.com/interlegis/sapl/issues/2055 # https://github.com/interlegis/sapl/issues/2055
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor' # Disable auto index HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor' # Disable auto index
SEARCH_BACKEND = 'haystack.backends.whoosh_backend.WhooshEngine' SEARCH_BACKEND = ''
SEARCH_URL = ('PATH', PROJECT_DIR.child('whoosh')) SEARCH_URL = ['','']
# SOLR # SOLR
USE_SOLR = config('USE_SOLR', cast=bool, default=False) USE_SOLR = config('USE_SOLR', cast=bool, default=False)

3
setup.py

@ -36,10 +36,7 @@ install_requires = [
'unipath==1.1', 'unipath==1.1',
'WeasyPrint==44', 'WeasyPrint==44',
'gunicorn==19.9.0', 'gunicorn==19.9.0',
'textract==1.5.0',
'pysolr==3.6.0', 'pysolr==3.6.0',
'whoosh==2.7.4',
# 'git+git://github.com/interlegis/trml2pdf.git', # 'git+git://github.com/interlegis/trml2pdf.git',
# 'git+git://github.com/interlegis/django-admin-bootstrapped', # 'git+git://github.com/interlegis/django-admin-bootstrapped',

Loading…
Cancel
Save