diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 6a101e223..7e6189373 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -27,9 +27,7 @@ WeasyPrint==44 Pillow==5.1.0 gunicorn==19.9.0 -textract==1.5.0 pysolr==3.6.0 -whoosh==2.7.4 pyoai==2.5.0 diff --git a/sapl/base/search_indexes.py b/sapl/base/search_indexes.py index 0e0283ba8..359fbd44b 100644 --- a/sapl/base/search_indexes.py +++ b/sapl/base/search_indexes.py @@ -1,5 +1,4 @@ import os.path -import textract import logging from django.db.models import F, Q, Value @@ -11,7 +10,6 @@ from haystack.constants import Indexable from haystack.fields import CharField from haystack.indexes import SearchIndex from haystack.utils import get_model_ct_tuple -from textract.exceptions import ExtensionNotSupported from sapl.compilacao.models import (STATUS_TA_IMMUTABLE_PUBLIC, STATUS_TA_PUBLIC, Dispositivo) @@ -49,19 +47,6 @@ class TextExtractField(CharField): data = '' return data - def whoosh_extraction(self, arquivo): - - if arquivo.path.endswith('html') or arquivo.path.endswith('xml'): - with open(arquivo.path, 'r', encoding="utf8", errors='ignore') as f: - content = ' '.join(f.read()) - return RemoveTag(content) - - else: - return textract.process( - arquivo.path, - language='pt-br').decode('utf-8').replace('\n', ' ').replace( - '\t', ' ') - def print_error(self, arquivo, error): msg = 'Erro inesperado processando arquivo %s erro: %s' % ( arquivo.path, error) @@ -80,20 +65,6 @@ class TextExtractField(CharField): except Exception as err: print(str(err)) self.print_error(arquivo, err) - - # Em ambiente de DEV utiliza-se o Whoosh - # Como ele não possui extração, faz-se uso do textract - else: - try: - self.logger.debug("Tentando whoosh_extraction no arquivo {}".format(arquivo.path)) - return self.whoosh_extraction(arquivo) - self.print_error(arquivo) - except ExtensionNotSupported as err: - print(str(err)) - self.logger.error(str(err)) - except Exception as err: - print(str(err)) - self.print_error(arquivo, str(err)) return '' def ta_extractor(self, value): diff --git a/sapl/settings.py b/sapl/settings.py index dc2db1d6e..ac1209521 100644 --- a/sapl/settings.py +++ b/sapl/settings.py @@ -95,7 +95,6 @@ INSTALLED_APPS = ( 'reversion_compare', 'haystack', - 'whoosh', 'speedinfo', 'webpack_loader', @@ -106,8 +105,8 @@ INSTALLED_APPS = ( # Desabilita a indexação textual até encontramos uma solução para a issue # https://github.com/interlegis/sapl/issues/2055 HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor' # Disable auto index -SEARCH_BACKEND = 'haystack.backends.whoosh_backend.WhooshEngine' -SEARCH_URL = ('PATH', PROJECT_DIR.child('whoosh')) +SEARCH_BACKEND = '' +SEARCH_URL = ['',''] # SOLR USE_SOLR = config('USE_SOLR', cast=bool, default=False) diff --git a/setup.py b/setup.py index 91f75da8e..bce1af445 100644 --- a/setup.py +++ b/setup.py @@ -36,10 +36,7 @@ install_requires = [ 'unipath==1.1', 'WeasyPrint==44', 'gunicorn==19.9.0', - - 'textract==1.5.0', 'pysolr==3.6.0', - 'whoosh==2.7.4', # 'git+git://github.com/interlegis/trml2pdf.git', # 'git+git://github.com/interlegis/django-admin-bootstrapped',