diff --git a/.gitignore b/.gitignore
index 38805b350..83de47a86 100644
--- a/.gitignore
+++ b/.gitignore
@@ -92,3 +92,4 @@ collected_static
 bower
 bower_components
 media
+whoosh/
\ No newline at end of file
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 36ed3844b..a35827827 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -13,19 +13,22 @@ django-extensions==1.6.7
 django-extra-views==0.8.0
 django-filter==0.15.3
 django-floppyforms==1.6.2
+django-haystack==2.5.1
 django-model-utils==2.5
 django-sass-processor==0.4.6
 djangorestframework
 drfdocs
 easy-thumbnails==2.3
 git+git://github.com/interlegis/trml2pdf.git
 libsass==0.11.1
 psycopg2==2.6.2
 python-decouple==3.0
 pytz==2016.4
 pyyaml==3.11
 rtyaml==0.0.3
+textract==1.5.0
 unipath==1.1
 python-magic==0.4.12
 gunicorn==19.6.0
 django-reversion==2.0.8
+whoosh==2.7.4
diff --git a/sapl/base/search_indexes.py b/sapl/base/search_indexes.py
new file mode 100644
index 000000000..eb86d4f00
--- /dev/null
+++ b/sapl/base/search_indexes.py
@@ -0,0 +1,78 @@
+import textract
+from django.core.exceptions import ImproperlyConfigured
+from django.template import loader
+from haystack import indexes
+
+from sapl.materia.models import DocumentoAcessorio, MateriaLegislativa
+
+
+class DocumentoAcessorioIndex(indexes.SearchIndex, indexes.Indexable):
+    """Search index over the file attached to a DocumentoAcessorio.
+
+    Subclasses may index another model by overriding ``filename`` (the
+    attribute holding the file), ``model`` and ``template_name`` (template
+    path relative to ``search/indexes/``).
+    """
+    text = indexes.CharField(document=True, use_template=True)
+
+    filename = 'arquivo'
+    model = DocumentoAcessorio
+    template_name = 'materia/documentoacessorio_text.txt'
+
+    def get_model(self):
+        """Return the model class configured in ``model``."""
+        return self.model
+
+    def index_queryset(self, using=None):
+        """Used when the entire index for model is updated."""
+        return self.get_model().objects.all()
+
+    def prepare(self, obj):
+        """Return the index data for ``obj``, with the file text in 'text'.
+
+        When ``obj`` has no file, or the file cannot be opened, the data
+        prepared by the parent class is returned as is.
+
+        Raises ImproperlyConfigured if ``filename``, ``model`` or
+        ``template_name`` is not set on the index class.
+        """
+        if not self.filename or not self.model or not self.template_name:
+            raise ImproperlyConfigured(
+                '%s must define filename, model and template_name.'
+                % type(self).__name__)
+
+        data = super(DocumentoAcessorioIndex, self).prepare(obj)
+
+        arquivo = getattr(obj, self.filename)
+        if not arquivo:
+            return data
+
+        # The file is opened only to check that it can be read; textract
+        # works from the path, so the handle is closed again right away.
+        try:
+            arquivo.open()
+        except OSError:
+            return data
+        arquivo.close()
+
+        extracted_data = textract.process(
+            arquivo.path).decode('utf-8').replace('\n', ' ')
+
+        # Render the indexed text with the extracted contents available to
+        # the template. A plain dict is passed because handing a Context to
+        # templates obtained from the loader is deprecated since Django 1.8.
+        t = loader.select_template((
+            'search/indexes/' + self.template_name, ))
+        data['text'] = t.render({'object': obj,
+                                 'extracted': extracted_data})
+
+        return data
+
+
+class MateriaLegislativaIndex(DocumentoAcessorioIndex):
+    """Search index over the ``texto_original`` file of MateriaLegislativa."""
+    text = indexes.CharField(document=True, use_template=True)
+
+    filename = 'texto_original'
+    model = MateriaLegislativa
+    template_name = 'materia/materialegislativa_text.txt'
diff --git a/sapl/base/templatetags/common_tags.py b/sapl/base/templatetags/common_tags.py
index 9d643e70c..a908b7396 100644
--- a/sapl/base/templatetags/common_tags.py
+++ b/sapl/base/templatetags/common_tags.py
@@ -2,6 +2,7 @@ from compressor.utils import get_class
 from django import template
 
 from sapl.base.models import AppConfig
+from sapl.materia.models import DocumentoAcessorio, MateriaLegislativa
 from sapl.parlamentares.models import Filiacao
 
 register = template.Library()
@@ -137,3 +138,18 @@ def cronometro_to_seconds(value):
 @register.filter
 def to_list_pk(object_list):
     return [o.pk for o in object_list]
+
+
+@register.filter
+def search_get_model(object):
+    """Return 'm' for a MateriaLegislativa, 'd' for a DocumentoAcessorio.
+
+    Any other value yields None. isinstance() is used so that subclasses
+    of either model are matched as well.
+    """
+    if isinstance(object, MateriaLegislativa):
+        return 'm'
+    elif isinstance(object, DocumentoAcessorio):
+        return 'd'
+
+    return None
diff --git a/sapl/base/urls.py b/sapl/base/urls.py
index 361ab2cda..4e2d21f9b 100644
--- a/sapl/base/urls.py
+++ b/sapl/base/urls.py
@@ -99,4 +99,6 @@ urlpatterns = [
         name='login'),
     url(r'^logout/$', views.logout,
         {'next_page': '/login'}, name='logout'),
+    url(r'^search/', include('haystack.urls')),
+
 ] + recuperar_senha
diff --git
a/sapl/settings.py b/sapl/settings.py
index 37646e675..d9875994b 100644
--- a/sapl/settings.py
+++ b/sapl/settings.py
@@ -76,12 +76,24 @@ INSTALLED_APPS = (
     'crispy_forms',
     'easy_thumbnails',
     'floppyforms',
+    'haystack',
     'sass_processor',
     'rest_framework',
     'reversion',
 ) + SAPL_APPS
 
 
+
+# Full-text search through django-haystack, using the Whoosh backend with
+# its index kept in the ``whoosh`` directory under PROJECT_DIR.
+HAYSTACK_CONNECTIONS = {
+    'default': {
+        'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
+        'PATH': PROJECT_DIR.child('whoosh'),
+    },
+}
+
+
 if DEBUG:
     INSTALLED_APPS += ('debug_toolbar', 'rest_framework_docs',)
 
diff --git a/sapl/templates/search/indexes/materia/documentoacessorio_text.txt b/sapl/templates/search/indexes/materia/documentoacessorio_text.txt
new file mode 100644
index 000000000..0f9218324
--- /dev/null
+++ b/sapl/templates/search/indexes/materia/documentoacessorio_text.txt
@@ -0,0 +1,2 @@
+{# "extracted" is a plain string, so it is rendered directly. #}
+{{ extracted|striptags|safe }}
diff --git a/sapl/templates/search/indexes/materia/materialegislativa_text.txt b/sapl/templates/search/indexes/materia/materialegislativa_text.txt
new file mode 100644
index 000000000..0f9218324
--- /dev/null
+++ b/sapl/templates/search/indexes/materia/materialegislativa_text.txt
@@ -0,0 +1,2 @@
+{# "extracted" is a plain string, so it is rendered directly. #}
+{{ extracted|striptags|safe }}
diff --git a/sapl/templates/search/search.html b/sapl/templates/search/search.html
new file mode 100644
index 000000000..e049f3a89
--- /dev/null
+++ b/sapl/templates/search/search.html
@@ -0,0 +1,70 @@
+{% extends 'crud/form.html' %}
+{% load crispy_forms_tags %}
+{% load common_tags %}
+
+{% block base_content %}
+

Pesquisa Textual

+
+ +
+
+ +
+
+ {{ form.q|as_crispy_field }} +
+
+ +
+
+
+

Em quais tipos de documento deseja pesquisar?

+
+
+ +
+
+ {{ form.models }} +
+
+ + +
+
+ +
+
+ +
+ + {% if query %} +

Resultados

+ + {% for result in page.object_list %} + {% if result.object|search_get_model == 'm' %} +

+ {{ result.object }} +

+ + {% elif result.object|search_get_model == 'd' %} +

+ {{ result.object }} +

+ {% endif %} + + {% empty %} +

No results found.

+ {% endfor %} + + {% if page.has_previous or page.has_next %} +
+ {% if page.has_previous %}{% endif %}« Previous{% if page.has_previous %}{% endif %} + | + {% if page.has_next %}{% endif %}Next »{% if page.has_next %}{% endif %} +
+ {% endif %} + {% else %} + {# Show some example queries to run, maybe query syntax, something else? #} + {% endif %} +
+{% endblock %} \ No newline at end of file