From 8689a62a20fbc562576aabb59ea1b016f873ee22 Mon Sep 17 00:00:00 2001
From: Gustavo274 <51173319+Gustavo274@users.noreply.github.com>
Date: Wed, 23 Feb 2022 13:13:18 -0300
Subject: [PATCH] =?UTF-8?q?Consertar=20erros=20de=20indexa=C3=A7=C3=A3o=20?=
=?UTF-8?q?no=20Solr=208.9=20(#3503)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
docker/docker-compose.yml | 2 +-
requirements/requirements.txt | 2 +-
sapl/base/search_indexes.py | 11 ++++++--
solr/docs_stats.py | 37 -------------------------
solr/sapl_configset/conf/managed-schema | 4 +--
solr/sapl_configset/conf/solrconfig.xml | 2 +-
6 files changed, 13 insertions(+), 45 deletions(-)
delete mode 100644 solr/docs_stats.py
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 221aad26b..ec2051081 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -18,7 +18,7 @@ services:
networks:
- sapl-net
saplsolr:
- image: solr:8.3
+ image: solr:8.9
restart: always
command: bin/solr start -c -f
container_name: solr
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 3b522725c..3feed756f 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,5 +1,5 @@
django==2.2.24
-django-haystack==2.8.1
+django-haystack==3.1.1
django-filter==2.4.0
djangorestframework==3.12.4
dj-database-url==0.5.0
diff --git a/sapl/base/search_indexes.py b/sapl/base/search_indexes.py
index 7e5df5737..e76f00168 100644
--- a/sapl/base/search_indexes.py
+++ b/sapl/base/search_indexes.py
@@ -38,9 +38,14 @@ class TextExtractField(CharField):
try:
with open(arquivo.path, 'rb') as f:
content = self.backend.extract_file_contents(f)
- if not content or not content['contents']:
- return ''
- data = content['contents']
+ data = ''
+ if content:
+ # update from Solr 7.5 to 8.9
+ if content['contents']:
+ data += content['contents']
+ if content['file']:
+ data += content['file']
+ return data
except Exception as e:
print('erro processando arquivo: ' % arquivo.path)
self.logger.error(arquivo.path)
diff --git a/solr/docs_stats.py b/solr/docs_stats.py
deleted file mode 100644
index 5343cfb15..000000000
--- a/solr/docs_stats.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import requests
-
-"""
- Imprime quantidade de colletions, qtd de documentos por collection e
- total de documentos indexados.
-"""
-
-BASE_URL='http://localhost:8983/solr'
-
-
-if __name__=='__main__':
-
- resp = requests.get(BASE_URL+'/admin/collections?action=LIST')
-
- collections = sorted(resp.json()['collections'])
-
- largest_col = (None,-1)
- total_docs = 0
-
- print("Collection\t\t\tNumber of documents")
- print("--------------------------------------------------")
-
- for c in collections:
- r = requests.get(BASE_URL+'/{}/select?q=*:*&rows=0'.format(c))
- num_docs = r.json()['response']['numFound']
- total_docs += num_docs
-
- if num_docs >= largest_col[1]:
- largest_col = (c, num_docs)
-
- print("%30s\t%6s" % (c, num_docs))
-
- print("------------------------------------------")
- print("- Number of collections: %s\n" % len(collections))
- print("- Largest collection: '%s' (%s docs)\n" % largest_col)
- print("- Total documents accross all collections: %s\n" % total_docs)
-
diff --git a/solr/sapl_configset/conf/managed-schema b/solr/sapl_configset/conf/managed-schema
index b6ecffaaf..6b4ec2a35 100644
--- a/solr/sapl_configset/conf/managed-schema
+++ b/solr/sapl_configset/conf/managed-schema
@@ -120,7 +120,7 @@
-
+
@@ -552,7 +552,7 @@
-
+
diff --git a/solr/sapl_configset/conf/solrconfig.xml b/solr/sapl_configset/conf/solrconfig.xml
index 8f5220a80..b79103f6d 100644
--- a/solr/sapl_configset/conf/solrconfig.xml
+++ b/solr/sapl_configset/conf/solrconfig.xml
@@ -310,7 +310,7 @@
have some sort of hard autoCommit to limit the log size.
-->
- 180000
+ 300000
false