From 8689a62a20fbc562576aabb59ea1b016f873ee22 Mon Sep 17 00:00:00 2001 From: Gustavo274 <51173319+Gustavo274@users.noreply.github.com> Date: Wed, 23 Feb 2022 13:13:18 -0300 Subject: [PATCH] =?UTF-8?q?Consertar=20erros=20de=20indexa=C3=A7=C3=A3o=20?= =?UTF-8?q?do=20no=20Solr=208.9=20(#3503)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/docker-compose.yml | 2 +- requirements/requirements.txt | 2 +- sapl/base/search_indexes.py | 11 ++++++-- solr/docs_stats.py | 37 ------------------------- solr/sapl_configset/conf/managed-schema | 4 +-- solr/sapl_configset/conf/solrconfig.xml | 2 +- 6 files changed, 13 insertions(+), 45 deletions(-) delete mode 100644 solr/docs_stats.py diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 221aad26b..ec2051081 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -18,7 +18,7 @@ services: networks: - sapl-net saplsolr: - image: solr:8.3 + image: solr:8.9 restart: always command: bin/solr start -c -f container_name: solr diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 3b522725c..3feed756f 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ django==2.2.24 -django-haystack==2.8.1 +django-haystack==3.1.1 django-filter==2.4.0 djangorestframework==3.12.4 dj-database-url==0.5.0 diff --git a/sapl/base/search_indexes.py b/sapl/base/search_indexes.py index 7e5df5737..e76f00168 100644 --- a/sapl/base/search_indexes.py +++ b/sapl/base/search_indexes.py @@ -38,9 +38,14 @@ class TextExtractField(CharField): try: with open(arquivo.path, 'rb') as f: content = self.backend.extract_file_contents(f) - if not content or not content['contents']: - return '' - data = content['contents'] + data = '' + if content: + # update from Solr 7.5 to 8.9 + if content['contents']: + data += content['contents'] + if content['file']: + data += content['file'] + return data except Exception as e: print('erro processando arquivo: ' % arquivo.path) self.logger.error(arquivo.path) diff --git a/solr/docs_stats.py b/solr/docs_stats.py deleted file mode 100644 index 5343cfb15..000000000 --- a/solr/docs_stats.py +++ /dev/null @@ -1,37 +0,0 @@ -import requests - -""" - Imprime quantidade de colletions, qtd de documentos por collection e - total de documentos indexados. -""" - -BASE_URL='http://localhost:8983/solr' - - -if __name__=='__main__': - - resp = requests.get(BASE_URL+'/admin/collections?action=LIST') - - collections = sorted(resp.json()['collections']) - - largest_col = (None,-1) - total_docs = 0 - - print("Collection\t\t\tNumber of documents") - print("--------------------------------------------------") - - for c in collections: - r = requests.get(BASE_URL+'/{}/select?q=*:*&rows=0'.format(c)) - num_docs = r.json()['response']['numFound'] - total_docs += num_docs - - if num_docs >= largest_col[1]: - largest_col = (c, num_docs) - - print("%30s\t%6s" % (c, num_docs)) - - print("------------------------------------------") - print("- Number of collections: %s\n" % len(collections)) - print("- Largest collection: '%s' (%s docs)\n" % largest_col) - print("- Total documents accross all collections: %s\n" % total_docs) - diff --git a/solr/sapl_configset/conf/managed-schema b/solr/sapl_configset/conf/managed-schema index b6ecffaaf..6b4ec2a35 100644 --- a/solr/sapl_configset/conf/managed-schema +++ b/solr/sapl_configset/conf/managed-schema @@ -120,7 +120,7 @@ - + @@ -552,7 +552,7 @@ - + diff --git a/solr/sapl_configset/conf/solrconfig.xml b/solr/sapl_configset/conf/solrconfig.xml index 8f5220a80..b79103f6d 100644 --- a/solr/sapl_configset/conf/solrconfig.xml +++ b/solr/sapl_configset/conf/solrconfig.xml @@ -310,7 +310,7 @@ have some sort of hard autoCommit to limit the log size. --> - 180000 + 300000 false