# Source: solr/docs_stats.py as added by git patch f35490d10721
# (Edward Ribeiro, Mon, 11 Feb 2019,
# "Atualiza schema e adiciona script de stats").
# NOTE(review): the same patch also adds one line to
# solr/sapl_configset/conf/schema.xml (hunk "@@ -151,6 +151,7 @@"); the
# added line is not visible in this copy and is not reproduced here.
"""Print Solr collection statistics.

Reports the number of collections, the number of documents in each
collection and the total number of documents indexed across all of them.
"""

BASE_URL = 'http://localhost:8983/solr'

# Seconds to wait for Solr to answer. requests applies no timeout unless one
# is given, so without this an unresponsive server would block forever.
REQUEST_TIMEOUT = 10


def _get_json(path, params):
    """Send a GET to ``BASE_URL + path`` and return the decoded JSON body.

    Args:
        path: URL path appended to BASE_URL (must start with '/').
        params: dict of query-string parameters.

    Raises:
        requests.HTTPError: if Solr answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    # Imported here so the pure formatting helpers below stay importable
    # even where the HTTP dependency is not installed.
    import requests

    # Request JSON explicitly rather than relying on the server's default
    # response writer, since the body is decoded with .json() below.
    query = dict(params, wt='json')
    resp = requests.get(BASE_URL + path, params=query,
                        timeout=REQUEST_TIMEOUT)
    # Fail with the HTTP error itself instead of a KeyError further down.
    resp.raise_for_status()
    return resp.json()


def list_collections():
    """Return the names of all collections, sorted alphabetically."""
    body = _get_json('/admin/collections', {'action': 'LIST'})
    return sorted(body['collections'])


def count_documents(collection):
    """Return the number of documents indexed in ``collection``."""
    # rows=0: only the match count is needed, not the documents themselves.
    body = _get_json('/{}/select'.format(collection),
                     {'q': '*:*', 'rows': 0})
    return body['response']['numFound']


def format_row(collection, num_docs):
    """Return the table row for one collection."""
    return "%30s\t%6s" % (collection, num_docs)


def format_summary(doc_counts):
    """Return the summary lines printed below the per-collection table.

    Args:
        doc_counts: iterable of ``(collection_name, num_docs)`` pairs.

    Returns:
        A list of strings, one per ``print`` call. The trailing newline
        embedded in each summary entry produces a blank line after it.
    """
    doc_counts = list(doc_counts)
    total_docs = sum(num_docs for _, num_docs in doc_counts)

    if doc_counts:
        # max() keeps the first maximum it meets; scanning in reverse makes
        # the *last* collection win a tie, as the original ">=" loop did.
        largest = max(reversed(doc_counts), key=lambda pair: pair[1])
        largest_line = "- Largest collection: '%s' (%s docs)\n" % largest
    else:
        # No collections at all: do not report a placeholder name/count.
        largest_line = "- Largest collection: (none)\n"

    return [
        "------------------------------------------",
        "- Number of collections: %s\n" % len(doc_counts),
        largest_line,
        "- Total documents across all collections: %s\n" % total_docs,
    ]


def main():
    """Query Solr and print the per-collection table and the summary."""
    collections = list_collections()

    print("Collection\t\t\tNumber of documents")
    print("--------------------------------------------------")

    doc_counts = []
    for name in collections:
        num_docs = count_documents(name)
        doc_counts.append((name, num_docs))
        # Print each row as soon as its count is known so progress is
        # visible while the remaining collections are still being queried.
        print(format_row(name, num_docs))

    for line in format_summary(doc_counts):
        print(line)


if __name__ == '__main__':
    main()