Browse Source

Atualiza schema e adiciona script de stats

pull/2465/head
Edward Ribeiro 6 years ago
parent
commit
f35490d107
  1. 37
      solr/docs_stats.py
  2. 1
      solr/sapl_configset/conf/schema.xml

37
solr/docs_stats.py

@ -0,0 +1,37 @@
import requests
"""
Imprime quantidade de collections, qtd de documentos por collection e
total de documentos indexados.
"""
BASE_URL = 'http://localhost:8983/solr'

# Seconds to wait for Solr before giving up. Without an explicit timeout,
# requests waits forever on an unresponsive server.
REQUEST_TIMEOUT = 30


def _get_json(url):
    """Fetch ``url`` and return its decoded JSON body.

    Raises:
        requests.HTTPError: if Solr answers with a 4xx/5xx status, so the
            failure is reported as such instead of as a confusing
            ``KeyError`` or JSON decoding error further down.
        requests.Timeout: if no answer arrives within ``REQUEST_TIMEOUT``
            seconds.
    """
    resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    return resp.json()


def summarize(doc_counts):
    """Return ``(largest, total)`` for ``(name, num_docs)`` pairs.

    Args:
        doc_counts: iterable of ``(collection_name, num_docs)`` tuples.

    Returns:
        A tuple ``(largest, total)``. ``largest`` is the
        ``(collection_name, num_docs)`` pair with the highest document
        count, or ``None`` when ``doc_counts`` is empty. ``total`` is the
        sum of all document counts.
    """
    largest = None
    total = 0
    for name, num_docs in doc_counts:
        total += num_docs
        # ">=" makes the last of several equally large collections win.
        if largest is None or num_docs >= largest[1]:
            largest = (name, num_docs)
    return largest, total


def main():
    """Print per-collection document counts and an overall summary."""
    listing = _get_json(BASE_URL + '/admin/collections?action=LIST')
    collections = sorted(listing['collections'])

    print("Collection\t\t\tNumber of documents")
    print("--------------------------------------------------")

    doc_counts = []
    for c in collections:
        # rows=0: only the match count (numFound) is needed, not the docs.
        found = _get_json(BASE_URL + '/{}/select?q=*:*&rows=0'.format(c))
        num_docs = found['response']['numFound']
        doc_counts.append((c, num_docs))
        print("%30s\t%6s" % (c, num_docs))

    largest_col, total_docs = summarize(doc_counts)

    print("------------------------------------------")
    print("- Number of collections: %s\n" % len(collections))
    if largest_col is None:
        # No collections at all: say so rather than print a placeholder.
        print("- Largest collection: none\n")
    else:
        print("- Largest collection: '%s' (%s docs)\n" % largest_col)
    print("- Total documents accross all collections: %s\n" % total_docs)


if __name__ == '__main__':
    main()

1
solr/sapl_configset/conf/schema.xml

@ -151,6 +151,7 @@
<!-- django_id: single-valued string, indexed and stored
     (presumably the Django object id; confirm against the indexer). -->
<field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/>
<!-- _version_: plong, indexed and stored. -->
<field name="_version_" type="plong" indexed="true" stored ="true"/>
<!-- text: single-valued text_pt field, indexed and stored. -->
<field name="text" type="text_pt" indexed="true" stored="true" multiValued="false" />
<!-- indextime: pdate whose value defaults to NOW when a document does not
     supply one. indexed/stored are not set on this field
     (NOTE(review): presumably inherited from the pdate field type; confirm). -->
<field name="indextime" type="pdate" default="NOW" />
</fields>

Loading…
Cancel
Save