Sistema de Apoio ao Processo Legislativo
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

167 lines
7.1 KiB

import requests
import subprocess
import sys
class SolrClient:
LIST_CONFIGSETS = "{}://{}:{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
UPLOAD_CONFIGSET = "{}://{}:{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
LIST_COLLECTIONS = "{}://{}:{}/solr/admin/collections?action=LIST&wt=json"
STATUS_COLLECTION = "{}://{}:{}/solr/admin/collections?action=CLUSTERSTATUS&collection={}&wt=json"
STATUS_CORE = "{}/admin/cores?action=STATUS&name={}"
EXISTS_COLLECTION = "{}://{}:{}/solr/{}/admin/ping?wt=json"
OPTIMIZE_COLLECTION = "{}://{}:{}/solr/{}/update?optimize=true&wt=json"
CREATE_COLLECTION = "{}://{}:{}/solr/admin/collections?action=CREATE" \
"&name={}&collection.configName={}&numShards={}&replicationFactor={}&wt=json"
DELETE_COLLECTION = "{}://{}:{}/solr/admin/collections?action=DELETE&name={}&wt=json"
DELETE_DATA = "{}://{}:{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json"
CONFIGSET_NAME = "sapl_configset"
NUM_SHARDS = 1
NUM_REPLICAS = 1
def __init__(self, address='localhost', port=8983, protocol='http'):
self.protocol = protocol
self.address = address
self.port = port
def status_collection(self, collection_name):
col_url = self.STATUS_COLLECTION.format(self.protocol, self.address, self.port, collection_name)
resp = requests.get(col_url)
status_cluster = resp.json()
# TODO: test if collection exists!
shards = status_cluster['cluster']['collections'][collection_name]['shards']
num_docs = 0
deleted_docs = 0
for shard in shards.values():
for replica in shard['replicas'].values():
replica_base_url = replica['base_url']
replica_core = replica['core']
req_url = self.STATUS_CORE.format(replica_base_url, replica_core)
resp = requests.get(req_url)
data = resp.json()
# TODO: test if collection exists!
prefix = data['status'][replica_core]['index']
num_docs += prefix['numDocs']
deleted_docs += prefix['deletedDocs']
# get a single replica per shard
break
return num_docs, deleted_docs
def list_collections(self):
req_url = self.LIST_COLLECTIONS.format(self.protocol, self.address, self.port)
res = requests.get(req_url)
dic = res.json()
return dic['collections']
def exists_collection(self, collection_name):
collections = self.list_collections()
return True if collection_name in collections else False
def maybe_upload_configset(self, force=False):
req_url = self.LIST_CONFIGSETS.format(self.protocol,
self.address,
self.port)
res = requests.get(req_url)
dic = res.json()
configsets = dic['configSets']
# UPLOAD configset
if not self.CONFIGSET_NAME in configsets or force:
files = {'file': ('saplconfigset.zip',
open('./solr/sapl_configset/conf/saplconfigset.zip',
'rb'),
'application/octet-stream',
{'Expires': '0'})}
req_url = self.UPLOAD_CONFIGSET.format(self.protocol, self.address, self.port, self.CONFIGSET_NAME)
resp = requests.post(req_url, files=files)
print(resp.content)
else:
print('O %s já presente no servidor, NÃO enviando.' % self.CONFIGSET_NAME)
def create_collection(self, collection_name):
self.maybe_upload_configset()
req_url = self.CREATE_COLLECTION.format(self.protocol,
self.address,
self.port,
collection_name,
self.CONFIGSET_NAME,
self.NUM_SHARDS,
self.NUM_REPLICAS)
print(req_url)
res = requests.post(req_url)
if res.ok:
print("Collection '%s' created succesfully" % collection_name)
else:
print("Error creating collection '%s'" % collection_name)
as_json = res.json()
print("Error %s: %s" % (res.status_code, as_json['error']['msg']))
return False
return True
# TODO: redo to collections
# def optimize_collection(self, collection_name):
# req_url = self.OPTIMIZE_COLLECTION.format(self.protocol, self.address, self.port, collection_name)
# res = requests.get(req_url)
# if not res.ok:
# print("Error optimizing collection '{}'".format(collection_name))
# print("Code {}: {}".format(res.status_code, res.text))
# else:
# print("Collection '{}' optimized successfully!".format(collection_name))
def delete_collection(self, collection_name):
req_url = self.DELETE_COLLECTION.format(self.protocol, self.address, self.port, collection_name)
res = requests.post(req_url)
if not res.ok:
print("Error deleting collection '%s'", collection_name)
print("Code {}: {}".format(res.status_code, res.text))
else:
print("Collection '%s' deleted successfully!" % collection_name)
def delete_index_data(self, collection_name):
req_url = self.DELETE_DATA.format(self.protocol, self.address, self.port, collection_name)
res = requests.post(req_url,
data='<delete><query>*:*</query></delete>',
headers={'Content-Type': 'application/xml'})
if not res.ok:
print("Error deleting index for collection '%s'", collection_name)
print("Code {}: {}".format(res.status_code, res.text))
else:
print("Collection '%s' data deleted successfully!" % collection_name)
indexed, deleted = self.status_collection(collection_name)
print("Num docs: %s" % indexed)
print("Delete docs: %s" % deleted)
if __name__ == '__main__':
args = sys.argv
if len(args) < 2:
print("Usage: python3 docker_solr_init.py <collection name> <address>")
sys.exit(-1)
collection = args[1]
client = SolrClient()
if len(args) == 3:
hostname = args[2]
client = SolrClient(address=hostname)
if not client.exists_collection(collection):
print("Collection '%s' doesn't exists. Creating a new one..." % collection)
created = client.create_collection(collection)
if not created:
sys.exit(-1)
else:
print("Collection '%s' exists. Updating indexes..." % collection)
collection_data = client.status_collection(collection)
indexed, _ = client.status_collection(collection)
del _
if indexed == 0:
print("Performing a full reindex of '%s' collection..." % collection)
p = subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"])