#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import datetime
import logging
import re
import secrets
import subprocess
import sys
import zipfile
from base64 import b64encode, b64decode
from hashlib import sha256
from io import BytesIO
from pathlib import Path

import requests
from kazoo.client import KazooClient

#
# This module must be run from the project root
#

logging.basicConfig()

SECURITY_FILE_TEMPLATE = """
{
  "authentication":{
    "blockUnknown": true,
    "class":"solr.BasicAuthPlugin",
    "credentials":{"%s":"%s %s"},
    "forwardCredentials": false,
    "realm": "Solr Login"
  },
  "authorization":{
    "class":"solr.RuleBasedAuthorizationPlugin",
    "permissions":[{"name":"security-edit", "role":"admin"}],
    "user-role":{"%s":"admin"}
  }
}
"""

URL_PATTERN = 'https?://(([a-zA-Z0-9]+):([a-zA-Z0-9]+)@)?([a-zA-Z0-9.-]+)(:[0-9]{4})?'
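# Example (hypothetical URL): 'http://admin:secret@localhost:8983' yields the
# groups ('admin:secret@', 'admin', 'secret', 'localhost', ':8983'), the layout
# setup_embedded_zk() relies on. Note the port group matches exactly four digits.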


def solr_hash_password(password: str, salt: str = None):
    """
    Generates the hashed password and salt used by Solr Basic Auth.

    password: plain-text password string
    salt (optional): base64-encoded salt string
    returns: (cypher, salt) tuple, both base64 strings
    """
    m = sha256()
    if salt is None:
        salt = secrets.token_bytes(32)
    else:
        salt = b64decode(salt)
    m.update(salt + password.encode('utf-8'))
    digest = m.digest()

    # Solr hashes the first digest a second time: sha256(sha256(salt + password))
    m = sha256()
    m.update(digest)
    digest = m.digest()

    cypher = b64encode(digest).decode('utf-8')
    salt = b64encode(salt).decode('utf-8')
    return cypher, salt
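
# Example (hypothetical input): solr_hash_password('SolrRocks') returns a
# (cypher, salt) pair of base64 strings; calling it again with the returned
# salt reproduces the same cypher, which is how a stored credential is checked.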


def create_security_file(username, password):
    print("Creating security.json file...")
    with open("security.json", "w") as f:
        cypher, salt = solr_hash_password(password)
        f.write(SECURITY_FILE_TEMPLATE % (username, cypher, salt, username))
    print("file created!")


def upload_security_file(zk_host):
    zk_port = 9983  # default embedded ZK port (Solr port + 1000)
    print(f"Uploading security file to Solr, ZK server={zk_host}:{zk_port}...")
    try:
        with open('security.json', 'r') as f:
            data = f.read()
        zk = KazooClient(hosts=f"{zk_host}:{zk_port}")
        zk.start()
        print("Uploading security.json file...")
        # Solr in cloud mode reads /security.json from ZooKeeper
        if zk.exists('/security.json'):
            zk.set("/security.json", data.encode('utf-8'))
        else:
            zk.create("/security.json", data.encode('utf-8'))
        data, stat = zk.get('/security.json')
        print("file uploaded!")
        print(data.decode('utf-8'))
        zk.stop()
    except Exception as e:
        print(e)
        sys.exit(-1)


class SolrClient:

    LIST_CONFIGSETS = "{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
    UPLOAD_CONFIGSET = "{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
    LIST_COLLECTIONS = "{}/solr/admin/collections?action=LIST&wt=json"
    STATUS_COLLECTION = "{}/solr/admin/collections?action=CLUSTERSTATUS" \
                        "&collection={}&wt=json"
    STATUS_CORE = "{}/admin/cores?action=STATUS&name={}"
    EXISTS_COLLECTION = "{}/solr/{}/admin/ping?wt=json"
    OPTIMIZE_COLLECTION = "{}/solr/{}/update?optimize=true&wt=json"
    CREATE_COLLECTION = "{}/solr/admin/collections?action=CREATE&name={}" \
                        "&collection.configName={}&numShards={}" \
                        "&replicationFactor={}&maxShardsPerNode={}&wt=json"
    DELETE_COLLECTION = "{}/solr/admin/collections?action=DELETE&name={}&wt=json"
    DELETE_DATA = "{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json"
    QUERY_DATA = "{}/solr/{}/select?q=*:*"
    REBUILD_INDEX = "{}/solr/{}/dataimport?command=full-import&wt=json"
    UPDATE_INDEX = "{}/solr/{}/dataimport?command=delta-import&wt=json"

    CONFIGSET_NAME = "sapl_configset"
    CONFIGSET_PATH = "./solr/sapl_configset/conf"

    def __init__(self, url):
        self.url = url

    def get_num_docs(self, collection_name):
        final_url = self.QUERY_DATA.format(self.url, collection_name)
        res = requests.get(final_url)
        if res.ok:
            try:
                dic = res.json()
                return dic["response"]["numFound"]
            except Exception as e:
                print(f"Error in get_num_docs: {e}")
                print(res.content)
        return 0

    def list_collections(self):
        req_url = self.LIST_COLLECTIONS.format(self.url)
        res = requests.get(req_url)
        try:
            dic = res.json()
            return dic['collections']
        except Exception as e:
            print(f"Error in list_collections: {e}")
            print(res.content)
            # return an empty list so callers can safely use the `in` operator
            return []

    def exists_collection(self, collection_name):
        return collection_name in self.list_collections()

    def zip_configset(self):
        try:
            base_path = Path(self.CONFIGSET_PATH).expanduser().resolve(strict=True)

            # zip the configset files in memory
            _zipfile = BytesIO()
            with zipfile.ZipFile(_zipfile, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for file in base_path.rglob('*'):
                    zipf.write(file, file.relative_to(base_path))
            return _zipfile
        except Exception as e:
            print(e)
            raise
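
    # Note: Solr's configset UPLOAD expects the conf files (solrconfig.xml,
    # schema, etc.) at the top level of the zip, which the relative paths
    # written above produce.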

    def maybe_upload_configset(self, force=False):
        req_url = self.LIST_CONFIGSETS.format(self.url)
        res = requests.get(req_url)
        configsets = []  # default so a failed listing doesn't leave it unbound
        try:
            dic = res.json()
            configsets = dic['configSets']
        except Exception as e:
            print(f"Error listing configsets: {e}")
            print(res.content)

        # UPLOAD configset
        if force or self.CONFIGSET_NAME not in configsets:

            # GENERATE in-memory configset
            configset_zip = self.zip_configset()
            data = configset_zip.getvalue()
            configset_zip.close()

            files = {'file': ('saplconfigset.zip',
                              data,
                              'application/octet-stream',
                              {'Expires': '0'})}

            req_url = self.UPLOAD_CONFIGSET.format(self.url, self.CONFIGSET_NAME)

            resp = requests.post(req_url, files=files)
            print(resp.content)

        else:
            print('%s already present on the server, NOT uploading.' % self.CONFIGSET_NAME)

    def create_collection(self, collection_name, shards=1, replication_factor=1, max_shards_per_node=1):
        self.maybe_upload_configset()
        req_url = self.CREATE_COLLECTION.format(self.url,
                                                collection_name,
                                                self.CONFIGSET_NAME,
                                                shards,
                                                replication_factor,
                                                max_shards_per_node)
        res = requests.post(req_url)
        if res.ok:
            print("Collection '%s' created successfully" % collection_name)
        else:
            print("Error creating collection '%s'" % collection_name)
            try:
                as_json = res.json()
                print("Error %s: %s" % (res.status_code, as_json['error']['msg']))
            except Exception as e:
                print(f"Error while parsing the error response: {e}")
                print(res.content)
            return False
        return True

    def delete_collection(self, collection_name):
        if collection_name == '*':
            collections = self.list_collections()
        else:
            collections = [collection_name]

        for c in collections:
            req_url = self.DELETE_COLLECTION.format(self.url, c)
            res = requests.post(req_url)
            if not res.ok:
                print("Error deleting collection '%s'" % c)
                print("Code {}: {}".format(res.status_code, res.text))
            else:
                print("Collection '%s' deleted successfully!" % c)

    def delete_index_data(self, collection_name):
        req_url = self.DELETE_DATA.format(self.url, collection_name)
        res = requests.post(req_url,
                            data='<delete><query>*:*</query></delete>',
                            headers={'Content-Type': 'application/xml'})
        if not res.ok:
            print("Error deleting index for collection '%s'" % collection_name)
            print("Code {}: {}".format(res.status_code, res.text))
        else:
            print("Collection '%s' data deleted successfully!" % collection_name)

        num_docs = self.get_num_docs(collection_name)
        print("Num docs: %s" % num_docs)

    def update_index_last_day(self, collection_name):
        date = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%SZ')
        now = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')

        req_url = self.UPDATE_INDEX.format(self.url, collection_name)
        res = requests.post(req_url,
                            data='<update><query>*:[%s TO %s]</query></update>' % (date, now),
                            headers={'Content-Type': 'application/xml'})
        if not res.ok:
            print("Error updating index for collection '%s'" % collection_name)
            print("Code {}: {}".format(res.status_code, res.text))
        else:
            print("Collection '%s' data updated successfully!" % collection_name)

        num_docs = self.get_num_docs(collection_name)
        print("Num docs: %s" % num_docs)

    def rebuild_index(self, collection_name):
        req_url = self.REBUILD_INDEX.format(self.url, collection_name)
        res = requests.post(req_url)
        if not res.ok:
            print("Error rebuilding index for collection '%s'" % collection_name)
            print("Code {}: {}".format(res.status_code, res.text))
        else:
            print("Collection '%s' index rebuilt successfully!" % collection_name)

        num_docs = self.get_num_docs(collection_name)
        print("Num docs: %s" % num_docs)


def setup_embedded_zk(solr_url):
    match = re.match(URL_PATTERN, solr_url)
    if match:
        _, solr_user, solr_pwd, solr_host, solr_port = match.groups()

        if solr_user and solr_pwd and solr_host:
            create_security_file(solr_user, solr_pwd)
            upload_security_file(solr_host)
        else:
            print(f"Missing Solr username, password, or host: {solr_user}/{solr_pwd}/{solr_host}")
            sys.exit(-1)
    else:
        print(f"Solr URL doesn't match the expected format: {solr_url}")
        sys.exit(-1)


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Creates a Solr collection')

    # required arguments
    parser.add_argument('-u', type=str, metavar='URL', nargs=1, dest='url',
                        required=True,
                        help='Solr server address in the form http(s)://<address>[:port]')
    parser.add_argument('-c', type=str, metavar='COLLECTION', dest='collection', nargs=1,
                        required=True, help='Solr collection to be created')

    # optional arguments
    parser.add_argument('-s', type=int, dest='shards', nargs='?',
                        help='Number of shards (default=1)', default=1)
    parser.add_argument('-rf', type=int, dest='replication_factor', nargs='?',
                        help='Replication factor (default=1)', default=1)
    parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?',
                        help='Max shards per node (default=1)', default=1)
    parser.add_argument("--embedded_zk", default=False, action="store_true",
                        help="Embedded ZooKeeper")
    parser.add_argument("--rebuild_index", default=False, action="store_true")
    parser.add_argument("--update_index", default=False, action="store_true")

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))
        sys.exit(-1)

    url = args.url.pop()

    if args.embedded_zk:
        print("Setup embedded ZooKeeper...")
        setup_embedded_zk(url)

    collection = args.collection.pop()
    client = SolrClient(url=url)

    ## Add --force to force security.json upload, configset upload and
    ## collection recreation; it will clean the Solr server before proceeding.
    ## Add --clean option to clean the uploaded configset and collection.
    if not client.exists_collection(collection):
        print("Collection '%s' doesn't exist. Creating a new one..." % collection)
        created = client.create_collection(collection,
                                           shards=args.shards,
                                           replication_factor=args.replication_factor,
                                           max_shards_per_node=args.max_shards_per_node)
        if not created:
            sys.exit(-1)
    else:
        print("Collection '%s' exists." % collection)

    ## Add --disable-index to disable auto indexing
    num_docs = client.get_num_docs(collection)
    if num_docs == 0:
        print("Performing a full reindex of '%s' collection..." % collection)
        subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"])

    if args.rebuild_index:
        print("Rebuilding index of '%s' collection..." % collection)
        client.rebuild_index(collection)

    if args.update_index:
        print("Updating index of '%s' collection..." % collection)
        client.update_index_last_day(collection)
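
# Example invocation (hypothetical script name and credentials):
#   python3 solr_setup.py -u http://admin:secret@localhost:8983 -c sapl --embedded_zk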