Browse Source

Refatora Solr (#3585)

adicionar-cron-job
Edward 2 years ago
committed by GitHub
parent
commit
3efdf6a8f9
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 0
      dist/bin/upload_configset.sh
  2. 18
      docker/Dockerfile
  3. 77
      docker/docker-compose.yaml
  4. 127
      docker/solr_cli.py
  5. 30
      docker/start.sh
  6. 0
      docker/wait-for-pg.sh
  7. 8
      docker/wait-for-solr.sh
  8. 1
      requirements/requirements.txt
  9. 2
      solr/bin/solr_password.py

0
dist/bin/upload_configset.sh

18
docker/Dockerfile

@ -1,12 +1,10 @@
FROM python:3.7-slim-buster FROM python:3.9-slim-buster
# Setup env # Setup env
ENV LANG C.UTF-8 ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8 ENV LC_ALL C.UTF-8
ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED=1
#ENV PYTHONFAULTHANDLER 1
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
ENV BUILD_PACKAGES apt-utils apt-file libpq-dev graphviz-dev build-essential git pkg-config \ ENV BUILD_PACKAGES apt-utils apt-file libpq-dev graphviz-dev build-essential git pkg-config \
@ -34,12 +32,13 @@ RUN apt-get update && \
SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \ SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \
apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/* apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/var/interlegis/sapl WORKDIR /var/interlegis/sapl/
ADD . /var/interlegis/sapl/
COPY docker/start.sh $HOME COPY docker/start.sh $HOME
COPY docker/check_solr.sh $HOME COPY docker/solr_cli.py $HOME
COPY docker/solr_api.py $HOME COPY docker/wait-for-pg.sh $HOME
COPY docker/busy-wait.sh $HOME COPY docker/wait-for-solr.sh $HOME
COPY docker/create_admin.py $HOME COPY docker/create_admin.py $HOME
COPY docker/genkey.py $HOME COPY docker/genkey.py $HOME
COPY docker/gunicorn_start.sh $HOME COPY docker/gunicorn_start.sh $HOME
@ -55,7 +54,8 @@ RUN rm -rf /var/interlegis/sapl/sapl/.env && \
rm -rf /var/interlegis/sapl/sapl.db rm -rf /var/interlegis/sapl/sapl.db
RUN chmod +x /var/interlegis/sapl/start.sh && \ RUN chmod +x /var/interlegis/sapl/start.sh && \
chmod +x /var/interlegis/sapl/check_solr.sh && \ chmod +x /var/interlegis/sapl/wait-for-solr.sh && \
chmod +x /var/interlegis/sapl/wait-for-pg.sh && \
ln -sf /dev/stdout /var/log/nginx/access.log && \ ln -sf /dev/stdout /var/log/nginx/access.log && \
ln -sf /dev/stderr /var/log/nginx/error.log && \ ln -sf /dev/stderr /var/log/nginx/error.log && \
mkdir /var/log/sapl/ && touch /var/interlegis/sapl/sapl.log && \ mkdir /var/log/sapl/ && touch /var/interlegis/sapl/sapl.log && \

77
docker/docker-compose.yaml

@ -0,0 +1,77 @@
# Local stack for SAPL: PostgreSQL + Solr + the SAPL application itself.
# All credentials below are sample values; override them before any real deployment.
version: "3.7"

services:
  # PostgreSQL database used by SAPL.
  sapldb:
    image: postgres:10.5-alpine
    restart: always
    container_name: postgres
    labels:
      NAME: "postgres"
    environment:
      POSTGRES_PASSWORD: sapl
      POSTGRES_USER: sapl
      POSTGRES_DB: sapl
      PGDATA: /var/lib/postgresql/data/
    volumes:
      - sapldb_data:/var/lib/postgresql/data/
    ports:
      # Published on host port 5433; other services reach it on 5432 via sapl-net.
      - "5433:5432"
    networks:
      - sapl-net

  # Solr search server, started with "-c" (cloud mode) and "-f" (foreground).
  saplsolr:
    image: solr:8.11
    restart: always
    command: bin/solr start -c -f
    container_name: solr
    labels:
      NAME: "solr"
    volumes:
      - solr_data:/opt/solr/server/solr
      - solr_configsets:/opt/solr/server/solr/configsets
    ports:
      - "8983:8983"
    networks:
      - sapl-net

  # SAPL application, built from the repository root with docker/Dockerfile.
  sapl:
    # image: interlegis/sapl:3.1.162-RC13
    build:
      context: ../
      dockerfile: ./docker/Dockerfile
    container_name: sapl
    labels:
      NAME: "sapl"
    restart: always
    environment:
      ADMIN_PASSWORD: interlegis
      ADMIN_EMAIL: email@dominio.net
      DEBUG: 'False'
      EMAIL_PORT: 587
      EMAIL_USE_TLS: 'False'
      EMAIL_HOST: smtp.dominio.net
      EMAIL_HOST_USER: usuariosmtp
      EMAIL_SEND_USER: usuariosmtp
      EMAIL_HOST_PASSWORD: senhasmtp
      USE_SOLR: 'True'
      SOLR_COLLECTION: sapl
      # user:password@host form; the host part is the saplsolr service name.
      SOLR_URL: http://solr:solr@saplsolr:8983
      IS_ZK_EMBEDDED: 'True'
      TZ: America/Sao_Paulo
    volumes:
      - sapl_data:/var/interlegis/sapl/data
      - sapl_media:/var/interlegis/sapl/media
    depends_on:
      - sapldb
      - saplsolr
    ports:
      - "80:80"
    networks:
      - sapl-net

networks:
  sapl-net:
    name: sapl-net
    driver: bridge

volumes:
  sapldb_data:
  sapl_data:
  sapl_media:
  solr_data:
  solr_configsets:

127
docker/solr_api.py → docker/solr_cli.py

@ -1,18 +1,102 @@
from io import BytesIO #!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse import argparse
import os import logging
import requests import re
import secrets
import subprocess import subprocess
import sys import sys
import zipfile import zipfile
from base64 import b64encode, b64decode
from hashlib import sha256
from io import BytesIO
from pathlib import Path from pathlib import Path
## import requests
## Este módulo deve ser executado na raiz do projeto from kazoo.client import KazooClient
##
#
# This module must be run from the project root
#
# Set up the root logger with the standard library defaults.
logging.basicConfig()
# Template for Solr's security.json, filled with the %-operator.
# Placeholders, in order: username, password hash, salt, username
# (the credentials entry is "<hash> <salt>"; the same user gets the admin role).
SECURITY_FILE_TEMPLATE = """
{
"authentication":{
"blockUnknown": true,
"class":"solr.BasicAuthPlugin",
"credentials":{"%s":"%s %s"},
"forwardCredentials": false,
"realm": "Solr Login"
},
"authorization":{
"class":"solr.RuleBasedAuthorizationPlugin",
"permissions":[{"name":"security-edit", "role":"admin"}],
"user-role":{"%s":"admin"}
}
}
"""
# Solr URL of the form http(s)://[user:password@]host[:port].
# Groups: 1 = "user:password@", 2 = user, 3 = password, 4 = host, 5 = ":port".
# User and password accept only ASCII letters and digits; the optional port
# group matches exactly four digits. The pattern has no end anchor.
URL_PATTERN = 'https?://(([a-zA-Z0-9]+):([a-zA-Z0-9]+)@)?([a-zA-Z0-9.-]+)(:[0-9]{4})?'
def solr_hash_password(password: str, salt: str = None):
    """
    Hash a password for use in Solr Basic Auth credentials.

    The hash is sha256(sha256(salt + password)), computed over the raw salt
    bytes followed by the UTF-8 encoded password.

    password: clear text password string
    salt (optional): base64 encoded salt; when omitted, 32 random bytes are
        generated with the `secrets` module
    returns: tuple (hash, salt), both as base64 strings
    """
    if salt is None:
        salt_bytes = secrets.token_bytes(32)
    else:
        salt_bytes = b64decode(salt)

    # Two rounds of SHA-256: first over salt + password, then over that digest.
    first_pass = sha256(salt_bytes + password.encode('utf-8')).digest()
    second_pass = sha256(first_pass).digest()

    cypher = b64encode(second_pass).decode('utf-8')
    encoded_salt = b64encode(salt_bytes).decode('utf-8')
    return cypher, encoded_salt
def create_security_file(username, password):
    """
    Write a security.json file for Solr into the current working directory.

    username: user to register in the credentials and user-role sections
    password: clear text password; only its salted hash is written
    """
    print("Creating security.json file...")
    # Build the full content before opening the file: opening with "w"
    # truncates it, so a hashing failure must not leave an empty file behind.
    cypher, salt = solr_hash_password(password)
    content = SECURITY_FILE_TEMPLATE % (username, cypher, salt, username)
    with open("security.json", "w") as f:
        f.write(content)
    print("file created!")
def upload_security_file(zk_host):
    """
    Upload the local security.json file to the /security.json ZooKeeper node.

    zk_host: host name of the ZooKeeper server; port 9983 is always used

    The node is updated if it exists and created otherwise. The stored
    content is then read back and printed. On any error the exception is
    printed and the process exits through sys.exit(-1).
    """
    zk_port = 9983  # embedded ZK port
    print(f"Uploading security file to Solr, ZK server={zk_host}:{zk_port}...")
    try:
        with open('security.json', 'r') as f:
            data = f.read()
        payload = str.encode(data)

        zk = KazooClient(hosts=f"{zk_host}:{zk_port}")
        zk.start()
        try:
            print("Uploading security.json file...")
            if zk.exists('/security.json'):
                zk.set("/security.json", payload)
            else:
                zk.create("/security.json", payload)
            data, stat = zk.get('/security.json')
            print("file uploaded!")
            # NOTE(review): this echoes the password hash and salt to stdout;
            # confirm that is acceptable for the container logs.
            print(data.decode('utf-8'))
        finally:
            # Release the connection even when one of the calls above fails.
            zk.stop()
            zk.close()
    except Exception as e:
        print(e)
        sys.exit(-1)
class SolrClient:
LIST_CONFIGSETS = "{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json" LIST_CONFIGSETS = "{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
UPLOAD_CONFIGSET = "{}/solr/admin/configs?action=UPLOAD&name={}&wt=json" UPLOAD_CONFIGSET = "{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
LIST_COLLECTIONS = "{}/solr/admin/collections?action=LIST&wt=json" LIST_COLLECTIONS = "{}/solr/admin/collections?action=LIST&wt=json"
@ -160,6 +244,22 @@ class SolrClient:
print("Num docs: %s" % num_docs) print("Num docs: %s" % num_docs)
def setup_embedded_zk(solr_url):
    """
    Create security.json from the credentials in solr_url and upload it.

    solr_url: Solr URL matched against URL_PATTERN; it must carry user,
        password and host

    Prints a message and calls sys.exit(-1) when the URL does not match the
    pattern or when user, password or host is missing.
    """
    parsed = re.match(URL_PATTERN, solr_url)
    if not parsed:
        print(f"Solr URL path doesn't match the required format: {solr_url}")
        sys.exit(-1)

    # Groups 1 (the "user:password@" prefix) and 5 (":port") are not needed here.
    solr_user, solr_pwd, solr_host = parsed.group(2, 3, 4)
    if not (solr_user and solr_pwd and solr_host):
        print(f"Missing Solr's username, password, and host: {solr_user}/{solr_pwd}/{solr_host}")
        sys.exit(-1)

    create_security_file(solr_user, solr_pwd)
    upload_security_file(solr_host)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Cria uma collection no Solr') parser = argparse.ArgumentParser(description='Cria uma collection no Solr')
@ -178,6 +278,9 @@ if __name__ == '__main__':
parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?', parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?',
help='Max shards per node (default=1)', default=1) help='Max shards per node (default=1)', default=1)
parser.add_argument("--embedded_zk", default=False, action="store_true",
help="Embedded ZooKeeper")
try: try:
args = parser.parse_args() args = parser.parse_args()
except IOError as msg: except IOError as msg:
@ -185,10 +288,17 @@ if __name__ == '__main__':
sys.exit(-1) sys.exit(-1)
url = args.url.pop() url = args.url.pop()
collection = args.collection.pop()
if args.embedded_zk:
print("Setup embedded ZooKeeper...")
setup_embedded_zk(url)
collection = args.collection.pop()
client = SolrClient(url=url) client = SolrClient(url=url)
## Add --force to force upload security.json, configset upload and collection recreation
## it will clean the solr server before proceeding
## Add --clean option to clean uploadconfig and collection
if not client.exists_collection(collection): if not client.exists_collection(collection):
print("Collection '%s' doesn't exists. Creating a new one..." % collection) print("Collection '%s' doesn't exists. Creating a new one..." % collection)
created = client.create_collection(collection, created = client.create_collection(collection,
@ -200,6 +310,7 @@ if __name__ == '__main__':
else: else:
print("Collection '%s' exists." % collection) print("Collection '%s' exists." % collection)
## Add --disable-index to disable auto index
num_docs = client.get_num_docs(collection) num_docs = client.get_num_docs(collection)
if num_docs == 0: if num_docs == 0:
print("Performing a full reindex of '%s' collection..." % collection) print("Performing a full reindex of '%s' collection..." % collection)

30
docker/start.sh

@ -22,7 +22,6 @@ create_env() {
touch $FILENAME touch $FILENAME
# explicitly use '>' to erase any previous content # explicitly use '>' to erase any previous content
echo "SECRET_KEY="$KEY > $FILENAME echo "SECRET_KEY="$KEY > $FILENAME
# now only appends # now only appends
@ -39,14 +38,14 @@ create_env() {
echo "USE_SOLR = ""${USE_SOLR-False}" >> $FILENAME echo "USE_SOLR = ""${USE_SOLR-False}" >> $FILENAME
echo "SOLR_COLLECTION = ""${SOLR_COLLECTION-sapl}" >> $FILENAME echo "SOLR_COLLECTION = ""${SOLR_COLLECTION-sapl}" >> $FILENAME
echo "SOLR_URL = ""${SOLR_URL-http://localhost:8983}" >> $FILENAME echo "SOLR_URL = ""${SOLR_URL-http://localhost:8983}" >> $FILENAME
echo "IS_ZK_EMBEDDED = ""${IS_ZK_EMBEDDED-False}" >> $FILENAME
echo "[ENV FILE] done." echo "[ENV FILE] done."
} }
create_env create_env
/bin/bash busy-wait.sh $DATABASE_URL /bin/bash wait-for-pg.sh $DATABASE_URL
yes yes | python3 manage.py migrate yes yes | python3 manage.py migrate
@ -55,39 +54,46 @@ yes yes | python3 manage.py migrate
USE_SOLR="${USE_SOLR:=False}" USE_SOLR="${USE_SOLR:=False}"
SOLR_URL="${SOLR_URL:=http://localhost:8983}" SOLR_URL="${SOLR_URL:=http://localhost:8983}"
SOLR_COLLECTION="${SOLR_COLLECTION:=sapl}" SOLR_COLLECTION="${SOLR_COLLECTION:=sapl}"
NUM_SHARDS=${NUM_SHARDS:=1} NUM_SHARDS=${NUM_SHARDS:=1}
RF=${RF:=1} RF=${RF:=1}
MAX_SHARDS_PER_NODE=${MAX_SHARDS_PER_NODE:=1} MAX_SHARDS_PER_NODE=${MAX_SHARDS_PER_NODE:=1}
IS_ZK_EMBEDDED="${IS_ZK_EMBEDDED:=False}"
if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then
echo "SOLR configurations" echo "Solr configurations"
echo "===================" echo "==================="
echo "URL: $SOLR_URL" echo "URL: $SOLR_URL"
echo "COLLECTION: $SOLR_COLLECTION" echo "COLLECTION: $SOLR_COLLECTION"
echo "NUM_SHARDS: $NUM_SHARDS" echo "NUM_SHARDS: $NUM_SHARDS"
echo "REPLICATION FACTOR: $RF" echo "REPLICATION FACTOR: $RF"
echo "MAX SHARDS PER NODE: $MAX_SHARDS_PER_NODE" echo "MAX SHARDS PER NODE: $MAX_SHARDS_PER_NODE"
echo "ASSUME ZK EMBEDDED: $IS_ZK_EMBEDDED"
echo "=========================================" echo "========================================="
echo "running solr script" echo "running Solr script"
/bin/bash check_solr.sh $SOLR_URL /bin/bash wait-for-solr.sh $SOLR_URL
CHECK_SOLR_RETURN=$? CHECK_SOLR_RETURN=$?
if [ $CHECK_SOLR_RETURN == 1 ]; then if [ $CHECK_SOLR_RETURN == 1 ]; then
echo "Connecting to solr..." echo "Connecting to Solr..."
python3 solr_api.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE &
# python3 manage.py rebuild_index --noinput &
if [ "${IS_ZK_EMBEDDED-False}" == "True" ] || [ "${IS_ZK_EMBEDDED-False}" == "true" ]; then
ZK_EMBEDDED="--embedded_zk"
echo "Assuming embedded ZooKeeper instalation..."
fi
python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE $ZK_EMBEDDED &
else else
echo "Solr is offline, not possible to connect." echo "Solr is offline, not possible to connect."
fi fi
else else
echo "Suporte a SOLR não inicializado." echo "Solr support is not initialized."
fi fi
echo "Criando usuário admin..." echo "Creating admin user..."
user_created=$(python3 create_admin.py 2>&1) user_created=$(python3 create_admin.py 2>&1)

0
docker/busy-wait.sh → docker/wait-for-pg.sh

8
docker/check_solr.sh → docker/wait-for-solr.sh

@ -4,10 +4,10 @@
SOLR_URL=$1 SOLR_URL=$1
RETRY_COUNT=1 RETRY_COUNT=0
RETRY_LIMIT=4 RETRY_LIMIT=60 # wait until 1 min
echo "Waiting for solr connection at $SOLR_URL ..." echo "Waiting for Solr connection at $SOLR_URL ..."
while [[ $RETRY_COUNT < $RETRY_LIMIT ]]; do while [[ $RETRY_COUNT < $RETRY_LIMIT ]]; do
echo "Attempt to connect to solr: $RETRY_COUNT of $RETRY_LIMIT" echo "Attempt to connect to solr: $RETRY_COUNT of $RETRY_LIMIT"
let RETRY_COUNT=RETRY_COUNT+1; let RETRY_COUNT=RETRY_COUNT+1;
@ -18,7 +18,7 @@ while [[ $RETRY_COUNT < $RETRY_LIMIT ]]; do
echo "Solr server is up!" echo "Solr server is up!"
exit 1 exit 1
else else
sleep 3 sleep 1
fi fi
done done
echo "Solr connection failed." echo "Solr connection failed."

1
requirements/requirements.txt

@ -33,6 +33,7 @@ PyPDF4==1.27.0
pyoai==2.5.0 pyoai==2.5.0
Unidecode==1.1.1 Unidecode==1.1.1
whitenoise==5.1.0 whitenoise==5.1.0
kazoo==2.8.0
git+https://github.com/interlegis/trml2pdf git+https://github.com/interlegis/trml2pdf
git+https://github.com/interlegis/django-admin-bootstrapped git+https://github.com/interlegis/django-admin-bootstrapped

2
scripts/solr_password.py → solr/bin/solr_password.py

@ -18,7 +18,7 @@ from base64 import b64encode, b64decode
## ##
def solr_hash_password(password: str, salt: str = None) -> (str, str): def solr_hash_password(password: str, salt: str = None):
""" """
Generates a password and salt to be used in Basic Auth Solr Generates a password and salt to be used in Basic Auth Solr
Loading…
Cancel
Save