Browse Source

Refatora Solr (#3585)

adicionar-cron-job
Edward 2 years ago
committed by GitHub
parent
commit
3efdf6a8f9
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 0
      dist/bin/upload_configset.sh
  2. 18
      docker/Dockerfile
  3. 77
      docker/docker-compose.yaml
  4. 127
      docker/solr_cli.py
  5. 30
      docker/start.sh
  6. 0
      docker/wait-for-pg.sh
  7. 8
      docker/wait-for-solr.sh
  8. 1
      requirements/requirements.txt
  9. 2
      solr/bin/solr_password.py

0
dist/bin/upload_configset.sh

18
docker/Dockerfile

@ -1,12 +1,10 @@
FROM python:3.7-slim-buster
FROM python:3.9-slim-buster
# Setup env
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENV PYTHONDONTWRITEBYTECODE 1
#ENV PYTHONFAULTHANDLER 1
ENV PYTHONUNBUFFERED=1
ENV DEBIAN_FRONTEND noninteractive
ENV BUILD_PACKAGES apt-utils apt-file libpq-dev graphviz-dev build-essential git pkg-config \
@ -34,12 +32,13 @@ RUN apt-get update && \
SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \
apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*
ENV HOME=/var/interlegis/sapl
WORKDIR /var/interlegis/sapl/
ADD . /var/interlegis/sapl/
COPY docker/start.sh $HOME
COPY docker/check_solr.sh $HOME
COPY docker/solr_api.py $HOME
COPY docker/busy-wait.sh $HOME
COPY docker/solr_cli.py $HOME
COPY docker/wait-for-pg.sh $HOME
COPY docker/wait-for-solr.sh $HOME
COPY docker/create_admin.py $HOME
COPY docker/genkey.py $HOME
COPY docker/gunicorn_start.sh $HOME
@ -55,7 +54,8 @@ RUN rm -rf /var/interlegis/sapl/sapl/.env && \
rm -rf /var/interlegis/sapl/sapl.db
RUN chmod +x /var/interlegis/sapl/start.sh && \
chmod +x /var/interlegis/sapl/check_solr.sh && \
chmod +x /var/interlegis/sapl/wait-for-solr.sh && \
chmod +x /var/interlegis/sapl/wait-for-pg.sh && \
ln -sf /dev/stdout /var/log/nginx/access.log && \
ln -sf /dev/stderr /var/log/nginx/error.log && \
mkdir /var/log/sapl/ && touch /var/interlegis/sapl/sapl.log && \

77
docker/docker-compose.yaml

@ -0,0 +1,77 @@
# Compose stack with three services: PostgreSQL (sapldb), Solr (saplsolr)
# and the SAPL application (sapl), all attached to the sapl-net bridge network.
#
# NOTE(review): every credential below is a hard-coded default value;
# override them before using this stack outside local development.
version: "3.7"
services:
  sapldb:
    image: postgres:10.5-alpine
    restart: always
    container_name: postgres
    labels:
      NAME: "postgres"
    environment:
      POSTGRES_PASSWORD: sapl
      POSTGRES_USER: sapl
      POSTGRES_DB: sapl
      PGDATA: /var/lib/postgresql/data/
    volumes:
      - sapldb_data:/var/lib/postgresql/data/
    ports:
      # Host port 5433 is mapped to the container's 5432.
      - "5433:5432"
    networks:
      - sapl-net
  saplsolr:
    image: solr:8.11
    restart: always
    # -c starts Solr in SolrCloud mode, -f keeps it in the foreground.
    # docker/solr_cli.py talks to the embedded ZooKeeper on port 9983.
    command: bin/solr start -c -f
    container_name: solr
    labels:
      NAME: "solr"
    volumes:
      - solr_data:/opt/solr/server/solr
      # This mount point lives inside the solr_data mount above.
      - solr_configsets:/opt/solr/server/solr/configsets
    ports:
      - "8983:8983"
    networks:
      - sapl-net
  sapl:
    # image: interlegis/sapl:3.1.162-RC13
    build:
      context: ../
      dockerfile: ./docker/Dockerfile
    container_name: sapl
    labels:
      NAME: "sapl"
    restart: always
    environment:
      # NOTE(review): docker/start.sh passes $DATABASE_URL to wait-for-pg.sh,
      # but no DATABASE_URL is defined here - confirm a default is supplied.
      ADMIN_PASSWORD: interlegis
      ADMIN_EMAIL: email@dominio.net
      DEBUG: 'False'
      EMAIL_PORT: 587
      EMAIL_USE_TLS: 'False'
      EMAIL_HOST: smtp.dominio.net
      EMAIL_HOST_USER: usuariosmtp
      EMAIL_SEND_USER: usuariosmtp
      EMAIL_HOST_PASSWORD: senhasmtp
      USE_SOLR: 'True'
      SOLR_COLLECTION: sapl
      # user:password@host form; solr_cli.py extracts the credentials and
      # the host name from this URL when --embedded_zk is used.
      SOLR_URL: http://solr:solr@saplsolr:8983
      IS_ZK_EMBEDDED: 'True'
      TZ: America/Sao_Paulo
    volumes:
      - sapl_data:/var/interlegis/sapl/data
      - sapl_media:/var/interlegis/sapl/media
    depends_on:
      - sapldb
      - saplsolr
    ports:
      - "80:80"
    networks:
      - sapl-net
networks:
  sapl-net:
    name: sapl-net
    driver: bridge
volumes:
  sapldb_data:
  sapl_data:
  sapl_media:
  solr_data:
  solr_configsets:

127
docker/solr_api.py → docker/solr_cli.py

@ -1,18 +1,102 @@
from io import BytesIO
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import os
import requests
import logging
import re
import secrets
import subprocess
import sys
import zipfile
from base64 import b64encode, b64decode
from hashlib import sha256
from io import BytesIO
from pathlib import Path
##
## Este módulo deve ser executado na raiz do projeto
##
import requests
from kazoo.client import KazooClient
#
# Este módulo deve ser executado na raiz do projeto
#
logging.basicConfig()
SECURITY_FILE_TEMPLATE = """
{
"authentication":{
"blockUnknown": true,
"class":"solr.BasicAuthPlugin",
"credentials":{"%s":"%s %s"},
"forwardCredentials": false,
"realm": "Solr Login"
},
"authorization":{
"class":"solr.RuleBasedAuthorizationPlugin",
"permissions":[{"name":"security-edit", "role":"admin"}],
"user-role":{"%s":"admin"}
}
}
"""
URL_PATTERN = 'https?://(([a-zA-Z0-9]+):([a-zA-Z0-9]+)@)?([a-zA-Z0-9.-]+)(:[0-9]{4})?'
def solr_hash_password(password: str, salt: str = None):
    """
    Compute the hash/salt pair used for a Solr Basic Auth credential.

    password: clear-text password string
    salt (optional): base64-encoded salt; when omitted, 32 random bytes
                     are generated with `secrets.token_bytes`
    returns: tuple (hash, salt), both base64 strings, where hash is
             sha256(sha256(salt + password))
    """
    raw_salt = secrets.token_bytes(32) if salt is None else b64decode(salt)

    # First pass hashes salt + password; the second pass hashes that digest.
    first_pass = sha256(raw_salt + password.encode('utf-8')).digest()
    second_pass = sha256(first_pass).digest()

    encoded_hash = b64encode(second_pass).decode('utf-8')
    encoded_salt = b64encode(raw_salt).decode('utf-8')
    return encoded_hash, encoded_salt
class SolrClient:
def create_security_file(username, password):
    """
    Write a `security.json` file into the current working directory.

    The file content is SECURITY_FILE_TEMPLATE filled with the username
    (as credential owner and as admin role holder) and the hash/salt pair
    returned by `solr_hash_password` for the given password.
    """
    print("Creating security.json file...")
    with open("security.json", "w") as security_file:
        hashed_password, salt_b64 = solr_hash_password(password)
        rendered = SECURITY_FILE_TEMPLATE % (username, hashed_password, salt_b64, username)
        security_file.write(rendered)
    print("file created!")
def upload_security_file(zk_host, zk_port=9983):
    """
    Upload the local `security.json` file to the `/security.json` znode.

    zk_host: host name of the ZooKeeper server
    zk_port (optional): ZooKeeper port; defaults to 9983, the embedded ZK port

    The znode is overwritten when it already exists and created otherwise.
    The stored content is then read back and printed.
    On any failure the error is printed and the process exits with -1.
    """
    print(f"Uploading security file to Solr, ZK server={zk_host}:{zk_port}...")
    zk = None
    try:
        with open('security.json', 'r') as f:
            payload = str.encode(f.read())

        zk = KazooClient(hosts=f"{zk_host}:{zk_port}")
        zk.start()
        print("Uploading security.json file...")
        if zk.exists('/security.json'):
            zk.set("/security.json", payload)
        else:
            zk.create("/security.json", payload)
        data, stat = zk.get('/security.json')
        print("file uploaded!")
        print(data.decode('utf-8'))
    except Exception as e:
        print(e)
        sys.exit(-1)
    finally:
        # Release the ZooKeeper connection on every path, including the
        # SystemExit raised by the error handler above.
        if zk is not None:
            zk.stop()
            zk.close()
class SolrClient:
LIST_CONFIGSETS = "{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
UPLOAD_CONFIGSET = "{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
LIST_COLLECTIONS = "{}/solr/admin/collections?action=LIST&wt=json"
@ -160,6 +244,22 @@ class SolrClient:
print("Num docs: %s" % num_docs)
def setup_embedded_zk(solr_url):
    """
    Create and upload `security.json` using the credentials found in solr_url.

    solr_url is matched against URL_PATTERN. When it carries a username,
    a password and a host, the security file is created for that user and
    uploaded to ZooKeeper on that host. Otherwise a message is printed and
    the process exits with -1.
    """
    parsed = re.match(URL_PATTERN, solr_url)
    if not parsed:
        print(f"Solr URL path doesn't match the required format: {solr_url}")
        sys.exit(-1)

    # Groups 2-4 of URL_PATTERN are the user, the password and the host.
    solr_user, solr_pwd, solr_host = parsed.group(2, 3, 4)
    if not (solr_user and solr_pwd and solr_host):
        print(f"Missing Solr's username, password, and host: {solr_user}/{solr_pwd}/{solr_host}")
        sys.exit(-1)

    create_security_file(solr_user, solr_pwd)
    upload_security_file(solr_host)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Cria uma collection no Solr')
@ -178,6 +278,9 @@ if __name__ == '__main__':
parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?',
help='Max shards per node (default=1)', default=1)
parser.add_argument("--embedded_zk", default=False, action="store_true",
help="Embedded ZooKeeper")
try:
args = parser.parse_args()
except IOError as msg:
@ -185,10 +288,17 @@ if __name__ == '__main__':
sys.exit(-1)
url = args.url.pop()
collection = args.collection.pop()
if args.embedded_zk:
print("Setup embedded ZooKeeper...")
setup_embedded_zk(url)
collection = args.collection.pop()
client = SolrClient(url=url)
## Add --force to force upload security.json, configset upload and collection recreation
## it will clean the solr server before proceeding
## Add --clean option to clean uploadconfig and collection
if not client.exists_collection(collection):
print("Collection '%s' doesn't exists. Creating a new one..." % collection)
created = client.create_collection(collection,
@ -200,6 +310,7 @@ if __name__ == '__main__':
else:
print("Collection '%s' exists." % collection)
## Add --disable-index to disable auto index
num_docs = client.get_num_docs(collection)
if num_docs == 0:
print("Performing a full reindex of '%s' collection..." % collection)

30
docker/start.sh

@ -22,7 +22,6 @@ create_env() {
touch $FILENAME
# explicitly use '>' to erase any previous content
echo "SECRET_KEY="$KEY > $FILENAME
# now only appends
@ -39,14 +38,14 @@ create_env() {
echo "USE_SOLR = ""${USE_SOLR-False}" >> $FILENAME
echo "SOLR_COLLECTION = ""${SOLR_COLLECTION-sapl}" >> $FILENAME
echo "SOLR_URL = ""${SOLR_URL-http://localhost:8983}" >> $FILENAME
echo "IS_ZK_EMBEDDED = ""${IS_ZK_EMBEDDED-False}" >> $FILENAME
echo "[ENV FILE] done."
}
create_env
/bin/bash busy-wait.sh $DATABASE_URL
/bin/bash wait-for-pg.sh $DATABASE_URL
yes yes | python3 manage.py migrate
@ -55,39 +54,46 @@ yes yes | python3 manage.py migrate
USE_SOLR="${USE_SOLR:=False}"
SOLR_URL="${SOLR_URL:=http://localhost:8983}"
SOLR_COLLECTION="${SOLR_COLLECTION:=sapl}"
NUM_SHARDS=${NUM_SHARDS:=1}
RF=${RF:=1}
MAX_SHARDS_PER_NODE=${MAX_SHARDS_PER_NODE:=1}
IS_ZK_EMBEDDED="${IS_ZK_EMBEDDED:=False}"
if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then
echo "SOLR configurations"
echo "Solr configurations"
echo "==================="
echo "URL: $SOLR_URL"
echo "COLLECTION: $SOLR_COLLECTION"
echo "NUM_SHARDS: $NUM_SHARDS"
echo "REPLICATION FACTOR: $RF"
echo "MAX SHARDS PER NODE: $MAX_SHARDS_PER_NODE"
echo "ASSUME ZK EMBEDDED: $IS_ZK_EMBEDDED"
echo "========================================="
echo "running solr script"
/bin/bash check_solr.sh $SOLR_URL
echo "running Solr script"
/bin/bash wait-for-solr.sh $SOLR_URL
CHECK_SOLR_RETURN=$?
if [ $CHECK_SOLR_RETURN == 1 ]; then
echo "Connecting to solr..."
python3 solr_api.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE &
# python3 manage.py rebuild_index --noinput &
echo "Connecting to Solr..."
if [ "${IS_ZK_EMBEDDED-False}" == "True" ] || [ "${IS_ZK_EMBEDDED-False}" == "true" ]; then
ZK_EMBEDDED="--embedded_zk"
echo "Assuming embedded ZooKeeper instalation..."
fi
python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE $ZK_EMBEDDED &
else
echo "Solr is offline, not possible to connect."
fi
else
echo "Suporte a SOLR não inicializado."
echo "Solr support is not initialized."
fi
echo "Criando usuário admin..."
echo "Creating admin user..."
user_created=$(python3 create_admin.py 2>&1)

0
docker/busy-wait.sh → docker/wait-for-pg.sh

8
docker/check_solr.sh → docker/wait-for-solr.sh

@ -4,10 +4,10 @@
SOLR_URL=$1
RETRY_COUNT=1
RETRY_LIMIT=4
RETRY_COUNT=0
RETRY_LIMIT=60 # wait until 1 min
echo "Waiting for solr connection at $SOLR_URL ..."
echo "Waiting for Solr connection at $SOLR_URL ..."
# Use -lt for a numeric comparison: inside [[ ]] the < operator compares
# strings lexicographically, so "7" < "60" is false and the loop would
# stop long before RETRY_LIMIT attempts.
while [[ $RETRY_COUNT -lt $RETRY_LIMIT ]]; do
echo "Attempt to connect to solr: $RETRY_COUNT of $RETRY_LIMIT"
let RETRY_COUNT=RETRY_COUNT+1;
@ -18,7 +18,7 @@ while [[ $RETRY_COUNT < $RETRY_LIMIT ]]; do
echo "Solr server is up!"
exit 1
else
sleep 3
sleep 1
fi
done
echo "Solr connection failed."

1
requirements/requirements.txt

@ -33,6 +33,7 @@ PyPDF4==1.27.0
pyoai==2.5.0
Unidecode==1.1.1
whitenoise==5.1.0
kazoo==2.8.0
git+https://github.com/interlegis/trml2pdf
git+https://github.com/interlegis/django-admin-bootstrapped

2
scripts/solr_password.py → solr/bin/solr_password.py

@ -18,7 +18,7 @@ from base64 import b64encode, b64decode
##
def solr_hash_password(password: str, salt: str = None) -> (str, str):
def solr_hash_password(password: str, salt: str = None):
"""
Generates a password and salt to be used in Basic Auth Solr
Loading…
Cancel
Save