From 595613f78bd1ec732c5d160cd72d4a15eeda72ee Mon Sep 17 00:00:00 2001 From: joao Date: Thu, 23 Mar 2023 22:12:43 -0400 Subject: [PATCH 1/2] cria cronjob com horario randomico para atualizar indices do solr --- docker/Dockerfile | 8 ++++++++ docker/solr_cli.py | 44 +++++++++++++++++++++++++++++++++++++++++++- docker/start.sh | 15 +++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a40bd91fe..8b70e0823 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -27,6 +27,8 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends $BUILD_PACKAGES $RUN_PACKAGES && \ fc-cache -fv && \ pip3 install --no-cache-dir --upgrade pip setuptools && \ + apt-get install cron -y && \ + apk add --no-cache dcron && \ rm -f /etc/nginx/conf.d/* && \ pip install --no-cache-dir -r /var/interlegis/sapl/requirements/dev-requirements.txt --upgrade setuptools && \ SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \ @@ -71,3 +73,9 @@ EXPOSE 80/tcp 443/tcp VOLUME ["/var/interlegis/sapl/data", "/var/interlegis/sapl/media"] CMD ["/var/interlegis/sapl/start.sh"] + +COPY cronjob /etc/cron.d/rebuild_solr_index +RUN chmod 0644 /etc/cron.d/rebuild_solr_index +RUN crontab /etc/cron.d/rebuild_solr_index +RUN touch /var/log/cron.log +CMD cron && tail -f /var/log/cron.log diff --git a/docker/solr_cli.py b/docker/solr_cli.py index d452d1fe9..87d380b4b 100755 --- a/docker/solr_cli.py +++ b/docker/solr_cli.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- import argparse +import datetime import logging import re import secrets @@ -111,6 +112,8 @@ class SolrClient: DELETE_COLLECTION = "{}/solr/admin/collections?action=DELETE&name={}&wt=json" DELETE_DATA = "{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json" QUERY_DATA = "{}/solr/{}/select?q=*:*" + REBUILD_INDEX = "{}/solr/{}/dataimport?command=full-import&wt=json" + UPDATE_INDEX = "{}/solr/{}/dataimport?command=delta-import&wt=json" 
CONFIGSET_NAME = "sapl_configset" @@ -243,6 +246,35 @@ class SolrClient: num_docs = self.get_num_docs(collection_name) print("Num docs: %s" % num_docs) + def update_index_last_day(self, collection_name): + date = (datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%SZ') + now = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') + + req_url = self.UPDATE_INDEX.format(self.url, collection_name) + res = requests.post(req_url, + data='*:[%s TO %s]' % date % now, + headers={'Content-Type': 'application/xml'}) + if not res.ok: + print("Error updating index for collection '%s'", collection_name) + print("Code {}: {}".format(res.status_code, res.text)) + else: + print("Collection '%s' data updated successfully!" % collection_name) + + num_docs = self.get_num_docs(collection_name) + print("Num docs: %s" % num_docs) + + def rebuild_index(self, collection_name): + req_url = self.REBUILD_INDEX.format(self.url, collection_name) + res = requests.post(req_url) + if not res.ok: + print("Error rebuilding index for collection '%s'", collection_name) + print("Code {}: {}".format(res.status_code, res.text)) + else: + print("Collection '%s' index rebuilt successfully!" % collection_name) + + num_docs = self.get_num_docs(collection_name) + print("Num docs: %s" % num_docs) + def setup_embedded_zk(solr_url): match = re.match(URL_PATTERN, solr_url) @@ -277,9 +309,10 @@ if __name__ == '__main__': help='Replication factor (default=1)', default=1) parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?', help='Max shards per node (default=1)', default=1) - parser.add_argument("--embedded_zk", default=False, action="store_true", help="Embedded ZooKeeper") + parser.add_argument("--rebuild_index", default=False, action="store_true",) + parser.add_argument("--update_index", default=False, action="store_true",) try: args = parser.parse_args() @@ -315,3 +348,12 @@ if __name__ == '__main__': if num_docs == 0: print("Performing a full reindex of '%s' collection..." 
% collection) p = subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"]) + + if args.rebuild_index: + print("Rebuilding index of '%s' collection..." % collection) + client.rebuild_index(collection) + + if args.update_index: + print("Updating index of '%s' collection..." % collection) + client.update_index_last_day(collection) + diff --git a/docker/start.sh b/docker/start.sh index 558b7d7b5..9465f3200 100755 --- a/docker/start.sh +++ b/docker/start.sh @@ -85,6 +85,21 @@ if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then fi python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE $ZK_EMBEDDED & + + RANDOM_MINUTE_MIN=0 + RANDOM_MINUTE_MAX=59 + RANDOM_HOUR_MIN=0 + RANDOM_HOUR_MAX=3 + + # Generate a random minute within the interval + RANDOM_MINUTE=$((RANDOM % ($RANDOM_MINUTE_MAX-$RANDOM_MINUTE_MIN+1) + $RANDOM_MINUTE_MIN)) + RANDOM_HOUR=$((RANDOM % ($RANDOM_HOUR_MAX-$RANDOM_HOUR_MIN+1) + $RANDOM_HOUR_MIN)) + + # Add the cronjob to the crontab + echo "$RANDOM_MINUTE $RANDOM_HOUR * * * python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION --update-index" >> /etc/cron.daily/rebuild_index_job + + # Start the cron daemon + crond -f -L /dev/stdout else echo "Solr is offline, not possible to connect." 
fi From fdb06f49984d5d984cde911ac99d785bbfe99cc3 Mon Sep 17 00:00:00 2001 From: joao Date: Wed, 29 Mar 2023 22:05:01 -0400 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20mudan=C3=A7as=20no=20cronjob=20ap?= =?UTF-8?q?=C3=B3s=20revisao=20de=20codigo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/Dockerfile | 11 +++++------ docker/random_cron_job.sh | 18 ++++++++++++++++++ docker/solr_cli.py | 35 ++--------------------------------- docker/start.sh | 2 +- 4 files changed, 26 insertions(+), 40 deletions(-) create mode 100644 docker/random_cron_job.sh diff --git a/docker/Dockerfile b/docker/Dockerfile index 8b70e0823..558c9c556 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,7 +9,8 @@ ENV DEBIAN_FRONTEND noninteractive ENV BUILD_PACKAGES apt-utils apt-file libpq-dev graphviz-dev build-essential git pkg-config \ python3-dev libxml2-dev libjpeg-dev libssl-dev libffi-dev libxslt1-dev \ - libcairo2-dev software-properties-common python3-setuptools python3-pip + libcairo2-dev software-properties-common python3-setuptools python3-pip \ + cron ## NAO EH PRA TIRAR O vim DA LISTA DE COMANDOS INSTALADOS!!! 
ENV RUN_PACKAGES graphviz python3-lxml python3-magic postgresql-client python3-psycopg2 \ @@ -27,8 +28,6 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends $BUILD_PACKAGES $RUN_PACKAGES && \ fc-cache -fv && \ pip3 install --no-cache-dir --upgrade pip setuptools && \ - apt-get install cron -y && \ - apk add --no-cache dcron && \ rm -f /etc/nginx/conf.d/* && \ pip install --no-cache-dir -r /var/interlegis/sapl/requirements/dev-requirements.txt --upgrade setuptools && \ SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \ @@ -74,8 +73,8 @@ VOLUME ["/var/interlegis/sapl/data", "/var/interlegis/sapl/media"] CMD ["/var/interlegis/sapl/start.sh"] -COPY cronjob /etc/cron.d/rebuild_solr_index -RUN chmod 0644 /etc/cron.d/rebuild_solr_index -RUN crontab /etc/cron.d/rebuild_solr_index +COPY cronjob /etc/cron.d/update_solr_index +RUN chmod 0644 /etc/cron.d/update_solr_index +RUN crontab /etc/cron.d/update_solr_index RUN touch /var/log/cron.log CMD cron && tail -f /var/log/cron.log diff --git a/docker/random_cron_job.sh b/docker/random_cron_job.sh new file mode 100644 index 000000000..daa478a6a --- /dev/null +++ b/docker/random_cron_job.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +SOLR_URL=$1 +# Define the interval of time to run the cronjob +RANDOM_MINUTE_MIN=0 +RANDOM_MINUTE_MAX=60 +RANDOM_HOUR_MIN=0 +RANDOM_HOUR_MAX=3 + +# Generate a random minute within the interval +RANDOM_MINUTE=$((RANDOM % ($RANDOM_MINUTE_MAX-$RANDOM_MINUTE_MIN+1) + $RANDOM_MINUTE_MIN)) +RANDOM_HOUR=$((RANDOM % ($RANDOM_HOUR_MAX-$RANDOM_HOUR_MIN+1) + $RANDOM_HOUR_MIN)) + +# Add the cronjob to the crontab +echo "$RANDOM_MINUTE $RANDOM_HOUR * * * /path/to/command" >> /etc/crontab + +# Start the cron daemon +crond -f -L /dev/stdout diff --git a/docker/solr_cli.py b/docker/solr_cli.py index 87d380b4b..c375ed8c7 100755 --- a/docker/solr_cli.py +++ b/docker/solr_cli.py @@ -112,8 +112,6 @@ class SolrClient: DELETE_COLLECTION = 
"{}/solr/admin/collections?action=DELETE&name={}&wt=json" DELETE_DATA = "{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json" QUERY_DATA = "{}/solr/{}/select?q=*:*" - REBUILD_INDEX = "{}/solr/{}/dataimport?command=full-import&wt=json" - UPDATE_INDEX = "{}/solr/{}/dataimport?command=delta-import&wt=json" CONFIGSET_NAME = "sapl_configset" @@ -246,35 +244,6 @@ class SolrClient: num_docs = self.get_num_docs(collection_name) print("Num docs: %s" % num_docs) - def update_index_last_day(self, collection_name): - date = (datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%SZ') - now = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') - - req_url = self.UPDATE_INDEX.format(self.url, collection_name) - res = requests.post(req_url, - data='*:[%s TO %s]' % date % now, - headers={'Content-Type': 'application/xml'}) - if not res.ok: - print("Error updating index for collection '%s'", collection_name) - print("Code {}: {}".format(res.status_code, res.text)) - else: - print("Collection '%s' data updated successfully!" % collection_name) - - num_docs = self.get_num_docs(collection_name) - print("Num docs: %s" % num_docs) - - def rebuild_index(self, collection_name): - req_url = self.REBUILD_INDEX.format(self.url, collection_name) - res = requests.post(req_url) - if not res.ok: - print("Error rebuilding index for collection '%s'", collection_name) - print("Code {}: {}".format(res.status_code, res.text)) - else: - print("Collection '%s' index rebuilt successfully!" % collection_name) - - num_docs = self.get_num_docs(collection_name) - print("Num docs: %s" % num_docs) - def setup_embedded_zk(solr_url): match = re.match(URL_PATTERN, solr_url) @@ -351,9 +320,9 @@ if __name__ == '__main__': if args.rebuild_index: print("Rebuilding index of '%s' collection..." % collection) - client.rebuild_index(collection) + p = subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"]) if args.update_index: print("Updating index of '%s' collection..." 
% collection) - client.update_index_last_day(collection) + p = subprocess.call(["python3", "manage.py", "update_index", "--noinput"]) diff --git a/docker/start.sh b/docker/start.sh index 9465f3200..65689ebfa 100755 --- a/docker/start.sh +++ b/docker/start.sh @@ -96,7 +96,7 @@ if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then RANDOM_HOUR=$((RANDOM % ($RANDOM_HOUR_MAX-$RANDOM_HOUR_MIN+1) + $RANDOM_HOUR_MIN)) # Add the cronjob to the crontab - echo "$RANDOM_MINUTE $RANDOM_HOUR * * * python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION --update-index" >> /etc/cron.daily/rebuild_index_job + echo "$RANDOM_MINUTE $RANDOM_HOUR * * * python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION --update_index" >> /etc/cron.daily/update_solr_index # Start the cron daemon crond -f -L /dev/stdout