From 595613f78bd1ec732c5d160cd72d4a15eeda72ee Mon Sep 17 00:00:00 2001
From: joao
Date: Thu, 23 Mar 2023 22:12:43 -0400
Subject: [PATCH] cria cronjob com horario randomico para atualizar indices do
 solr

---
 docker/Dockerfile  |  1 +
 docker/solr_cli.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 docker/start.sh    | 25 +++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index a40bd91fe..8b70e0823 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -27,6 +27,7 @@ RUN apt-get update && \
     apt-get install -y --no-install-recommends $BUILD_PACKAGES $RUN_PACKAGES && \
     fc-cache -fv && \
     pip3 install --no-cache-dir --upgrade pip setuptools && \
+    apt-get install -y --no-install-recommends cron && \
     rm -f /etc/nginx/conf.d/* && \
     pip install --no-cache-dir -r /var/interlegis/sapl/requirements/dev-requirements.txt --upgrade setuptools && \
     SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \
diff --git a/docker/solr_cli.py b/docker/solr_cli.py
index d452d1fe9..87d380b4b 100755
--- a/docker/solr_cli.py
+++ b/docker/solr_cli.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import argparse
+import datetime
 import logging
 import re
 import secrets
@@ -111,6 +112,8 @@ class SolrClient:
     DELETE_COLLECTION = "{}/solr/admin/collections?action=DELETE&name={}&wt=json"
     DELETE_DATA = "{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json"
     QUERY_DATA = "{}/solr/{}/select?q=*:*"
+    REBUILD_INDEX = "{}/solr/{}/dataimport?command=full-import&wt=json"
+    UPDATE_INDEX = "{}/solr/{}/dataimport?command=delta-import&wt=json"
 
     CONFIGSET_NAME = "sapl_configset"
 
@@ -243,6 +246,48 @@ class SolrClient:
         num_docs = self.get_num_docs(collection_name)
         print("Num docs: %s" % num_docs)
 
+    def update_index_last_day(self, collection_name):
+        """Request a delta-import covering the last 24 hours.
+
+        Prints the outcome and the number of documents in the collection.
+        """
+        # The timestamps carry a 'Z' (UTC) suffix, so compute them in UTC
+        now = datetime.datetime.now(datetime.timezone.utc)
+        since = now - datetime.timedelta(days=1)
+        date_format = '%Y-%m-%dT%H:%M:%SZ'
+        date_range = '*:[%s TO %s]' % (since.strftime(date_format),
+                                       now.strftime(date_format))
+
+        req_url = self.UPDATE_INDEX.format(self.url, collection_name)
+        # NOTE(review): confirm the import handler reads this range from the body
+        res = requests.post(req_url,
+                            data=date_range,
+                            headers={'Content-Type': 'application/xml'})
+        if not res.ok:
+            print("Error updating index for collection '%s'" % collection_name)
+            print("Code {}: {}".format(res.status_code, res.text))
+        else:
+            print("Collection '%s' data updated successfully!" % collection_name)
+
+        num_docs = self.get_num_docs(collection_name)
+        print("Num docs: %s" % num_docs)
+
+    def rebuild_index(self, collection_name):
+        """Request a full-import that rebuilds the collection index.
+
+        Prints the outcome and the number of documents in the collection.
+        """
+        req_url = self.REBUILD_INDEX.format(self.url, collection_name)
+        res = requests.post(req_url)
+        if not res.ok:
+            print("Error rebuilding index for collection '%s'" % collection_name)
+            print("Code {}: {}".format(res.status_code, res.text))
+        else:
+            print("Collection '%s' index rebuilt successfully!" % collection_name)
+
+        num_docs = self.get_num_docs(collection_name)
+        print("Num docs: %s" % num_docs)
+
 
 def setup_embedded_zk(solr_url):
     match = re.match(URL_PATTERN, solr_url)
@@ -277,9 +322,12 @@ if __name__ == '__main__':
                         help='Replication factor (default=1)', default=1)
     parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?',
                         help='Max shards per node (default=1)', default=1)
-
     parser.add_argument("--embedded_zk", default=False, action="store_true",
                         help="Embedded ZooKeeper")
+    parser.add_argument("--rebuild_index", default=False, action="store_true",
+                        help="Rebuild the whole Solr index (full-import)")
+    parser.add_argument("--update_index", default=False, action="store_true",
+                        help="Update the Solr index with the last day of data")
 
     try:
         args = parser.parse_args()
@@ -315,3 +363,11 @@ if __name__ == '__main__':
     if num_docs == 0:
         print("Performing a full reindex of '%s' collection..." % collection)
         p = subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"])
+
+    if args.rebuild_index:
+        print("Rebuilding index of '%s' collection..." % collection)
+        client.rebuild_index(collection)
+
+    if args.update_index:
+        print("Updating index of '%s' collection..." % collection)
+        client.update_index_last_day(collection)
diff --git a/docker/start.sh b/docker/start.sh
index 558b7d7b5..9465f3200 100755
--- a/docker/start.sh
+++ b/docker/start.sh
@@ -85,6 +85,31 @@ if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then
         fi
 
         python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE $ZK_EMBEDDED &
+
+        # Bounds (inclusive) for the random daily schedule; cron minutes
+        # range from 0 to 59
+        RANDOM_MINUTE_MIN=0
+        RANDOM_MINUTE_MAX=59
+        RANDOM_HOUR_MIN=0
+        RANDOM_HOUR_MAX=3
+
+        # Pick a random minute and hour within the intervals
+        RANDOM_MINUTE=$((RANDOM % (RANDOM_MINUTE_MAX - RANDOM_MINUTE_MIN + 1) + RANDOM_MINUTE_MIN))
+        RANDOM_HOUR=$((RANDOM % (RANDOM_HOUR_MAX - RANDOM_HOUR_MIN + 1) + RANDOM_HOUR_MIN))
+
+        # Install the job as a system crontab fragment. Files in /etc/cron.d
+        # need a user field; the file is overwritten so that restarts do not
+        # accumulate duplicate entries. cron does not run jobs from this
+        # directory, so the command changes into it first.
+        CRON_FILE=/etc/cron.d/rebuild_solr_index
+        {
+            echo "PATH=$PATH"
+            echo "$RANDOM_MINUTE $RANDOM_HOUR * * * root cd $(pwd) && python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION --update_index >> /var/log/cron.log 2>&1"
+        } > "$CRON_FILE"
+        chmod 0644 "$CRON_FILE"
+
+        # Start the cron daemon; it detaches by default, so start.sh goes on
+        cron
     else
         echo "Solr is offline, not possible to connect."
     fi