
fix: cronjob changes after code review

cron-update-solr-index
joao, 2 years ago
parent commit fdb06f4998
  1. docker/Dockerfile (11 changed lines)
  2. docker/random_cron_job.sh (18 changed lines)
  3. docker/solr_cli.py (35 changed lines)
  4. docker/start.sh (2 changed lines)

docker/Dockerfile (11 changed lines)

@@ -9,7 +9,8 @@ ENV DEBIAN_FRONTEND noninteractive
 ENV BUILD_PACKAGES apt-utils apt-file libpq-dev graphviz-dev build-essential git pkg-config \
     python3-dev libxml2-dev libjpeg-dev libssl-dev libffi-dev libxslt1-dev \
-    libcairo2-dev software-properties-common python3-setuptools python3-pip
+    libcairo2-dev software-properties-common python3-setuptools python3-pip \
+    cron
 ## DO NOT REMOVE vim FROM THE LIST OF INSTALLED PACKAGES!!!
 ENV RUN_PACKAGES graphviz python3-lxml python3-magic postgresql-client python3-psycopg2 \
@@ -27,8 +28,6 @@ RUN apt-get update && \
     apt-get install -y --no-install-recommends $BUILD_PACKAGES $RUN_PACKAGES && \
     fc-cache -fv && \
     pip3 install --no-cache-dir --upgrade pip setuptools && \
-    apt-get install cron -y && \
-    apk add --no-cache dcron && \
     rm -f /etc/nginx/conf.d/* && \
     pip install --no-cache-dir -r /var/interlegis/sapl/requirements/dev-requirements.txt --upgrade setuptools && \
     SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \
@@ -74,8 +73,8 @@ VOLUME ["/var/interlegis/sapl/data", "/var/interlegis/sapl/media"]
 CMD ["/var/interlegis/sapl/start.sh"]
-COPY cronjob /etc/cron.d/rebuild_solr_index
-RUN chmod 0644 /etc/cron.d/rebuild_solr_index
-RUN crontab /etc/cron.d/rebuild_solr_index
+COPY cronjob /etc/cron.d/update_solr_index
+RUN chmod 0644 /etc/cron.d/update_solr_index
+RUN crontab /etc/cron.d/update_solr_index
 RUN touch /var/log/cron.log
 CMD cron && tail -f /var/log/cron.log
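The `cronjob` file copied above is not part of this diff; as a reference only, a minimal sketch of what an /etc/cron.d entry of this shape usually looks like (the schedule, user, Solr URL and collection name below are assumptions for illustration, not taken from the commit):

# /etc/cron.d/update_solr_index -- hypothetical contents, for illustration only
# minute hour day month weekday user command
30 2 * * * root python3 /var/interlegis/sapl/solr_cli.py -u http://solr:8983 -c sapl --update-index >> /var/log/cron.log 2>&1

Note that files under /etc/cron.d carry a user field, while a file installed with `crontab <file>` normally does not, so a single file is usually written for one of the two mechanisms rather than both.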

docker/random_cron_job.sh (18 changed lines)

@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+SOLR_URL=$1
+# Define the interval of time to run the cronjob
+RANDOM_MINUTE_MIN=0
+RANDOM_MINUTE_MAX=60
+RANDOM_HOUR_MIN=0
+RANDOM_HOUR_MAX=3
+# Generate a random minute within the interval
+RANDOM_MINUTE=$((RANDOM % ($RANDOM_MINUTE_MAX-$RANDOM_MINUTE_MIN+1) + $RANDOM_MINUTE_MIN))
+RANDOM_HOUR=$((RANDOM % ($RANDOM_HOUR_MAX-$RANDOM_HOUR_MIN+1) + $RANDOM_HOUR_MIN))
+# Add the cronjob to the crontab
+echo "$RANDOM_MINUTE $RANDOM_HOUR * * * /path/to/command" >> /etc/crontab
+# Start the cron daemon
+crond -f -L /dev/stdout
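For reference, a compact sketch of the same scheduling logic with the minute bound kept inside cron's accepted 0-59 range (the 0-59 bound and the root user field are adjustments made for illustration, not part of the committed script):

#!/usr/bin/env bash
# Pick a random time between 00:00 and 03:59 for the nightly index update.
RANDOM_MINUTE=$((RANDOM % 60))   # 0-59; RANDOM % 61 can yield 60, which cron rejects
RANDOM_HOUR=$((RANDOM % 4))      # 0-3
# System-wide /etc/crontab entries take a user field between the schedule and the command.
echo "$RANDOM_MINUTE $RANDOM_HOUR * * * root /path/to/command" >> /etc/crontab
# crond -f -L /dev/stdout is BusyBox/dcron syntax; Debian's cron package uses plain `cron -f`.
crond -f -L /dev/stdout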

docker/solr_cli.py (35 changed lines)

@@ -112,8 +112,6 @@ class SolrClient:
     DELETE_COLLECTION = "{}/solr/admin/collections?action=DELETE&name={}&wt=json"
     DELETE_DATA = "{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json"
     QUERY_DATA = "{}/solr/{}/select?q=*:*"
-    REBUILD_INDEX = "{}/solr/{}/dataimport?command=full-import&wt=json"
-    UPDATE_INDEX = "{}/solr/{}/dataimport?command=delta-import&wt=json"
     CONFIGSET_NAME = "sapl_configset"
@@ -246,35 +244,6 @@ class SolrClient:
             num_docs = self.get_num_docs(collection_name)
             print("Num docs: %s" % num_docs)
-
-    def update_index_last_day(self, collection_name):
-        date = (datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%SZ')
-        now = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
-        req_url = self.UPDATE_INDEX.format(self.url, collection_name)
-        res = requests.post(req_url,
-                            data='<update><query>*:[%s TO %s]</query></update>' % date % now,
-                            headers={'Content-Type': 'application/xml'})
-        if not res.ok:
-            print("Error updating index for collection '%s'", collection_name)
-            print("Code {}: {}".format(res.status_code, res.text))
-        else:
-            print("Collection '%s' data updated successfully!" % collection_name)
-            num_docs = self.get_num_docs(collection_name)
-            print("Num docs: %s" % num_docs)
-
-    def rebuild_index(self, collection_name):
-        req_url = self.REBUILD_INDEX.format(self.url, collection_name)
-        res = requests.post(req_url)
-        if not res.ok:
-            print("Error rebuilding index for collection '%s'", collection_name)
-            print("Code {}: {}".format(res.status_code, res.text))
-        else:
-            print("Collection '%s' index rebuilt successfully!" % collection_name)
-            num_docs = self.get_num_docs(collection_name)
-            print("Num docs: %s" % num_docs)

 def setup_embedded_zk(solr_url):
     match = re.match(URL_PATTERN, solr_url)
@@ -351,9 +320,9 @@ if __name__ == '__main__':
     if args.rebuild_index:
         print("Rebuilding index of '%s' collection..." % collection)
-        client.rebuild_index(collection)
+        p = subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"])

     if args.update_index:
         print("Updating index of '%s' collection..." % collection)
-        client.update_index_last_day(collection)
+        p = subprocess.call(["python3", "manage.py", "update_index", "--noinput"])
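For context, the new subprocess calls shell out to Haystack's standard index management commands instead of hitting Solr's dataimport handler directly. A minimal sketch of the equivalent manual invocations (the --age window is an assumption about how a "last day" update would be expressed with Haystack; the commit itself passes no age):

# Full rebuild: clears the index and reindexes everything (replaces the removed full-import call)
python3 manage.py rebuild_index --noinput
# Incremental update; Haystack's --age flag restricts indexing to objects updated in the
# last N hours, roughly matching the removed update_index_last_day() method
python3 manage.py update_index --age=24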

docker/start.sh (2 changed lines)

@@ -96,7 +96,7 @@ if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then
     RANDOM_HOUR=$((RANDOM % ($RANDOM_HOUR_MAX-$RANDOM_HOUR_MIN+1) + $RANDOM_HOUR_MIN))
     # Add the cronjob to the crontab
-    echo "$RANDOM_MINUTE $RANDOM_HOUR * * * python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION --update-index" >> /etc/cron.daily/rebuild_index_job
+    echo "$RANDOM_MINUTE $RANDOM_HOUR * * * python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION --update-index" >> /etc/cron.daily/update_solr_index
     # Start the cron daemon
     crond -f -L /dev/stdout
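As a concrete illustration, assuming the random schedule picked minute 17 and hour 2 and placeholder values for $SOLR_URL and $SOLR_COLLECTION (none of these values come from the diff), the appended line expands to:

# Resulting entry written to /etc/cron.daily/update_solr_index:
# 17 2 * * * python3 solr_cli.py -u http://solr:8983 -c sapl --update-index
# Files under /etc/cron.daily are executed once a day by run-parts as scripts,
# so a five-field crontab line placed there is not read as a cron schedule.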
