mirror of https://github.com/interlegis/sapl.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
192 lines
4.4 KiB
192 lines
4.4 KiB
4 years ago
|
#!/usr/bin/env python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import os
|
||
|
import logging
|
||
|
import sys
|
||
|
import requests
|
||
|
import json
|
||
|
import time
|
||
|
import re
|
||
|
|
||
|
# TODO: inserir timestamp no logging do python-indexer.py
|
||
|
|
||
|
USE_SOLR = os.getenv('USE_SOLR', True) # TODO: trocar por False em produção
|
||
|
SOLR_BASE_URL = os.getenv('SOLR_URL', 'http://localhost:8983') + '/solr'
|
||
|
|
||
|
SOLR_UPDATE_URL = f'{SOLR_BASE_URL}/sapl-logs/update?commitWithin=1000'
|
||
|
|
||
|
SOLR_COLLECTION_STATUS = (
|
||
|
f'{SOLR_BASE_URL}/sapl-logs/admin/ping?distrib=true&wt=json'
|
||
|
)
|
||
|
|
||
|
BATCH_SIZE = 10 # https://lucidworks.com/post/really-batch-updates-solr-2/
|
||
|
|
||
|
previous = None
|
||
|
|
||
|
buffer = []
|
||
|
payload = []
|
||
|
|
||
|
num_docs = 0
|
||
|
total_docs = 0
|
||
|
|
||
|
# logging setup
|
||
|
logfilename = 'python-indexer.log'
|
||
|
|
||
|
logging.basicConfig(
|
||
|
filename=logfilename,
|
||
|
filemode='w+',
|
||
|
level=logging.INFO
|
||
|
)
|
||
|
|
||
|
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
|
||
|
logger = logging.getLogger('python-indexer')
|
||
|
logger.setLevel(logging.DEBUG)
|
||
|
|
||
|
print(f"The logging of this program is done at {logfilename}")
|
||
|
|
||
|
|
||
|
def push_to_solr():
|
||
|
logger.debug(f"Sending {len(payload)} documents to Solr")
|
||
|
|
||
|
r = requests.post(
|
||
|
SOLR_UPDATE_URL,
|
||
|
data=json.dumps(payload),
|
||
|
headers={'Content-Type': 'application/json; charset=utf-8'}
|
||
|
)
|
||
|
logger.debug(r.content)
|
||
|
|
||
|
|
||
|
def parse_fields(groups):
|
||
|
from datetime import datetime as dt
|
||
|
|
||
|
iso8601 = "{} {}".format(groups[1], groups[2].replace(",", "."))
|
||
|
d = dt.fromisoformat(iso8601)
|
||
|
datetime = d.strftime('%Y-%m-%dT%H:%M:%SZ')
|
||
|
|
||
|
# datetime = groups[1] + "T" + groups[2].split(',')[0] + "Z"
|
||
|
|
||
|
fields = {
|
||
|
'level': groups[0],
|
||
|
'datetime': datetime
|
||
|
}
|
||
|
|
||
|
parts = groups[3].split()
|
||
|
fields['server'] = parts[0]
|
||
|
fields['path'] = parts[1]
|
||
|
|
||
|
# format: sapl.painel.views:get_votos:497
|
||
|
function = parts[2].split(':')
|
||
|
fields['app_file'] = function[0]
|
||
|
fields['method'] = function[1]
|
||
|
fields['line_number'] = function[2]
|
||
|
fields['function'] = parts[2]
|
||
|
|
||
|
fields['message'] = ' '.join(parts[3:])
|
||
|
|
||
|
return fields
|
||
|
|
||
|
|
||
|
def parse_logs(line):
|
||
|
global previous
|
||
|
|
||
|
# discard empty lines
|
||
|
if not line.strip():
|
||
|
return
|
||
|
|
||
|
pattern = (
|
||
|
"^(ERROR|INFO|DEBUG|WARNING)" +
|
||
|
r'\s+(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2}:\d{2},\d+)\s+(.*)'
|
||
|
)
|
||
|
match = re.match(pattern, line)
|
||
|
|
||
|
if match:
|
||
|
groups = match.groups()
|
||
|
fields = parse_fields(groups)
|
||
|
fields['line'] = line
|
||
|
|
||
|
# if match but buffer is full then there was a stack trace before
|
||
|
if buffer and previous:
|
||
|
previous['stacktrace'] = ''.join(buffer)
|
||
|
buffer.clear()
|
||
|
elif not previous:
|
||
|
buffer.clear() # un-garbaged trash
|
||
|
|
||
|
# append the previous one
|
||
|
if previous:
|
||
|
payload.append(previous)
|
||
|
|
||
|
# delay append of current (it may have stacktrace)
|
||
|
previous = fields
|
||
|
else:
|
||
|
# while not match again collect into buffer
|
||
|
buffer.append(line)
|
||
|
|
||
|
logger.debug(len(payload))
|
||
|
|
||
|
|
||
|
def follow(fd):
|
||
|
""" generator function that yields new lines in a file """
|
||
|
|
||
|
# seek the end of the file
|
||
|
fd.seek(0, os.SEEK_END)
|
||
|
|
||
|
# start infinite loop
|
||
|
while True:
|
||
|
# read last line of file
|
||
|
line = fd.readline()
|
||
|
# sleep if file hasn't been updated
|
||
|
if not line:
|
||
|
time.sleep(0.1)
|
||
|
continue
|
||
|
|
||
|
yield line
|
||
|
|
||
|
|
||
|
def check_solr():
|
||
|
try:
|
||
|
r = requests.get(SOLR_BASE_URL)
|
||
|
if r.status_code == 200:
|
||
|
print(f"Solr server at {SOLR_BASE_URL} is up and running...")
|
||
|
|
||
|
print("Checking collection health...")
|
||
|
|
||
|
r = requests.get(SOLR_COLLECTION_STATUS)
|
||
|
data = r.json()
|
||
|
if data['status'] == 'OK':
|
||
|
print("Collection sapl-logs is healthy")
|
||
|
|
||
|
except Exception as e:
|
||
|
logger.error(
|
||
|
"Exception: " + str(e) +
|
||
|
f"\nError connecting to Solr at {SOLR_COLLECTION_STATUS}"
|
||
|
)
|
||
|
sys.exit(1)
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
|
||
|
if not USE_SOLR:
|
||
|
print(f"USE_SOLR={USE_SOLR}")
|
||
|
sys.exit(0)
|
||
|
|
||
|
check_solr()
|
||
|
|
||
|
filename = sys.argv[1]
|
||
|
print(f"Opening log file {filename}...")
|
||
|
logfile = open(filename, 'r')
|
||
|
loglines = follow(logfile)
|
||
|
|
||
|
# iterate over the generator
|
||
|
for line in loglines:
|
||
|
logger.debug(f"Current payload size: {len(payload)}")
|
||
|
parse_logs(line)
|
||
|
|
||
|
num_docs = (num_docs + 1) % BATCH_SIZE
|
||
|
if num_docs == 0 and payload:
|
||
|
push_to_solr()
|
||
|
total_docs += len(payload)
|
||
|
payload.clear()
|
||
|
|
||
|
push_to_solr()
|