#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
import sys
import requests
import json
import time
import re
from datetime import datetime as dt

# TODO: add a timestamp to python-indexer.py's own log output

# environment variables are strings, so parse the flag explicitly
# TODO: change the default to False in production
USE_SOLR = os.getenv('USE_SOLR', 'True').lower() in ('true', '1', 'yes')

SOLR_BASE_URL = os.getenv('SOLR_URL', 'http://localhost:8983') + '/solr'
SOLR_UPDATE_URL = f'{SOLR_BASE_URL}/sapl-logs/update?commitWithin=1000'
SOLR_COLLECTION_STATUS = (
    f'{SOLR_BASE_URL}/sapl-logs/admin/ping?distrib=true&wt=json'
)

BATCH_SIZE = 10  # https://lucidworks.com/post/really-batch-updates-solr-2/

# the last fully parsed entry is held back until we know whether a
# stack trace follows it
previous = None
buffer = []    # non-matching lines (continuations / stack traces)
payload = []   # parsed documents waiting to be sent to Solr
num_docs = 0
total_docs = 0

# logging setup
logfilename = 'python-indexer.log'
logging.basicConfig(
    filename=logfilename,
    filemode='w+',
    level=logging.INFO
)
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
logger = logging.getLogger('python-indexer')
logger.setLevel(logging.DEBUG)

print(f"This program's log output is written to {logfilename}")


def push_to_solr():
    logger.debug(f"Sending {len(payload)} documents to Solr")
    r = requests.post(
        SOLR_UPDATE_URL,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json; charset=utf-8'}
    )
    logger.debug(r.content)


def parse_fields(groups):
    # normalize "2021-01-01 12:00:00,123" to ISO 8601, then to the Solr
    # date format
    iso8601 = "{} {}".format(groups[1], groups[2].replace(",", "."))
    d = dt.fromisoformat(iso8601)
    datetime = d.strftime('%Y-%m-%dT%H:%M:%SZ')
    # datetime = groups[1] + "T" + groups[2].split(',')[0] + "Z"

    fields = {
        'level': groups[0],
        'datetime': datetime
    }

    parts = groups[3].split()
    fields['server'] = parts[0]
    fields['path'] = parts[1]

    # format: sapl.painel.views:get_votos:497
    function = parts[2].split(':')
    fields['app_file'] = function[0]
    fields['method'] = function[1]
    fields['line_number'] = function[2]
    fields['function'] = parts[2]
    fields['message'] = ' '.join(parts[3:])

    return fields


def parse_logs(line):
    global previous

    # discard empty lines
    if not line.strip():
        return

    pattern = (
        r'^(ERROR|INFO|DEBUG|WARNING)'
        r'\s+(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2}:\d{2},\d+)\s+(.*)'
    )
    match = re.match(pattern, line)

    if match:
        groups = match.groups()
        fields = parse_fields(groups)
        fields['line'] = line

        # a new entry starts here; anything buffered since the last match
        # is a stack trace belonging to the previous entry
        if buffer and previous:
            previous['stacktrace'] = ''.join(buffer)
            buffer.clear()
        elif not previous:
            # nothing to attach the buffered lines to: discard them
            buffer.clear()

        # the previous entry is complete; queue it for indexing
        if previous:
            payload.append(previous)

        # hold the current entry back (a stack trace may still follow it)
        previous = fields
    else:
        # collect non-matching lines until the next entry starts
        buffer.append(line)

    logger.debug(len(payload))


def follow(fd):
    """Generator that yields new lines appended to a file (tail -f style)."""
    # seek to the end of the file
    fd.seek(0, os.SEEK_END)

    # start infinite loop
    while True:
        # read the next line, if one has been written
        line = fd.readline()

        # sleep if the file hasn't been updated
        if not line:
            time.sleep(0.1)
            continue

        yield line


def check_solr():
    try:
        r = requests.get(SOLR_BASE_URL)
        if r.status_code == 200:
            print(f"Solr server at {SOLR_BASE_URL} is up and running...")
            print("Checking collection health...")
            r = requests.get(SOLR_COLLECTION_STATUS)
            data = r.json()
            if data['status'] == 'OK':
                print("Collection sapl-logs is healthy")
    except Exception as e:
        logger.error(
            "Exception: " + str(e) +
            f"\nError connecting to Solr at {SOLR_COLLECTION_STATUS}"
        )
        sys.exit(1)


if __name__ == '__main__':
    if not USE_SOLR:
        print(f"USE_SOLR={USE_SOLR}")
        sys.exit(0)

    check_solr()

    filename = sys.argv[1]
    print(f"Opening log file {filename}...")
    logfile = open(filename, 'r')
    loglines = follow(logfile)
    # iterate over the generator (this tails the file indefinitely)
    for line in loglines:
        logger.debug(f"Current payload size: {len(payload)}")
        parse_logs(line)
        num_docs = (num_docs + 1) % BATCH_SIZE
        # push a batch once BATCH_SIZE lines have been processed
        if num_docs == 0 and payload:
            push_to_solr()
            total_docs += len(payload)
            payload.clear()

    # flush whatever is still queued if the loop ever terminates
    push_to_solr()
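
# Usage sketch (assumptions: the script is saved as python-indexer.py, and
# "sapl.log" below is only an example path; pass whichever file the SAPL
# application actually writes its log to):
#
#   SOLR_URL=http://localhost:8983 USE_SOLR=true python python-indexer.py sapl.log
#
# The indexer then tails the file, parses each entry, and posts batches of up
# to BATCH_SIZE documents to the sapl-logs collection.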