import os
import string
from difflib import SequenceMatcher
from itertools import chain

from bs4 import BeautifulSoup
from django.template.defaultfilters import slugify

from materia.models import MateriaLegislativa
from bs4.element import NavigableString, Tag


def _label_from_td(td):
    return td.text.strip().split('\n')[0].strip(u'\xa0' + string.whitespace)


# TODO: improve, getting ids inputs
# TODO: improve, getting fieldsets
def get_fieldsets(filename):
    """Extract labels from a file containing the html source of a rendered
    legacy sapl form.

    The file is read as UTF-8. It must hold exactly one ``<form>`` whose
    ``method`` attribute equals ``post``; otherwise unpacking the list of
    candidate forms raises ``ValueError``.

    Yields one dict per ``<fieldset>`` found inside that form:

    * ``legend``: the text of the fieldset's ``<legend>`` element;
    * ``lines``: one list per ``<tr>`` found in the fieldset, each holding
      the label (see ``_label_from_td``) of every ``<td>`` in that row.

    This is a generator, so the file is only read once iteration starts.
    """
    # Read raw bytes and decode them once: this yields unicode text on both
    # Python 2 and Python 3, whereas calling ``.decode`` on the result of a
    # text-mode read fails on Python 3 (``str`` has no ``decode`` there).
    with open(filename, 'rb') as f:
        html_doc = f.read().decode('utf-8')
    soup = BeautifulSoup(html_doc, 'html.parser')

    # Single-element unpacking doubles as a check that exactly one form
    # posts data.
    [form] = [candidate for candidate in soup.find_all('form')
              if (u'method', u'post') in candidate.attrs.items()]

    # children are either tags or strings...
    # (exact types are compared, so both kinds must be present and
    # subclasses of either do not count)
    assert set(type(c) for c in form.children) == {Tag, NavigableString}
    # ... and all strings are empty
    assert all(not c.strip()
               for c in form.children if isinstance(c, NavigableString))

    for fieldset in form.find_all('fieldset'):
        legend = fieldset.find('legend').text
        yield dict(
            legend=legend,
            lines=[[_label_from_td(td) for td in tr.find_all('td')]
                   for tr in fieldset.find_all('tr')]
        )


def get_labels(fieldsets):
    """Yield every label of every fieldset, flattened, in original order.

    ``fieldsets`` is an iterable of mappings whose ``'lines'`` entry is an
    iterable of label sequences.
    """
    for group in fieldsets:
        for text in chain.from_iterable(group['lines']):
            yield text


def print_fieldsets(fieldsets):
    """Print each fieldset's legend followed by its lines of labels.

    ``fieldsets`` is an iterable of mappings with a ``'legend'`` string and
    a ``'lines'`` iterable of label lists. Each line is written on its own
    row, indented by two spaces, with its labels joined by ``', '``.
    """
    for fieldset in fieldsets:
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function call on Python 3.
        print(fieldset['legend'])
        for line in fieldset['lines']:
            print('  ' + ', '.join(line))


def similar(a, b, threshold=0.6):
    """Tell whether two sequences are similar enough.

    Returns ``True`` when the ``difflib.SequenceMatcher`` ratio of ``a`` and
    ``b`` is strictly greater than ``threshold``, ``False`` otherwise. The
    default of 0.6 is the cutoff that used to be hard-coded here.
    """
    return SequenceMatcher(None, a, b).ratio() > threshold

# Pair each field of the model with the most similar label scraped from the
# saved html of the corresponding legacy form.
model = MateriaLegislativa
filename = os.path.join(os.path.dirname(__file__),
                        'original_forms/%s.html' % model.__name__)
fieldsets = list(get_fieldsets(filename))
labels = get_labels(fieldsets)
# (slug, original label) pairs; dashes become underscores so that slugs are
# spelled like field names.
slugs_to_labels = [(slugify(s.lower()).replace('-', '_'), s) for s in labels]
field_names = [f.name for f in model._meta.fields if f.name != 'id']

# field name -> (label, similarity ratio)
matches = {}

# Greedy matching: on every pass take the best scoring (field, label) pair
# still available, then retire that field (and its label, when accepted).
while field_names:
    if slugs_to_labels:
        # Rank by the actual similarity ratio. Ranking by the boolean
        # returned by ``similar`` would make every candidate above the cutoff
        # tie, leaving the choice to the alphabetical order of the names.
        percent, field, slug, label = max(
            (SequenceMatcher(None, name, candidate_slug).ratio(),
             name, candidate_slug, candidate_label)
            for name in field_names
            for (candidate_slug, candidate_label) in slugs_to_labels)
    else:
        # No labels left to offer: the remaining fields cannot be matched.
        percent, field = 0.0, field_names[0]
    if percent > 0.6:
        matches[field] = (label, percent)
        slugs_to_labels.remove((slug, label))
    else:
        print('Label not found for [%s]' % field)
    field_names.remove(field)