diff --git a/legacy/scripts/extract_labels.py b/legacy/scripts/extract_labels.py index 5d6e1a126..a82c060e3 100644 --- a/legacy/scripts/extract_labels.py +++ b/legacy/scripts/extract_labels.py @@ -7,6 +7,7 @@ from bs4 import BeautifulSoup from django.template.defaultfilters import slugify from materia.models import MateriaLegislativa +from bs4.element import NavigableString, Tag def _label_from_td(td): @@ -15,7 +16,7 @@ def _label_from_td(td): # TODO: improve, getting ids inputs # TODO: improve, getting fieldsets -def get_labels(filename, flat=True): +def get_fieldsets(filename): """Extract labels from a file containg the html source of a rendered legacy sapl form """ @@ -26,13 +27,32 @@ def get_labels(filename, flat=True): forms = soup.find_all('form') [form] = [f for f in forms if (u'method', u'post') in f.attrs.items()] - labels = [[_label_from_td(td) for td in tr.find_all('td')] for tr in form.find_all('tr')] - for line in labels: - print ', '.join("u'%s'" % l for l in line) - if flat: - return list(chain(*labels)) - else: - return labels + # children are either tags or strings... + assert set(type(c) for c in form.children) == {Tag, NavigableString} + # ... and all strings are empty + assert all(not c.strip() for c in form.children if isinstance(c, NavigableString)) + + for fieldset in form.find_all('fieldset'): + legend = fieldset.find('legend').text + yield dict( + legend=legend, + lines=[[_label_from_td(td) for td in tr.find_all('td')] + for tr in fieldset.find_all('tr')] + ) + + +def get_labels(fieldsets): + for fieldset in fieldsets: + for line in fieldset['lines']: + for label in line: + yield label + + +def print_fieldsets(fieldsets): + for fieldset in fieldsets: + print fieldset['legend'] + for line in fieldset['lines']: + print ' ' + ', '.join(line) def similar(a, b): @@ -41,7 +61,8 @@ def similar(a, b): model = MateriaLegislativa filename = os.path.join(os.path.dirname(__file__), 'original_forms/%s.html' % model.__name__) -labels = get_labels(filename) +fieldsets = list(get_fieldsets(filename)) +labels = get_labels(fieldsets) slugs_to_labels = [(slugify(s.lower()).replace('-', '_'), s) for s in labels] field_names = [f.name for f in model._meta.fields if f.name != 'id']