Browse Source

Extract fieldsets from original forms html

pull/6/head
Marcio Mazza 10 years ago
committed by Marcio Mazza
parent
commit
d2ebf912ee
  1. 39
      legacy/scripts/extract_labels.py

39
legacy/scripts/extract_labels.py

@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
from materia.models import MateriaLegislativa from materia.models import MateriaLegislativa
from bs4.element import NavigableString, Tag
def _label_from_td(td): def _label_from_td(td):
@ -15,7 +16,7 @@ def _label_from_td(td):
# TODO: improve, getting ids inputs # TODO: improve, getting ids inputs
# TODO: improve, getting fieldsets # TODO: improve, getting fieldsets
def get_labels(filename, flat=True): def get_fieldsets(filename):
"""Extract labels from a file containg the html source of a rendered """Extract labels from a file containg the html source of a rendered
legacy sapl form legacy sapl form
""" """
@ -26,13 +27,32 @@ def get_labels(filename, flat=True):
forms = soup.find_all('form') forms = soup.find_all('form')
[form] = [f for f in forms if (u'method', u'post') in f.attrs.items()] [form] = [f for f in forms if (u'method', u'post') in f.attrs.items()]
labels = [[_label_from_td(td) for td in tr.find_all('td')] for tr in form.find_all('tr')] # children are either tags or strings...
for line in labels: assert set(type(c) for c in form.children) == {Tag, NavigableString}
print ', '.join("u'%s'" % l for l in line) # ... and all strings are empty
if flat: assert all(not c.strip() for c in form.children if isinstance(c, NavigableString))
return list(chain(*labels))
else: for fieldset in form.find_all('fieldset'):
return labels legend = fieldset.find('legend').text
yield dict(
legend=legend,
lines=[[_label_from_td(td) for td in tr.find_all('td')]
for tr in fieldset.find_all('tr')]
)
def get_labels(fieldsets):
    """Yield every label found in *fieldsets*, flattened in document order.

    ``fieldsets`` is an iterable of mappings that each carry a ``'lines'``
    entry: a sequence of lines, where each line is a sequence of labels.
    Labels are yielded lazily, fieldset by fieldset and line by line.
    """
    for fieldset in fieldsets:
        for line in fieldset['lines']:
            for label in line:
                yield label
def print_fieldsets(fieldsets):
    """Print each fieldset's legend followed by its label lines.

    For every mapping in ``fieldsets`` the ``'legend'`` value is printed on
    its own line; each entry of ``'lines'`` is then printed on a line of its
    own, prefixed by a single space, with its labels joined by ``', '``.
    """
    for fieldset in fieldsets:
        # A parenthesised single argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(fieldset['legend'])
        for line in fieldset['lines']:
            print(' ' + ', '.join(line))
def similar(a, b): def similar(a, b):
@ -41,7 +61,8 @@ def similar(a, b):
model = MateriaLegislativa model = MateriaLegislativa
filename = os.path.join(os.path.dirname(__file__), filename = os.path.join(os.path.dirname(__file__),
'original_forms/%s.html' % model.__name__) 'original_forms/%s.html' % model.__name__)
labels = get_labels(filename) fieldsets = list(get_fieldsets(filename))
labels = get_labels(fieldsets)
slugs_to_labels = [(slugify(s.lower()).replace('-', '_'), s) for s in labels] slugs_to_labels = [(slugify(s.lower()).replace('-', '_'), s) for s in labels]
field_names = [f.name for f in model._meta.fields if f.name != 'id'] field_names = [f.name for f in model._meta.fields if f.name != 'id']

Loading…
Cancel
Save