Browse Source

Extract fieldsets from original forms html

pull/6/head
Marcio Mazza 10 years ago
committed by Marcio Mazza
parent
commit
d2ebf912ee
  1. 39
      legacy/scripts/extract_labels.py

39
legacy/scripts/extract_labels.py

@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
from django.template.defaultfilters import slugify
from materia.models import MateriaLegislativa
from bs4.element import NavigableString, Tag
def _label_from_td(td):
@ -15,7 +16,7 @@ def _label_from_td(td):
# TODO: improve by also getting the ids of the inputs
# TODO: improve, getting fieldsets
def get_labels(filename, flat=True):
def get_fieldsets(filename):
"""Extract labels from a file containg the html source of a rendered
legacy sapl form
"""
@ -26,13 +27,32 @@ def get_labels(filename, flat=True):
forms = soup.find_all('form')
[form] = [f for f in forms if (u'method', u'post') in f.attrs.items()]
labels = [[_label_from_td(td) for td in tr.find_all('td')] for tr in form.find_all('tr')]
for line in labels:
print ', '.join("u'%s'" % l for l in line)
if flat:
return list(chain(*labels))
else:
return labels
# children are either tags or strings...
assert set(type(c) for c in form.children) == {Tag, NavigableString}
# ... and all strings are empty
assert all(not c.strip() for c in form.children if isinstance(c, NavigableString))
for fieldset in form.find_all('fieldset'):
legend = fieldset.find('legend').text
yield dict(
legend=legend,
lines=[[_label_from_td(td) for td in tr.find_all('td')]
for tr in fieldset.find_all('tr')]
)
def get_labels(fieldsets):
    """Yield every label contained in ``fieldsets``, in order.

    ``fieldsets`` is an iterable of dicts, each holding a ``'lines'`` entry:
    a list of lines, where every line is itself a list of labels (the shape
    yielded by ``get_fieldsets``).  Labels are produced lazily, fieldset by
    fieldset, line by line, label by label.
    """
    for fieldset in fieldsets:
        for line in fieldset['lines']:
            for label in line:
                yield label
def print_fieldsets(fieldsets):
    """Print each fieldset's legend followed by its lines of labels.

    For every fieldset dict, the ``'legend'`` value is printed on its own
    line.  Each entry of ``'lines'`` is then printed on a following line,
    with its labels joined by ``', '`` and prefixed with a single space.
    """
    for fieldset in fieldsets:
        # A single-argument, parenthesised print produces the same output
        # as the Python 2 print statement and also runs on Python 3.
        print(fieldset['legend'])
        for line in fieldset['lines']:
            print(' ' + ', '.join(line))
def similar(a, b):
@ -41,7 +61,8 @@ def similar(a, b):
model = MateriaLegislativa
filename = os.path.join(os.path.dirname(__file__),
'original_forms/%s.html' % model.__name__)
labels = get_labels(filename)
fieldsets = list(get_fieldsets(filename))
labels = get_labels(fieldsets)
slugs_to_labels = [(slugify(s.lower()).replace('-', '_'), s) for s in labels]
field_names = [f.name for f in model._meta.fields if f.name != 'id']

Loading…
Cancel
Save