|
|
@ -7,6 +7,7 @@ from bs4 import BeautifulSoup |
|
|
|
from django.template.defaultfilters import slugify |
|
|
|
|
|
|
|
from materia.models import MateriaLegislativa |
|
|
|
from bs4.element import NavigableString, Tag |
|
|
|
|
|
|
|
|
|
|
|
def _label_from_td(td): |
|
|
@ -15,7 +16,7 @@ def _label_from_td(td): |
|
|
|
|
|
|
|
# TODO: improve, getting ids inputs |
|
|
|
# TODO: improve, getting fieldsets |
|
|
|
def get_labels(filename, flat=True): |
|
|
|
def get_fieldsets(filename): |
|
|
|
"""Extract labels from a file containing the html source of a rendered |
|
|
|
legacy sapl form |
|
|
|
""" |
|
|
@ -26,13 +27,32 @@ def get_labels(filename, flat=True): |
|
|
|
forms = soup.find_all('form') |
|
|
|
[form] = [f for f in forms if (u'method', u'post') in f.attrs.items()] |
|
|
|
|
|
|
|
labels = [[_label_from_td(td) for td in tr.find_all('td')] for tr in form.find_all('tr')] |
|
|
|
for line in labels: |
|
|
|
print ', '.join("u'%s'" % l for l in line) |
|
|
|
if flat: |
|
|
|
return list(chain(*labels)) |
|
|
|
else: |
|
|
|
return labels |
|
|
|
# children are either tags or strings... |
|
|
|
assert set(type(c) for c in form.children) == {Tag, NavigableString} |
|
|
|
# ... and all strings are empty |
|
|
|
assert all(not c.strip() for c in form.children if isinstance(c, NavigableString)) |
|
|
|
|
|
|
|
for fieldset in form.find_all('fieldset'): |
|
|
|
legend = fieldset.find('legend').text |
|
|
|
yield dict( |
|
|
|
legend=legend, |
|
|
|
lines=[[_label_from_td(td) for td in tr.find_all('td')] |
|
|
|
for tr in fieldset.find_all('tr')] |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
def get_labels(fieldsets):
    """Lazily yield every label found in the given fieldsets.

    ``fieldsets`` is an iterable of mappings, each with a ``'lines'`` entry
    holding an iterable of lines; every line is itself an iterable of labels.
    Labels come out in order: fieldset by fieldset, line by line.
    """
    for group in fieldsets:
        # Flatten this fieldset's lines into a single stream of labels.
        flattened = (cell for row in group['lines'] for cell in row)
        for cell in flattened:
            yield cell
|
|
|
|
|
|
|
|
|
|
|
def print_fieldsets(fieldsets):
    """Print each fieldset's legend followed by its lines of labels.

    ``fieldsets`` is an iterable of mappings with a ``'legend'`` entry (the
    text printed as a heading) and a ``'lines'`` entry (an iterable of lines,
    each an iterable of label strings). Every line is printed on its own row,
    prefixed by the indent string and with its labels joined by ``', '``.

    Output goes to standard output; nothing is returned.
    """
    for fieldset in fieldsets:
        # Single-argument, parenthesized print: same output under the
        # Python 2 print statement and valid as the Python 3 print function.
        print(fieldset['legend'])
        for line in fieldset['lines']:
            print(' ' + ', '.join(line))
|
|
|
|
|
|
|
|
|
|
|
def similar(a, b): |
|
|
@ -41,7 +61,8 @@ def similar(a, b): |
|
|
|
model = MateriaLegislativa |
|
|
|
filename = os.path.join(os.path.dirname(__file__), |
|
|
|
'original_forms/%s.html' % model.__name__) |
|
|
|
labels = get_labels(filename) |
|
|
|
fieldsets = list(get_fieldsets(filename)) |
|
|
|
labels = get_labels(fieldsets) |
|
|
|
slugs_to_labels = [(slugify(s.lower()).replace('-', '_'), s) for s in labels] |
|
|
|
field_names = [f.name for f in model._meta.fields if f.name != 'id'] |
|
|
|
|
|
|
|