Browse Source

Fix field id detection in scraping

pull/6/head
Marcio Mazza 10 years ago
parent
commit
0340caccd0
  1. 6
      legacy/scripts/scrap_original_forms.py

6
legacy/scripts/scrap_original_forms.py

@@ -7,15 +7,19 @@ from bs4 import BeautifulSoup
from bs4.element import NavigableString, Tag from bs4.element import NavigableString, Tag
from field_mappings import field_mappings from field_mappings import field_mappings
from migration import appconfs
from utils import listify, getsourcelines from utils import listify, getsourcelines
assert appconfs # to prevent removal by automatic organize imports on this file
def _read_line(tr): def _read_line(tr):
for td in tr.find_all('td'): for td in tr.find_all('td'):
label = td.text.strip().split('\n')[0].strip(u'\xa0' + string.whitespace) label = td.text.strip().split('\n')[0].strip(u'\xa0' + string.whitespace)
if label.endswith('(*)'): if label.endswith('(*)'):
label = label[:-3].strip() label = label[:-3].strip()
names = [c.attrs['name'] for c in td.children if isinstance(c, Tag) and 'name' in c.attrs] names = [c.attrs['name'] for c in td.findAll() if isinstance(c, Tag) and 'name' in c.attrs]
if names: if names:
name = names[0].split('_', 1)[-1] name = names[0].split('_', 1)[-1]
yield name, label yield name, label

Loading…
Cancel
Save