sapl/legacy/scripts/scrap_original_forms.py

# -*- coding: utf-8 -*-
import os
import re
import string

from bs4 import BeautifulSoup
from bs4.element import NavigableString, Tag

from field_renames import field_renames
from migration import appconfs
from utils import listify, getsourcelines


# Reference ``appconfs`` once so that automatic "organize imports" tooling
# does not remove the import above as unused.
assert appconfs


def _read_line(tr):
    """Yield a ``(name, label)`` pair for each labelled cell of a table row.

    For every ``td`` inside ``tr``:

    * the label is the first line of the cell text, stripped of ordinary
      whitespace and non-breaking spaces, with a trailing ``(*)`` marker
      removed;
    * the name is the ``name`` attribute of the first descendant tag that
      has one, with everything up to and including the first underscore
      dropped.

    Cells that contain no tag with a ``name`` attribute yield nothing.

    :param tr: a BeautifulSoup tag (a ``tr`` element is expected).
    """
    for td in tr.find_all('td'):
        label = td.text.strip().split('\n')[0].strip(u'\xa0' + string.whitespace)
        if label.endswith('(*)'):
            label = label[:-3].strip()
        # Only the first named descendant matters, so stop looking as soon
        # as it is found instead of collecting every name in the cell.
        first_name = next(
            (c.attrs['name'] for c in td.find_all()
             if isinstance(c, Tag) and 'name' in c.attrs),
            None)
        if first_name is not None:
            yield first_name.split('_', 1)[-1], label


def extract_title_and_fieldsets(model):
    """Scrape the title and fieldsets from the saved original form of a model.

    The HTML is read from ``original_forms/<model.__name__>.html`` in the
    directory of this script.

    :param model: any object with a ``__name__`` attribute (a model class).
    :returns: a ``(title, fieldsets)`` tuple.  ``title`` is the stripped text
        of the ``h1`` element with class ``firstHeading``, or ``None`` when
        there is no such element.  ``fieldsets`` is a list with one dict per
        ``fieldset`` of the form, holding ``legend`` (the legend text, or
        ``''`` when absent) and ``lines`` (one list of ``(name, label)``
        pairs per ``tr``).  When the file cannot be read the result is
        ``(None, [])``.
    :raises ValueError: if the page does not contain exactly one form whose
        ``method`` attribute is ``post``.
    :raises AssertionError: if the direct children of that form are not
        exactly a mix of tags and blank strings.
    """
    filename = os.path.join(os.path.dirname(__file__),
                            'original_forms/%s.html' % model.__name__)
    try:
        # Read raw bytes and decode them explicitly below: a text-mode read
        # followed by ``.decode`` only works on Python 2.
        with open(filename, 'rb') as f:
            cont = f.read()
    except IOError:
        return None, []

    html_doc = cont.decode('utf-8')
    soup = BeautifulSoup(html_doc, 'html.parser')
    forms = soup.find_all('form')
    # Unpacking into a one-element list enforces "exactly one POST form".
    [form] = [f for f in forms if (u'method', u'post') in f.attrs.items()]
    # children are either tags or strings...
    assert set(type(c) for c in form.children) == {Tag, NavigableString}
    # ... and all strings are empty
    assert all(not c.strip() for c in form.children if isinstance(c, NavigableString))

    title = soup.find('h1', {'class': 'firstHeading'})
    title = title.text.strip() if title else None

    fieldsets = []
    for fieldset in form.find_all('fieldset'):
        # Look the legend up once instead of once for the test and once
        # for the value.
        legend = fieldset.find('legend')
        fieldsets.append(dict(
            legend=legend.text if legend else '',
            lines=[list(_read_line(tr)) for tr in fieldset.find_all('tr')]))

    return title, fieldsets


def get_names_labels(fieldsets):
    """Flatten fieldsets into a stream of ``(name, label)`` pairs.

    Each fieldset is a mapping whose ``'lines'`` entry is an iterable of
    lines, and each line is an iterable of two-item ``(name, label)``
    entries.  Pairs are yielded in document order.
    """
    rows = (row for entry in fieldsets for row in entry['lines'])
    for row in rows:
        for name, label in row:
            yield name, label


def print_title_and_fieldsets(model):
    """Print a human-readable dump of what was scraped for ``model``.

    The output is the title banner, then for each fieldset its legend
    followed by one indented row per table line with the ``name : label``
    pairs separated by `` | ``.
    """
    title, fieldsets = extract_title_and_fieldsets(model)
    # Single-argument ``print(...)`` prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('#### %s ####\n' % title)
    for fieldset in fieldsets:
        print(fieldset['legend'])
        for line in fieldset['lines']:
            # ``name`` rather than ``id`` so the builtin is not shadowed.
            print('  ' + ' | '.join(
                '%s : %s' % (name, label) for name, label in line))


def extract_verbose_names(model):
    """Pair the scraped form labels with the fields of ``model``.

    Every model field except ``id`` is looked up in ``field_renames[model]``
    to obtain its old name, and that old name is looked up among the
    scraped ``name -> label`` pairs.

    :returns: ``(title, labels, non_matched)`` where ``title`` is the scraped
        page title, ``labels`` maps field names to their non-empty scraped
        label, and ``non_matched`` is a tuple of the list of field names
        without a label and the dict of scraped labels not claimed by any
        field.
    :raises KeyError: if ``model`` or one of its fields is missing from
        ``field_renames``.
    """
    title, fieldsets = extract_title_and_fieldsets(model)
    pending_labels = dict(get_names_labels(fieldsets))
    candidates = [f.name for f in model._meta.fields if f.name != 'id']
    renames = field_renames[model]

    labels = {}
    unmatched_fields = []
    for name in candidates:
        old_name = renames[name]
        label = pending_labels.get(old_name)
        if label:
            labels[name] = label
            # a scraped label can be claimed by one field only
            del pending_labels[old_name]
        else:
            unmatched_fields.append(name)

    return title, labels, (unmatched_fields, pending_labels)


def _add_s(name):
    """Append 's' to every word of ``name`` that does not already end in 's'."""
    return ' '.join(p if p.endswith('s') else p + 's' for p in name.split())


def _remove_s(name):
    """Drop the final 's' of every word of ``name`` that ends in 's'."""
    return ' '.join(p[:-1] if p.endswith('s') else p for p in name.split())


def _guess_verbose_titles(title, model_name):
    """Return the ``(singular, plural)`` titles for a generated ``class Meta``.

    A missing title, or the generic 'Tabelas Auxiliares' one, is replaced by
    a title derived from the CamelCase ``model_name``.  Otherwise the scraped
    title is taken as the plural when it ends in 's' and as the singular when
    it does not, and the other form is derived from it.
    """
    if not title or title == 'Tabelas Auxiliares':
        # default title from model name
        singular = u' '.join(re.findall('[A-Z][^A-Z]*', model_name))
        singular = re.sub(r'cao\b', u'ção', singular)
        singular = re.sub(r'ao\b', u'ão', singular)
        plural = _add_s(
            singular.replace(u'ção', u'ções').replace(u'ão', u'ões'))
    elif title.endswith('s'):
        singular = _remove_s(
            title.replace(u'ções', u'ção').replace(u'ões', u'ão'))
        plural = title
    else:
        singular = title
        plural = _add_s(
            title.replace(u'ção', u'ções').replace(u'ão', u'ões'))
    return singular, plural


@listify
def source_with_verbose_names(model):
    """Yield the source lines of ``model`` with scraped verbose names added.

    The first source line (the ``class`` statement) is yielded unchanged
    apart from trailing whitespace.  Every following line that looks like a
    ``name = models.X(...)`` field definition is rebuilt, gaining a
    ``verbose_name=_(u'...')`` argument when a label was scraped for the
    field and the arguments do not mention ``verbose_name`` yet.  A trailing
    ``# comment`` on a field line is kept and aligned in one column.  Other
    lines pass through unchanged.

    Unless the source already contains ``class Meta:``, a final chunk with a
    ``class Meta`` declaring ``verbose_name`` and ``verbose_name_plural`` is
    yielded as well.
    """
    source = getsourcelines(model)
    title, labels = extract_verbose_names(model)[:2]

    # Raw string: ``\.`` and ``\(`` are regex escapes, not string escapes.
    field_regex = r' *(.+) = (models\.[^\(]*)\((.*verbose_name=_\(.*\)|.*)\)'
    # The variant with a trailing comment is tried first, so the comment is
    # captured separately and does not end up inside the field arguments.
    patterns = [
        (field_regex + ' *# (.+)', lambda groups: groups),
        (field_regex, lambda groups: groups + ('',)),
    ]
    new_lines = []
    class_meta_already_exists = False
    for line in source[1:]:
        for regex, split in patterns:
            match = re.match(regex, line)
            if match:
                name, path, args, legacy_name = split(match.groups())
                if name in labels and 'verbose_name' not in args:
                    args = [args] if args.strip() else []
                    args.append(u"verbose_name=_(u'%s')" % labels[name])
                    args = u', '.join(args)
                new_lines.append(
                    (u'    %s = %s(%s)' % (name, path, args), legacy_name))
                break
        else:
            # not a field definition: keep the line as it is
            if 'class Meta:' in line:
                class_meta_already_exists = True
            new_lines.append((line, ''))
    yield source[0].rstrip()
    # ``or [0]`` keeps max() from failing on a class without body lines.
    cols = max([len(text) for text, _ in new_lines] or [0])
    for line, legacy_name in new_lines:
        line = line.rstrip().ljust(cols)
        if legacy_name:
            yield line + '  # ' + legacy_name
        else:
            yield line

    # class Meta
    if class_meta_already_exists:
        return

    title_singular, title_plural = _guess_verbose_titles(
        title, model.__name__)

    yield """
    class Meta:
        verbose_name = _(u'%s')
        verbose_name_plural = _(u'%s')""" % (title_singular, title_plural)


def print_app_with_verbose_names(app):
    """Print the models module of ``app`` rewritten with verbose names.

    The output is a separator line, then a header rebuilt from the module
    lines that precede the first line containing ``class``, then the
    rewritten source of every model in ``app.models``.  The header always
    starts with the coding declaration and carries the ``ugettext`` import
    right after ``from django.db import models``.

    The comparisons below assume ``getsourcelines`` returns lines without
    their trailing newline -- TODO confirm against ``utils``.
    """
    # Single-argument ``print(...)`` prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('##################################################################')
    # Collect the pieces and join once instead of growing a string with +=.
    header_parts = ['# -*- coding: utf-8 -*-\n']
    for line in getsourcelines(app.models_module):
        if line in ['# -*- coding: utf-8 -*-',
                    'from django.utils.translation import ugettext as _', ]:
            # both are re-emitted by this function; drop the originals
            continue
        elif line == 'from django.db import models':
            header_parts.append(
                'from django.db import models\n'
                'from django.utils.translation import ugettext as _\n')
        elif 'class' in line:
            break
        else:
            header_parts.append(line + '\n')
    print(''.join(header_parts).strip())
    for model in app.models.values():
        print('\n')
        for p in source_with_verbose_names(model):
            print(p)


def list_models_with_no_scrapped_data(app):
    """Print the name of each model in ``app.models`` with nothing scraped.

    A model counts as having no scraped data when ``extract_verbose_names``
    gives it neither a title nor any field label.
    """
    for model in app.models.values():
        title, labels = extract_verbose_names(model)[:2]
        if not (title or labels):
            # Single-argument ``print(...)`` prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(model.__name__)
Add model verbose name guessing by model name 10 years ago			`# -*- coding: utf-8 -*-`
Start label matching from original forms html 10 years ago			`import os`
Add source rewrite with scrapped verbose names 10 years ago			`import re`
Add script to extract labels from sapl forms 10 years ago			`import string`

Start label matching from original forms html 10 years ago			`from bs4 import BeautifulSoup`
Extract fieldsets from original forms html 10 years ago			`from bs4.element import NavigableString, Tag`
Start label matching from original forms html 10 years ago
Rename field_mappings -> field_renames 10 years ago			`from field_renames import field_renames`
Fix field id detection in scraping 10 years ago			`from migration import appconfs`
Add method to get source lines in unicode 10 years ago			`from utils import listify, getsourcelines`
Extract verbose names from original forms html 10 years ago
Add script to extract labels from sapl forms 10 years ago
Fix field id detection in scraping 10 years ago			`assert appconfs # to prevent removal by automatic organize imports on this file`


Extract verbose names from original forms html 10 years ago			`def _read_line(tr):`
			`for td in tr.find_all('td'):`
			`label = td.text.strip().split('\n')[0].strip(u'\xa0' + string.whitespace)`
Rewrite app models module with verbose names 10 years ago			`if label.endswith('(*)'):`
			`label = label[:-3].strip()`
Fix field id detection in scraping 10 years ago			`names = [c.attrs['name'] for c in td.findAll() if isinstance(c, Tag) and 'name' in c.attrs]`
Extract verbose names from original forms html 10 years ago			`if names:`
			`name = names[0].split('_', 1)[-1]`
			`yield name, label`
Add script to extract labels from sapl forms 10 years ago

Extract verbose names from original forms html 10 years ago			`def extract_title_and_fieldsets(model):`
			`filename = os.path.join(os.path.dirname(__file__),`
			`'original_forms/%s.html' % model.__name__)`
Adjust scraping code a little 10 years ago			`try:`
			`with open(filename, 'r') as f:`
			`cont = f.read()`
			`except IOError:`
			`return None, []`

Add script to extract labels from sapl forms 10 years ago			`html_doc = cont.decode('utf-8')`
			`soup = BeautifulSoup(html_doc, 'html.parser')`
			`forms = soup.find_all('form')`
			`[form] = [f for f in forms if (u'method', u'post') in f.attrs.items()]`
Extract fieldsets from original forms html 10 years ago			`# children are either tags or strings...`
			`assert set(type(c) for c in form.children) == {Tag, NavigableString}`
			`# ... and all strings are empty`
			`assert all(not c.strip() for c in form.children if isinstance(c, NavigableString))`

Extract verbose names from original forms html 10 years ago			`title = soup.find('h1', {'class': 'firstHeading'})`
Add source rewrite with scrapped verbose names 10 years ago			`title = title.text.strip() if title else None`
Extract verbose names from original forms html 10 years ago			`fieldsets = [dict(`
Adjust scraping code a little 10 years ago			`legend=fieldset.find('legend').text if fieldset.find('legend') else '',`
Extract verbose names from original forms html 10 years ago			`lines=[list(_read_line(tr)) for tr in fieldset.find_all('tr')])`
			`for fieldset in form.find_all('fieldset')]`

			`return title, fieldsets`
Extract fieldsets from original forms html 10 years ago

Extract verbose names from original forms html 10 years ago			`def get_names_labels(fieldsets):`
Extract fieldsets from original forms html 10 years ago			`for fieldset in fieldsets:`
			`for line in fieldset['lines']:`
Extract verbose names from original forms html 10 years ago			`for name, label in line:`
			`yield name, label`
Extract fieldsets from original forms html 10 years ago

Extract verbose names from original forms html 10 years ago			`def print_title_and_fieldsets(model):`
			`title, fieldsets = extract_title_and_fieldsets(model)`
			`print '#### %s ####\n' % title`
Extract fieldsets from original forms html 10 years ago			`for fieldset in fieldsets:`
			`print fieldset['legend']`
			`for line in fieldset['lines']:`
Extract verbose names from original forms html 10 years ago			`print '  ' + ' | '.join('%s : %s' % (id, label) for id, label in line)`


			`def extract_verbose_names(model):`
			`title, fieldsets = extract_title_and_fieldsets(model)`
			`names_to_labels = dict(get_names_labels(fieldsets))`

			`field_names = [f.name for f in model._meta.fields if f.name != 'id']`

Add source rewrite with scrapped verbose names 10 years ago			`labels = {}`
Rename field_mappings -> field_renames 10 years ago			`field_names_to_old = field_renames[model]`
Extract verbose names from original forms html 10 years ago			`for name in field_names:`
			`old_name = field_names_to_old[name]`
			`label = names_to_labels.get(old_name, None)`
			`if label:`
Add source rewrite with scrapped verbose names 10 years ago			`labels[name] = label`
Extract verbose names from original forms html 10 years ago			`del names_to_labels[old_name]`
Add source rewrite with scrapped verbose names 10 years ago			`for name, label in labels.items():`
Extract verbose names from original forms html 10 years ago			`field_names.remove(name)`
			`non_matched = field_names, names_to_labels`
Add source rewrite with scrapped verbose names 10 years ago			`return title, labels, non_matched`


			`@listify`
			`def source_with_verbose_names(model):`
Add method to get source lines in unicode 10 years ago			`source = getsourcelines(model)`
Add source rewrite with scrapped verbose names 10 years ago			`title, labels, non_matched = extract_verbose_names(model)`

Ignore already set verbose_name on source rewrite 10 years ago			`field_regex = ' *(.+) = (models\.[^\(]*)\((.*verbose_name=_\(.*\)|.*)\)'`
Add source rewrite with scrapped verbose names 10 years ago			`new_lines = []`
Test for previous class Meta on source generation 10 years ago			`class_meta_already_exists = False`
Add source rewrite with scrapped verbose names 10 years ago			`for line in source[1:]:`
			`for regex, split in [`
			`(field_regex + ' *# (.+)', lambda groups: groups),`
Fix field matching regex for source rewrite 10 years ago			`(field_regex, lambda groups: groups + ('',))]:`
Add source rewrite with scrapped verbose names 10 years ago			`match = re.match(regex, line)`
			`if match:`
			`name, path, args, legacy_name = split(match.groups())`
Rewrite app models module with verbose names 10 years ago			`if name in labels and 'verbose_name' not in args:`
Add source rewrite with scrapped verbose names 10 years ago			`args = [args] if args.strip() else []`
Ignore already set verbose_name on source rewrite 10 years ago			`args.append(u"verbose_name=_(u'%s')" % labels[name])`
			`args = u', '.join(args)`
Add source rewrite with scrapped verbose names 10 years ago			`new_lines.append(`
Ignore already set verbose_name on source rewrite 10 years ago			`(u' %s = %s(%s)' % (name, path, args), legacy_name))`
Add source rewrite with scrapped verbose names 10 years ago			`break`
			`else:`
Test for previous class Meta on source generation 10 years ago			`if 'class Meta:' in line:`
			`class_meta_already_exists = True`
Add source rewrite with scrapped verbose names 10 years ago			`new_lines.append((line, ''))`
			`yield source[0].rstrip()`
			`cols = max(map(len, [line for line, _ in new_lines]))`
			`for line, legacy_name in new_lines:`
			`line = line.rstrip().ljust(cols)`
			`if legacy_name:`
Add double space before comments in source rewrite 10 years ago			`yield line + ' # ' + legacy_name`
Add source rewrite with scrapped verbose names 10 years ago			`else:`
			`yield line`
Test for previous class Meta on source generation 10 years ago
			`# class Meta`
			`if class_meta_already_exists:`
			`return`

Add model verbose name guessing by model name 10 years ago			`if title == 'Tabelas Auxiliares':`
			`title = ''`
Test for previous class Meta on source generation 10 years ago			`title = title if title else ''`
Add model verbose name guessing by model name 10 years ago
			`def add_s(name):`
			`return ' '.join(p if p.endswith('s') else p + 's' for p in name.split())`

			`def remove_s(name):`
			`return ' '.join(p[:-1] if p.endswith('s') else p for p in name.split())`

			`if not title:`
			`# default title from model name`
			`title_singular = u' '.join(re.findall('[A-Z][^A-Z]*', model.__name__))`
			`title_singular = re.sub('cao\\b', u'ção', title_singular)`
			`title_singular = re.sub('ao\\b', u'ão', title_singular)`
			`title_plural = add_s(title_singular.replace(u'ção', u'ções').replace(u'ão', u'ões'))`

			`elif title.endswith('s'):`
			`title_singular = remove_s(title.replace(u'ções', u'ção').replace(u'ões', u'ão'))`
			`title_plural = title`
Test for previous class Meta on source generation 10 years ago			`else:`
Add model verbose name guessing by model name 10 years ago			`title_singular = title`
			`title_plural = add_s(title.replace(u'ção', u'ções').replace(u'ão', u'ões'))`

Test for previous class Meta on source generation 10 years ago			`yield """`
Add source rewrite with scrapped verbose names 10 years ago			`class Meta:`
			`verbose_name = _(u'%s')`
Rewrite app models module with verbose names 10 years ago			`verbose_name_plural = _(u'%s')""" % (title_singular, title_plural)`


			`def print_app_with_verbose_names(app):`
			`print '##################################################################'`
			`header = '# -*- coding: utf-8 -*-\n'`
Add method to get source lines in unicode 10 years ago			`for line in getsourcelines(app.models_module):`
			`if line in ['# -*- coding: utf-8 -*-',`
			`'from django.utils.translation import ugettext as _', ]:`
Rewrite app models module with verbose names 10 years ago			`continue`
Add method to get source lines in unicode 10 years ago			`elif line == 'from django.db import models':`
Rewrite app models module with verbose names 10 years ago			`header += '''from django.db import models`
			`from django.utils.translation import ugettext as _`
			`'''`
			`elif 'class' in line:`
			`break`
			`else:`
Adjust imports rewrite 10 years ago			`header += line + '\n'`
Rewrite app models module with verbose names 10 years ago			`print header.strip()`
			`for model in app.models.values():`
			`print '\n'`
			`for p in source_with_verbose_names(model):`
			`print p`
List models without scrapped data 10 years ago

			`def list_models_with_no_scrapped_data(app):`
			`for model in app.models.values():`
			`if not any(extract_verbose_names(model)[:2]):`
			`print model.__name__`