From 47d295c90c8dbe09edf3d026fe2e25f1a6c6e2cf Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Fri, 27 Apr 2018 15:23:23 -0300 Subject: [PATCH] =?UTF-8?q?Adiciona=20scraping=20de=20proposi=C3=A7=C3=B5e?= =?UTF-8?q?s=20do=20SDE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sapl/base/urls.py | 4 ++-- sapl/legacy/migracao.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/sapl/base/urls.py b/sapl/base/urls.py index 849c18131..b52d46ad6 100644 --- a/sapl/base/urls.py +++ b/sapl/base/urls.py @@ -123,11 +123,11 @@ urlpatterns = [ url(r'^sistema/search/', SaplSearchView(), name='haystack_search'), # Folhas XSLT e extras referenciadas por documentos migrados do sapl 2.5 - url(r'^sapl/XSLT/HTML/(?P<path>.*)$', RedirectView.as_view( + url(r'^(sapl/)?XSLT/HTML/(?P<path>.*)$', RedirectView.as_view( url=os.path.join(MEDIA_URL, 'sapl/public/XSLT/HTML/%(path)s'), permanent=False)), # url do logotipo usada em documentos migrados do sapl 2.5 - url(r'^sapl/sapl_documentos/props_sapl/logo_casa', + url(r'^(sapl/)?sapl_documentos/props_sapl/logo_casa', LogotipoView.as_view(), name='logotipo'), diff --git a/sapl/legacy/migracao.py b/sapl/legacy/migracao.py index 66668a62f..4509d8ba6 100644 --- a/sapl/legacy/migracao.py +++ b/sapl/legacy/migracao.py @@ -1,4 +1,8 @@ import subprocess +from getpass import getpass + +import requests +from unipath import Path from sapl.legacy.migracao_dados import (REPO, TAG_MARCO, gravar_marco, info, migrar_dados) @@ -6,6 +10,7 @@ from sapl.legacy.migracao_documentos import migrar_documentos from sapl.legacy.migracao_usuarios import migrar_usuarios from sapl.legacy.scripts.exporta_zope.variaveis_comuns import TAG_ZOPE from sapl.legacy_migration_settings import DIR_REPO, NOME_BANCO_LEGADO +from sapl.materia.models import Proposicao def adornar_msg(msg): @@ -45,3 +50,38 @@ def gerar_pacote(): arq_tar.remove() subprocess.check_output(['tar', 'cfh', arq_tar, 
'-C', DIR_REPO, 'sapl']) print('SUCESSO')
+
+
+# `upload_to` callable of the `texto_original` field of Proposicao.
+# It is called below as upload_to(instance, filename) to obtain the relative
+# path under which the file of a given Proposicao is stored.
+PROPOSICAO_UPLOAD_TO = Proposicao._meta.get_field('texto_original').upload_to
+
+
+def salva_conteudo_do_sde(proposicao, conteudo):
+    """Store `conteudo` as the `texto_original` file of `proposicao`.
+
+    The file is named ``proposicao_sde_<pk>.xml`` and is written in binary
+    mode below ``REPO.working_dir``, at the relative path returned by
+    ``PROPOSICAO_UPLOAD_TO``. That relative path is then assigned to
+    ``proposicao.texto_original`` and the instance is saved.
+
+    `conteudo` must be bytes (the file is opened with mode 'wb').
+    """
+    caminho_relativo = PROPOSICAO_UPLOAD_TO(
+        proposicao, 'proposicao_sde_{}.xml'.format(proposicao.pk))
+    caminho_absoluto = Path(REPO.working_dir, caminho_relativo)
+    # make sure the destination directory exists before writing
+    caminho_absoluto.parent.mkdir(parents=True)
+    with open(caminho_absoluto, 'wb') as arq:
+        arq.write(conteudo)
+    proposicao.texto_original = caminho_relativo
+    proposicao.save()
+
+
+def scrap_sde(url, usuario, senha=None):
+    """Download the rendered XML of every Proposicao from the site at `url`.
+
+    Logs in by posting `usuario` and `senha` (as the form fields
+    `__ac_name` and `__ac_password`) to ``<url>?retry=1``, then requests
+    ``<url>/sapl_documentos/proposicao/<pk>/renderXML?xsl=__default__``
+    for each Proposicao in the database, printing one progress line per
+    request. Responses with status 200 are stored through
+    `salva_conteudo_do_sde`; any other status is only reported in the
+    progress line.
+
+    If `senha` is empty or None it is asked for interactively (getpass).
+
+    Raises AssertionError if the login request does not return status 200.
+    """
+    if not senha:
+        senha = getpass()
+
+    url_proposicao = '{}/sapl_documentos/proposicao/{}/renderXML?xsl=__default__'  # noqa
+
+    # the context manager closes the session (and its pooled connections)
+    # even if a request or a save fails midway
+    with requests.session() as session:
+        # login
+        res = session.post('{}?retry=1'.format(url),
+                           {'__ac_name': usuario, '__ac_password': senha})
+        if res.status_code != 200:
+            # raised explicitly instead of using `assert`, so the check is
+            # not stripped when python runs with -O
+            raise AssertionError(
+                'falha no login em {}: status {}'.format(
+                    url, res.status_code))
+
+        total = Proposicao.objects.count()
+        # count from 1 so that the last item is reported as 100%
+        for num, proposicao in enumerate(Proposicao.objects.all(), 1):
+            pk = proposicao.pk
+            res = session.get(url_proposicao.format(url, pk))
+            print("pk: {} status: {} (progresso: {:.2%})".format(
+                pk, res.status_code, num / total))
+            if res.status_code == 200:
+                salva_conteudo_do_sde(proposicao, res.content)