From 022c6f478d4ca5b87b979139fe35a46c36461c07 Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Thu, 26 Apr 2018 17:35:45 -0300 Subject: [PATCH 1/6] Remover pacote de migracao antes de gerar --- sapl/legacy/migracao.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sapl/legacy/migracao.py b/sapl/legacy/migracao.py index 92dd439e1..66668a62f 100644 --- a/sapl/legacy/migracao.py +++ b/sapl/legacy/migracao.py @@ -31,6 +31,7 @@ def gerar_pacote(): # backup do banco print('Gerando backup do banco... ', end='', flush=True) arq_backup = DIR_REPO.child('{}.backup'.format(NOME_BANCO_LEGADO)) + arq_backup.remove() backup_cmd = ''' pg_dump --host localhost --port 5432 --username postgres --no-password --format custom --blobs --verbose --file {} {}'''.format( @@ -41,5 +42,6 @@ def gerar_pacote(): # tar de media/sapl print('Criando tar de media... ', end='', flush=True) arq_tar = DIR_REPO.child('{}.media.tar'.format(NOME_BANCO_LEGADO)) + arq_tar.remove() subprocess.check_output(['tar', 'cfh', arq_tar, '-C', DIR_REPO, 'sapl']) print('SUCESSO') From 47d295c90c8dbe09edf3d026fe2e25f1a6c6e2cf Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Fri, 27 Apr 2018 15:23:23 -0300 Subject: [PATCH 2/6] =?UTF-8?q?Adiciona=20scraping=20de=20proposi=C3=A7?= =?UTF-8?q?=C3=B5es=20do=20SDE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sapl/base/urls.py | 4 ++-- sapl/legacy/migracao.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/sapl/base/urls.py b/sapl/base/urls.py index 849c18131..b52d46ad6 100644 --- a/sapl/base/urls.py +++ b/sapl/base/urls.py @@ -123,11 +123,11 @@ urlpatterns = [ url(r'^sistema/search/', SaplSearchView(), name='haystack_search'), # Folhas XSLT e extras referenciadas por documentos migrados do sapl 2.5 - url(r'^sapl/XSLT/HTML/(?P<path>.*)$', RedirectView.as_view( + url(r'^(sapl/)?XSLT/HTML/(?P<path>.*)$', RedirectView.as_view( url=os.path.join(MEDIA_URL, 
'sapl/public/XSLT/HTML/%(path)s'), permanent=False)), # url do logotipo usada em documentos migrados do sapl 2.5 - url(r'^sapl/sapl_documentos/props_sapl/logo_casa', + url(r'^(sapl/)?sapl_documentos/props_sapl/logo_casa', LogotipoView.as_view(), name='logotipo'), diff --git a/sapl/legacy/migracao.py b/sapl/legacy/migracao.py index 66668a62f..4509d8ba6 100644 --- a/sapl/legacy/migracao.py +++ b/sapl/legacy/migracao.py @@ -1,4 +1,8 @@ import subprocess +from getpass import getpass + +import requests +from unipath import Path from sapl.legacy.migracao_dados import (REPO, TAG_MARCO, gravar_marco, info, migrar_dados) @@ -6,6 +10,7 @@ from sapl.legacy.migracao_documentos import migrar_documentos from sapl.legacy.migracao_usuarios import migrar_usuarios from sapl.legacy.scripts.exporta_zope.variaveis_comuns import TAG_ZOPE from sapl.legacy_migration_settings import DIR_REPO, NOME_BANCO_LEGADO +from sapl.materia.models import Proposicao def adornar_msg(msg): @@ -45,3 +50,38 @@ def gerar_pacote(): arq_tar.remove() subprocess.check_output(['tar', 'cfh', arq_tar, '-C', DIR_REPO, 'sapl']) print('SUCESSO') + + +PROPOSICAO_UPLOAD_TO = Proposicao._meta.get_field('texto_original').upload_to + + +def salva_conteudo_do_sde(proposicao, conteudo): + caminho_relativo = PROPOSICAO_UPLOAD_TO( + proposicao, 'proposicao_sde_{}.xml'.format(proposicao.pk)) + caminho_absoluto = Path(REPO.working_dir, caminho_relativo) + caminho_absoluto.parent.mkdir(parents=True) + with open(caminho_absoluto, 'wb') as arq: + arq.write(conteudo) + proposicao.texto_original = caminho_relativo + proposicao.save() + + +def scrap_sde(url, usuario, senha=None): + if not senha: + senha = getpass() + + # login + session = requests.session() + res = session.post('{}?retry=1'.format(url), + {'__ac_name': usuario, '__ac_password': senha}) + assert res.status_code == 200 + + url_proposicao = '{}/sapl_documentos/proposicao/{}/renderXML?xsl=__default__' # noqa + total = Proposicao.objects.count() + for num, proposicao in 
enumerate(Proposicao.objects.all()): + pk = proposicao.pk + res = session.get(url_proposicao.format(url, pk)) + print("pk: {} status: {} (progresso: {:.2%})".format( + pk, res.status_code, num / total)) + if res.status_code == 200: + salva_conteudo_do_sde(proposicao, res.content) From 2c034a84b393ff8a62504bf32fa4837e15c32d0c Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Thu, 3 May 2018 10:57:33 -0300 Subject: [PATCH 3/6] =?UTF-8?q?Imprime=20arquivos=20anteriores=20na=20expo?= =?UTF-8?q?rta=C3=A7=C3=A3o=20de=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sapl/legacy/scripts/exporta_zope/exporta_zope.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sapl/legacy/scripts/exporta_zope/exporta_zope.py b/sapl/legacy/scripts/exporta_zope/exporta_zope.py index c7442654e..e84a4d13b 100755 --- a/sapl/legacy/scripts/exporta_zope/exporta_zope.py +++ b/sapl/legacy/scripts/exporta_zope/exporta_zope.py @@ -333,7 +333,9 @@ def build_salvar(repo): def salvar(fullname, conteudo): sha = hashlib.sha256() sha.update(conteudo) - if sha.hexdigest() not in hashes: + if sha.hexdigest() in hashes: + print('- hash encontrado - {}'.format(fullname)) + else: fullname = ajusta_extensao(fullname, conteudo) if os.path.exists(fullname): # destrava arquivo pré-existente (o conteúdo mudou) @@ -355,7 +357,7 @@ def dump_sapl(sigla): destino.mkdir(parents=True) repo = git.Repo.init(destino) if TAG_ZOPE in repo.tags: - info('A exportação de documentos já está feita.') + print('A exportação de documentos já está feita -- abortando') return repo_execute(repo, 'git annex init') From 0690a77bab64901a3d95466c9c28ebee8c394af8 Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Thu, 3 May 2018 11:51:32 -0300 Subject: [PATCH 4/6] =?UTF-8?q?Continua=20exporta=C3=A7=C3=A3o=20de=20docs?= =?UTF-8?q?=20mesmo=20c=20tipos=20n=C3=A3o=20reconhecidos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
--- sapl/legacy/scripts/exporta_zope/exporta_zope.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sapl/legacy/scripts/exporta_zope/exporta_zope.py b/sapl/legacy/scripts/exporta_zope/exporta_zope.py index e84a4d13b..a75200949 100755 --- a/sapl/legacy/scripts/exporta_zope/exporta_zope.py +++ b/sapl/legacy/scripts/exporta_zope/exporta_zope.py @@ -78,20 +78,21 @@ def br(obj): def guess_extension(fullname, buffer): mime = magic.from_buffer(buffer, mime=True) - try: - return EXTENSOES[mime] - except KeyError as e: + extensao = EXTENSOES.get(mime) + if extensao is not None: + return extensao + else: possibilidades = '\n'.join( [" '{}': '{}',".format(mime, ext) for ext in mimetypes.guess_all_extensions(mime)]) - msg = '''Extensão não conhecida para o arquivo: {} + print('''Extensão não conhecida para o arquivo: {} e mimetype: {} Algumas possibilidades são: {} Atualize o código do dicionário EXTENSOES! '''.format(fullname, mime, possibilidades) - print(msg) - raise Exception(msg, e) + ) + return '.DESCONHECIDO.{}'.format(mime.replace('/', '__')) def get_conteudo_file(doc): From 0226923a668070c767288b7cd9377f0fd77444ed Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Thu, 3 May 2018 17:55:58 -0300 Subject: [PATCH 5/6] Reporta contagens diferentes na btree ao exportar docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ao invés de lançar exceção --- sapl/legacy/scripts/exporta_zope/exporta_zope.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sapl/legacy/scripts/exporta_zope/exporta_zope.py b/sapl/legacy/scripts/exporta_zope/exporta_zope.py index a75200949..63d0ed09e 100755 --- a/sapl/legacy/scripts/exporta_zope/exporta_zope.py +++ b/sapl/legacy/scripts/exporta_zope/exporta_zope.py @@ -155,7 +155,11 @@ def enumerate_btree(folder): obj, meta_type = br(obj), type(obj).__name__ yield id, obj, meta_type # verificação de consistência - assert contagem_esperada == contagem_real 
+ if contagem_esperada != contagem_real: + print('ATENÇÃO: contagens diferentes na btree: ' + '{} esperada: {} real: {}'.format(folder, + contagem_esperada, + contagem_real)) nao_identificados = defaultdict(list) From 71665929c558820e7ee5509228c55f4acc46a597 Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Thu, 3 May 2018 19:14:58 -0300 Subject: [PATCH 6/6] Grava marco com dump do postgres --- sapl/legacy/migracao.py | 14 +------------- sapl/legacy/migracao_dados.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sapl/legacy/migracao.py b/sapl/legacy/migracao.py index 4509d8ba6..059e49164 100644 --- a/sapl/legacy/migracao.py +++ b/sapl/legacy/migracao.py @@ -28,21 +28,9 @@ def migrar(interativo=False): migrar_usuarios(REPO.working_dir) migrar_documentos(REPO) gravar_marco() - gerar_pacote() -def gerar_pacote(): - - # backup do banco - print('Gerando backup do banco... ', end='', flush=True) - arq_backup = DIR_REPO.child('{}.backup'.format(NOME_BANCO_LEGADO)) - arq_backup.remove() - backup_cmd = ''' - pg_dump --host localhost --port 5432 --username postgres --no-password - --format custom --blobs --verbose --file {} {}'''.format( - arq_backup, NOME_BANCO_LEGADO) - subprocess.check_output(backup_cmd.split(), stderr=subprocess.DEVNULL) - print('SUCESSO') +def compactar_media(): # tar de media/sapl print('Criando tar de media... ', end='', flush=True) diff --git a/sapl/legacy/migracao_dados.py b/sapl/legacy/migracao_dados.py index e40406447..0f1e4d6dc 100644 --- a/sapl/legacy/migracao_dados.py +++ b/sapl/legacy/migracao_dados.py @@ -1,6 +1,7 @@ import datetime import os import re +import subprocess import traceback from collections import OrderedDict, defaultdict, namedtuple from datetime import date @@ -1333,6 +1334,17 @@ def gravar_marco(): with open(nome_arq, 'w') as arq: pyaml.dump(data, arq) + # backup do banco + print('Gerando backup do banco... 
', end='', flush=True) + arq_backup = DIR_REPO.child('{}.backup'.format(NOME_BANCO_LEGADO)) + arq_backup.remove() + backup_cmd = ''' + pg_dump --host localhost --port 5432 --username postgres --no-password + --format custom --blobs --verbose --file {} {}'''.format( + arq_backup, NOME_BANCO_LEGADO) + subprocess.check_output(backup_cmd.split(), stderr=subprocess.DEVNULL) + print('SUCESSO') + # salva mudanças REPO.git.add([dir_dados.name]) if 'master' not in REPO.heads or REPO.index.diff('HEAD'):