From c7da2ee597baf10e51ec33596358a7f38ffcd0be Mon Sep 17 00:00:00 2001 From: ulyssesBML Date: Mon, 26 Aug 2019 12:03:00 -0300 Subject: [PATCH 1/5] Iniciando script --- scripts/deduplica_comissao.py | 43 +++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 scripts/deduplica_comissao.py diff --git a/scripts/deduplica_comissao.py b/scripts/deduplica_comissao.py new file mode 100644 index 000000000..0c10acede --- /dev/null +++ b/scripts/deduplica_comissao.py @@ -0,0 +1,43 @@ + +from sapl.comissoes.models import Comissao, Composicao +from sapl.materia.models import DespachoInicial, Relatoria, UnidadeTramitacao + +from difflib import SequenceMatcher + +models_dependentes = [Composicao, DespachoInicial, Relatoria, UnidadeTramitacao] + +def similar(a, b): + return SequenceMatcher(None, a, b).ratio() + +def detecta_duplicados(): + lst_duplicados = [] + comissoes = Comissao.objects.all().order_by('id') + for c_1 in comissoes: + c_1_lst = [] + for c_2 in comissoes: + if similar(c_1.nome,c_2.nome) > 0.9 and c_1.id != c_2.id: + c_1_lst.append(c_2) + comissoes = comissoes.exclude(id=c_2.id) + if c_1_lst: + c_1_lst.append(c_1) + comissoes = comissoes.exclude(id=c_1.id) + lst_duplicados.append(c_1_lst) + return lst_duplicados + +def junta_dulpicados(duplicados): + principal = duplicados[-1] + for c in duplicados[:-1]: + for m in models_dependentes: + for obj in m.objects.filter(comissao=c): + obj.comissao = principal + obj.save() + c.delete() + + +def main(): + lst_duplicados = detecta_duplicados() + for c in lst_duplicados: + junta_dulpicados(c) + +if __name__ == '__main__': + main() From 7967aa764a6e79605c96dd9e34d199d33d13d700 Mon Sep 17 00:00:00 2001 From: ulyssesBML Date: Tue, 27 Aug 2019 15:13:43 -0300 Subject: [PATCH 2/5] Redirecionando autores --- scripts/deduplica_comissao.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/scripts/deduplica_comissao.py b/scripts/deduplica_comissao.py index 0c10acede..29e1313b6 100644 --- a/scripts/deduplica_comissao.py +++ b/scripts/deduplica_comissao.py @@ -1,10 +1,10 @@ -from sapl.comissoes.models import Comissao, Composicao +from sapl.comissoes.models import Comissao, Composicao, Reuniao from sapl.materia.models import DespachoInicial, Relatoria, UnidadeTramitacao from difflib import SequenceMatcher -models_dependentes = [Composicao, DespachoInicial, Relatoria, UnidadeTramitacao] +models_dependentes = [Composicao, DespachoInicial, Relatoria, UnidadeTramitacao, Reuniao] def similar(a, b): return SequenceMatcher(None, a, b).ratio() @@ -24,14 +24,23 @@ def detecta_duplicados(): lst_duplicados.append(c_1_lst) return lst_duplicados + +def muda_autor(principal, secundaria): + for autor in secundaria.autor.all(): + autor.delete() + +def muda_models_dependentes(principal,secundaria): + for model in models_dependentes: + for obj in model.objects.filter(comissao=secundaria): + obj.comissao = principal + obj.save() + def junta_dulpicados(duplicados): principal = duplicados[-1] - for c in duplicados[:-1]: - for m in models_dependentes: - for obj in m.objects.filter(comissao=c): - obj.comissao = principal - obj.save() - c.delete() + for secundaria in duplicados[:-1]: + muda_models_dependentes(principal,secundaria) + muda_autor(principal, secundaria) + secundaria.delete() def main(): From cf12c320cf6e5f171a2d1d9ac4a6f29929e0537c Mon Sep 17 00:00:00 2001 From: ulyssesBML Date: Fri, 30 Aug 2019 12:36:26 -0300 Subject: [PATCH 3/5] =?UTF-8?q?Adcionando=20casos=20com=20diferentes=20reg?= =?UTF-8?q?ras=20de=20realoca=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/deduplica_comissao.py | 64 +++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/scripts/deduplica_comissao.py b/scripts/deduplica_comissao.py index 29e1313b6..94b29cb58 100644 --- a/scripts/deduplica_comissao.py +++ b/scripts/deduplica_comissao.py @@ -1,6 +1,7 @@ from sapl.comissoes.models import Comissao, Composicao, Reuniao from sapl.materia.models import DespachoInicial, Relatoria, UnidadeTramitacao +from sapl.utils import intervalos_tem_intersecao from difflib import SequenceMatcher @@ -25,26 +26,75 @@ def detecta_duplicados(): return lst_duplicados -def muda_autor(principal, secundaria): - for autor in secundaria.autor.all(): - autor.delete() +def realoca_autor(principal, secundaria): + autor_principal = principal.autor.first() + clone = secundaria.autor.first() + + for autoria in clone.autoria_set.all(): + autoria.autor_id = autor_principal + autoria.save() + + for proposicao in clone.proposicao_set.all(): + proposicao.autor_id = autor_principal + proposicao.save() + + for autorianorma in clone.autorianorma_set.all(): + autorianorma.autor_id = autor_principal + autorianorma.save() + + for documentoadministrativo in clone.documentoadministrativo_set.all(): + documentoadministrativo.autor_id = autor[0] + documentoadministrativo.save() + + for protocolo in clone.protocolo_set.all(): + protocolo.autor_id = autor_principal + protocolo.save() + + clone.delete() + +def redireciona_apaga(principal,secundaria,apaga): + if(apaga): + secundaria.delete() + else: + secundaria.comissao = principal + secundaria.save() + def muda_models_dependentes(principal,secundaria): for model in models_dependentes: - for obj in model.objects.filter(comissao=secundaria): - obj.comissao = principal - obj.save() + for obj_secundario in model.objects.filter(comissao=secundaria): + for obj_principal in model.objects.filter(comissao=principal): + apaga = False + + if model == Composicao: + apaga = intervalos_tem_intersecao(obj_principal.periodo.data_inicio, + obj_principal.periodo.data_fim, + obj_secundario.periodo.data_inicio, + obj_secundario.periodo.data_fim) + + elif model == DespachoInicial: + apaga = obj_principal.materia == obj_secundario.materia + + elif model == Reuniao: + apaga = obj_principal.numero == obj_secundario.numero + + else: + apaga = False + + redireciona_apaga(obj_principal, obj_secundario, apaga) def junta_dulpicados(duplicados): principal = duplicados[-1] for secundaria in duplicados[:-1]: muda_models_dependentes(principal,secundaria) - muda_autor(principal, secundaria) + realoca_autor(principal, secundaria) secundaria.delete() def main(): lst_duplicados = detecta_duplicados() + print('Duplicados encomtrados:\n') + print(lst_duplicados) for c in lst_duplicados: junta_dulpicados(c) From 3a44d77603e403d3a8a94a104fc43660b8e8cfb8 Mon Sep 17 00:00:00 2001 From: ulyssesBML Date: Thu, 5 Sep 2019 15:27:42 -0300 Subject: [PATCH 4/5] Corrigindo script --- scripts/deduplica_comissao.py | 50 ++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/scripts/deduplica_comissao.py b/scripts/deduplica_comissao.py index 94b29cb58..fd9a4c679 100644 --- a/scripts/deduplica_comissao.py +++ b/scripts/deduplica_comissao.py @@ -1,5 +1,5 @@ -from sapl.comissoes.models import Comissao, Composicao, Reuniao +from sapl.comissoes.models import Comissao, Composicao, Reuniao, Participacao from sapl.materia.models import DespachoInicial, Relatoria, UnidadeTramitacao from sapl.utils import intervalos_tem_intersecao @@ -51,37 +51,43 @@ def realoca_autor(principal, secundaria): protocolo.save() clone.delete() - -def redireciona_apaga(principal,secundaria,apaga): - if(apaga): - secundaria.delete() - else: - secundaria.comissao = principal - secundaria.save() - + def muda_models_dependentes(principal,secundaria): for model in models_dependentes: for obj_secundario in model.objects.filter(comissao=secundaria): + repetido = False for obj_principal in model.objects.filter(comissao=principal): - apaga = False - if model == Composicao: - apaga = intervalos_tem_intersecao(obj_principal.periodo.data_inicio, + if model == Composicao and intervalos_tem_intersecao(obj_principal.periodo.data_inicio, obj_principal.periodo.data_fim, obj_secundario.periodo.data_inicio, - obj_secundario.periodo.data_fim) - - elif model == DespachoInicial: - apaga = obj_principal.materia == obj_secundario.materia - - elif model == Reuniao: - apaga = obj_principal.numero == obj_secundario.numero - + obj_secundario.periodo.data_fim): + + prim_participacoes = Participacao.objects.filter(composicao=obj_principal) + sec_participacoes = Participacao.objects.filter(composicao=obj_secundario) + for p in sec_participacoes: + if p in prim_participacoes: + p.delete() + else: + p.composicao = obj_principal + p.save() + + elif model == DespachoInicial and obj_principal.materia == obj_secundario.materia: + repetido =True + elif model == Reuniao and obj_principal.numero == obj_secundario.numero: + repetido =True else: - apaga = False + repetido = False + + if(repetido): + obj_secundario.comissao = None + obj_secundario.delete() + + else: + obj_secundario.comissao = principal + obj_secundario.save() - redireciona_apaga(obj_principal, obj_secundario, apaga) def junta_dulpicados(duplicados): principal = duplicados[-1] From 80836ae7e6dfa884b6c1e700a4b9d2a532ebc16b Mon Sep 17 00:00:00 2001 From: ulyssesBML Date: Mon, 21 Oct 2019 11:40:15 -0300 Subject: [PATCH 5/5] Concertando apagar autor --- scripts/deduplica_comissao.py | 47 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/scripts/deduplica_comissao.py b/scripts/deduplica_comissao.py index fd9a4c679..abb26ba2d 100644 --- a/scripts/deduplica_comissao.py +++ b/scripts/deduplica_comissao.py @@ -27,30 +27,29 @@ def detecta_duplicados(): def realoca_autor(principal, secundaria): - autor_principal = principal.autor.first() - clone = secundaria.autor.first() - - for autoria in clone.autoria_set.all(): - autoria.autor_id = autor_principal - autoria.save() - - for proposicao in clone.proposicao_set.all(): - proposicao.autor_id = autor_principal - proposicao.save() - - for autorianorma in clone.autorianorma_set.all(): - autorianorma.autor_id = autor_principal - autorianorma.save() - - for documentoadministrativo in clone.documentoadministrativo_set.all(): - documentoadministrativo.autor_id = autor[0] - documentoadministrativo.save() - - for protocolo in clone.protocolo_set.all(): - protocolo.autor_id = autor_principal - protocolo.save() - - clone.delete() + autor_principal = principal.autor.first() + autor_secundario = secundaria.autor.first() + for autoria in autor_secundario.autoria_set.all(): + autoria.autor_id = autor_principal + autoria.save() + + for proposicao in autor_secundario.proposicao_set.all(): + proposicao.autor_id = autor_principal + proposicao.save() + + for autorianorma in autor_secundario.autorianorma_set.all(): + autorianorma.autor_id = autor_principal + autorianorma.save() + + for documentoadministrativo in autor_secundario.documentoadministrativo_set.all(): + documentoadministrativo.autor_id = autor_principal + documentoadministrativo.save() + + for protocolo in autor_secundario.protocolo_set.all(): + protocolo.autor_id = autor_principal + protocolo.save() + + autor_secundario.delete() def muda_models_dependentes(principal,secundaria):