From ce1c75860062da34a70753a1e845530fb218c494 Mon Sep 17 00:00:00 2001 From: Marcio Mazza Date: Fri, 10 Nov 2017 11:58:53 -0200 Subject: [PATCH] =?UTF-8?q?Adiciona=20script=20de=20exporta=C3=A7=C3=A3o?= =?UTF-8?q?=20de=20docs=20do=20zope?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sapl/legacy/scripts/exporta_zope/.gitignore | 2 + .../scripts/exporta_zope/exporta_zope.py | 151 ++++++++++++++++++ .../scripts/exporta_zope/requirements.txt | 2 + 3 files changed, 155 insertions(+) create mode 100644 sapl/legacy/scripts/exporta_zope/.gitignore create mode 100644 sapl/legacy/scripts/exporta_zope/exporta_zope.py create mode 100644 sapl/legacy/scripts/exporta_zope/requirements.txt diff --git a/sapl/legacy/scripts/exporta_zope/.gitignore b/sapl/legacy/scripts/exporta_zope/.gitignore new file mode 100644 index 000000000..98561c589 --- /dev/null +++ b/sapl/legacy/scripts/exporta_zope/.gitignore @@ -0,0 +1,2 @@ +Data*.fs* +sapl_documentos diff --git a/sapl/legacy/scripts/exporta_zope/exporta_zope.py b/sapl/legacy/scripts/exporta_zope/exporta_zope.py new file mode 100644 index 000000000..618c9ec5c --- /dev/null +++ b/sapl/legacy/scripts/exporta_zope/exporta_zope.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- + +# IMPORTANTE: +# Esse script precisa rodar em python 2 +# e depende apenas do descrito no arquivo requiments.txt + +import os.path +from collections import defaultdict +from functools import partial + +import ZODB.DB +import ZODB.FileStorage +from ZODB.broken import Broken + +EXTENSOES = { + 'application/msword': '.doc', + 'application/pdf': '.pdf', + 'application/vnd.oasis.opendocument.text': '.odt', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx', # noqa + 'application/xml': '.xml', + 'text/xml': '.xml', + 'application/zip': '.zip', + 'image/jpeg': '.jpeg', + 'image/png': '.png', + 'image/gif': '.gif', + 'text/html': '.html', + 'text/rtf': '.rtf', + 'text/x-python': '.py', + 'text/plain': '.txt', + 'SDE-Document': 'xml', + + # TODO rever... + 'text/richtext': '.rtf', + + # sem extensao + 'application/octet-stream': '', # binario + 'inode/x-empty': '', # vazio + 'text/x-unknown-content-type': '', +} + + +def br(obj): + if isinstance(obj, Broken): + return obj.__Broken_state__ + else: + return obj + + +def dump_file(doc, path): + name = doc['__name__'] + extension = EXTENSOES[doc['content_type']] + fullname = os.path.join(path, name + extension) + print(fullname) + + pdata = br(doc['data']) + if isinstance(pdata, str): + # Retrocedemos se pdata ja eh uma str (necessario em Images) + pdata = doc + + with open(fullname, 'w') as arq: + while pdata: + arq.write(pdata['data']) + pdata = br(pdata.get('next', None)) + return name + + +nao_identificados = defaultdict(list) + + +def enumerate_folder(folder): + # folder vazio nao tem _objects + for entry in folder.get('_objects', []): + id, meta_type = entry['id'], entry['meta_type'] + obj = br(folder[id]) + yield id, obj, meta_type + + +def enumerate_btree(folder): + tree = folder['_tree'] + for id, obj in tree.iteritems(): + obj, meta_type = br(obj), type(obj).__name__ + yield id, obj, meta_type + + +def dump_folder(folder, path='', enum=enumerate_folder): + name = folder['id'] + path = os.path.join(path, name) + if not os.path.exists(path): + os.makedirs(path) + for id, obj, meta_type in enum(folder): + dump = DUMP_FUNCTIONS.get(meta_type, '?') + if dump == '?': + nao_identificados[meta_type].append(path + '/' + id) + elif dump: + id_interno = dump(obj, path) + assert id == id_interno + return name + + +DUMP_FUNCTIONS = { + 'File': dump_file, + 'Image': dump_file, + 'Folder': partial(dump_folder, enum=enumerate_folder), + 'BTreeFolder2': partial(dump_folder, enum=enumerate_btree), + + # explicitamente ignorados + 'ZCatalog': None, + 'Dumper': None, +} + + +def get_app(data_fs_path): + storage = ZODB.FileStorage.FileStorage(data_fs_path) + db = ZODB.DB(storage) + connection = db.open() + root = connection.root() + app = br(root['Application']) + + def close_db(): + db.close() + + return app, close_db + + +def find_sapl(app): + [id] = [e['id'] for e in app['_objects'] + if e['id'].startswith('cm_') + and e['meta_type'] == 'Folder'] + cm_zzz = br(app[id]) + sapl = br(cm_zzz['sapl']) + return sapl + + +def dump_sapl(data_fs_path): + app, close_db = get_app(data_fs_path) + try: + sapl = find_sapl(app) + docs = br(sapl['sapl_documentos']) + + nao_identificados.clear() + dump_folder(docs) + if nao_identificados: + print('#' * 80) + print('#' * 80) + print(u'FORAM ENCONTRADOS ARQUIVOS DE FORMATO NÃO IDENTIFICADO!!!') + print(u'REFAÇA A EXPORTAÇÃO\n') + print(nao_identificados) + print('#' * 80) + print('#' * 80) + finally: + close_db() diff --git a/sapl/legacy/scripts/exporta_zope/requirements.txt b/sapl/legacy/scripts/exporta_zope/requirements.txt new file mode 100644 index 000000000..9b1afa6fb --- /dev/null +++ b/sapl/legacy/scripts/exporta_zope/requirements.txt @@ -0,0 +1,2 @@ +# ZODB version 3.7.4 +git+git://github.com/zopefoundation/ZODB.git@d6f3c3ce5e6df1060de7a3#egg=ZODB3