mirror of https://github.com/interlegis/sapl.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
165 lines
4.4 KiB
165 lines
4.4 KiB
# -*- coding: utf-8 -*-
|
|
|
|
# IMPORTANTE:
|
|
# Esse script precisa rodar em python 2
|
|
# e depende apenas do descrito no arquivo requiments.txt
|
|
|
|
import os.path
|
|
from collections import defaultdict
|
|
from functools import partial
|
|
|
|
import ZODB.DB
|
|
import ZODB.FileStorage
|
|
from ZODB.broken import Broken
|
|
|
|
EXTENSOES = {
|
|
'application/msword': '.doc',
|
|
'application/pdf': '.pdf',
|
|
'application/vnd.oasis.opendocument.text': '.odt',
|
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx', # noqa
|
|
'application/xml': '.xml',
|
|
'text/xml': '.xml',
|
|
'application/zip': '.zip',
|
|
'image/jpeg': '.jpeg',
|
|
'image/png': '.png',
|
|
'image/gif': '.gif',
|
|
'text/html': '.html',
|
|
'text/rtf': '.rtf',
|
|
'text/x-python': '.py',
|
|
'text/plain': '.txt',
|
|
'SDE-Document': 'xml',
|
|
|
|
# TODO rever...
|
|
'text/richtext': '.rtf',
|
|
|
|
# sem extensao
|
|
'application/octet-stream': '', # binario
|
|
'inode/x-empty': '', # vazio
|
|
'text/x-unknown-content-type': '',
|
|
}
|
|
|
|
|
|
def br(obj):
|
|
if isinstance(obj, Broken):
|
|
return obj.__Broken_state__
|
|
else:
|
|
return obj
|
|
|
|
|
|
def dump_file(doc, path):
|
|
name = doc['__name__']
|
|
extension = EXTENSOES[doc['content_type']]
|
|
fullname = os.path.join(path, name + extension)
|
|
print(fullname)
|
|
|
|
pdata = br(doc['data'])
|
|
if isinstance(pdata, str):
|
|
# Retrocedemos se pdata ja eh uma str (necessario em Images)
|
|
pdata = doc
|
|
|
|
with open(fullname, 'w') as arq:
|
|
while pdata:
|
|
arq.write(pdata['data'])
|
|
pdata = br(pdata.get('next', None))
|
|
return name
|
|
|
|
|
|
nao_identificados = defaultdict(list)
|
|
|
|
|
|
def enumerate_folder(folder):
|
|
# folder vazio nao tem _objects
|
|
for entry in folder.get('_objects', []):
|
|
id, meta_type = entry['id'], entry['meta_type']
|
|
obj = br(folder[id])
|
|
yield id, obj, meta_type
|
|
|
|
|
|
def enumerate_btree(folder):
|
|
contagem_esperada = folder['_count'].value
|
|
tree = folder['_tree']
|
|
for contagem_real, (id, obj) in enumerate(tree.iteritems(), start=1):
|
|
obj, meta_type = br(obj), type(obj).__name__
|
|
yield id, obj, meta_type
|
|
# verificação de consistência
|
|
assert contagem_esperada == contagem_real
|
|
|
|
|
|
def dump_folder(folder, path='', enum=enumerate_folder):
|
|
name = folder['id']
|
|
path = os.path.join(path, name)
|
|
if not os.path.exists(path):
|
|
os.makedirs(path)
|
|
for id, obj, meta_type in enum(folder):
|
|
dump = DUMP_FUNCTIONS.get(meta_type, '?')
|
|
if dump == '?':
|
|
nao_identificados[meta_type].append(path + '/' + id)
|
|
elif dump:
|
|
id_interno = dump(obj, path)
|
|
assert id == id_interno
|
|
return name
|
|
|
|
|
|
DUMP_FUNCTIONS = {
|
|
'File': dump_file,
|
|
'Image': dump_file,
|
|
'Folder': partial(dump_folder, enum=enumerate_folder),
|
|
'BTreeFolder2': partial(dump_folder, enum=enumerate_btree),
|
|
|
|
# explicitamente ignorados
|
|
'ZCatalog': None,
|
|
'Dumper': None,
|
|
}
|
|
|
|
|
|
def get_app(data_fs_path):
|
|
storage = ZODB.FileStorage.FileStorage(data_fs_path)
|
|
db = ZODB.DB(storage)
|
|
connection = db.open()
|
|
root = connection.root()
|
|
app = br(root['Application'])
|
|
|
|
def close_db():
|
|
db.close()
|
|
|
|
return app, close_db
|
|
|
|
|
|
def find_sapl(app):
|
|
for obj in app['_objects']:
|
|
id, meta_type = obj['id'], obj['meta_type']
|
|
if id.startswith('cm_') and meta_type == 'Folder':
|
|
cm_zzz = br(app[id])
|
|
sapl = br(cm_zzz.get('sapl', None))
|
|
if sapl and 'sapl_documentos' in sapl and 'acl_users' in sapl:
|
|
return sapl
|
|
|
|
|
|
def dump_sapl(data_fs_path):
|
|
app, close_db = get_app(data_fs_path)
|
|
try:
|
|
sapl = find_sapl(app)
|
|
docs = br(sapl['sapl_documentos'])
|
|
|
|
nao_identificados.clear()
|
|
dump_folder(docs)
|
|
if nao_identificados:
|
|
print('#' * 80)
|
|
print('#' * 80)
|
|
print(u'FORAM ENCONTRADOS ARQUIVOS DE FORMATO NÃO IDENTIFICADO!!!')
|
|
print(u'REFAÇA A EXPORTAÇÃO\n')
|
|
print(nao_identificados)
|
|
print('#' * 80)
|
|
print('#' * 80)
|
|
finally:
|
|
close_db()
|
|
|
|
|
|
def dump_propriedades(docs):
|
|
props_sapl = br(docs['props_sapl'])
|
|
ids = [p['id'] for p in props_sapl['_properties']]
|
|
props = {id: props_sapl[id] for id in ids}
|
|
props = {id: p.decode('iso-8859-1') if isinstance(p, str) else p
|
|
for id, p in props.items()}
|
|
with open('sapl_documentos/propriedades.yaml', 'w') as f:
|
|
f.write(yaml.safe_dump(props))
|
|
|