Skip to content

Commit 3e609ac

Browse files
authored
[ENG-8192] Ability to force archive registrations when OSFS Folders have changed (#11194)
## Purpose fix force archive for some actions ## Changes - divide revert_log_actions into separate functions - include trashed children when build a file tree - add additional retrieval logic for osf_storage_folder_created and osf_storage_file_removed - add generic retrieval fallback ## Ticket https://openscience.atlassian.net/browse/ENG-8192
1 parent 9aa22ee commit 3e609ac

File tree

2 files changed

+266
-34
lines changed

2 files changed

+266
-34
lines changed

osf/management/commands/force_archive.py

Lines changed: 72 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from framework import sentry
3838
from framework.exceptions import HTTPError
3939
from osf.models import Node, NodeLog, Registration, BaseFileNode
40+
from osf.models.files import TrashedFileNode
4041
from osf.exceptions import RegistrationStuckRecoverableException, RegistrationStuckBrokenException
4142
from api.base.utils import waterbutler_api_url_for
4243
from scripts import utils as script_utils
@@ -280,48 +281,72 @@ def get_logs_to_revert(reg):
280281
Q(node=reg.registered_from) |
281282
(Q(params__source__nid=reg.registered_from._id) | Q(params__destination__nid=reg.registered_from._id))).order_by('-date')
282283

284+
def get_file_obj_from_log(log, reg):
285+
try:
286+
return BaseFileNode.objects.get(_id=log.params['urls']['view'].split('/')[4])
287+
except KeyError:
288+
if log.action == 'osf_storage_folder_created':
289+
return OsfStorageFolder.objects.get(
290+
target_object_id=reg.registered_from.id,
291+
name=log.params['path'].split('/')[-2]
292+
)
293+
elif log.action == 'osf_storage_file_removed':
294+
path = log.params['path'].split('/')
295+
return TrashedFileNode.objects.get(
296+
target_object_id=reg.registered_from.id,
297+
name=path[-1] or path[-2] # file name or folder name
298+
)
299+
elif log.action in ['addon_file_moved', 'addon_file_renamed']:
300+
try:
301+
return BaseFileNode.objects.get(_id=log.params['source']['path'].rstrip('/').split('/')[-1])
302+
except (KeyError, BaseFileNode.DoesNotExist):
303+
return BaseFileNode.objects.get(_id=log.params['destination']['path'].rstrip('/').split('/')[-1])
304+
else:
305+
# Generic fallback
306+
path = log.params.get('path', '').split('/')
307+
if len(path) >= 2:
308+
name = path[-1] or path[-2] # file name or folder name
309+
return BaseFileNode.objects.get(
310+
target_object_id=reg.registered_from.id,
311+
name=name
312+
)
313+
314+
raise ValueError(f'Cannot determine file obj for log {log._id} [Registration id {reg._id}]: {log.action}')
315+
316+
317+
def handle_file_operation(file_tree, reg, file_obj, log, obj_cache):
318+
logger.info(f'Reverting {log.action} {file_obj._id}:{file_obj.name} from {log.date}')
319+
320+
if log.action in ['osf_storage_file_added', 'osf_storage_folder_created']:
321+
return modify_file_tree_recursive(reg._id, file_tree, file_obj, deleted=True, cached=bool(file_obj._id in obj_cache))
322+
elif log.action == 'osf_storage_file_removed':
323+
return modify_file_tree_recursive(reg._id, file_tree, file_obj, deleted=False, cached=bool(file_obj._id in obj_cache))
324+
elif log.action == 'osf_storage_file_updated':
325+
return modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), revert=True)
326+
elif log.action == 'addon_file_moved':
327+
parent = BaseFileNode.objects.get(_id__in=obj_cache, name='/{}'.format(log.params['source']['materialized']).rstrip('/').split('/')[-2])
328+
return modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), move_under=parent)
329+
elif log.action == 'addon_file_renamed':
330+
return modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), rename=log.params['source']['name'])
331+
else:
332+
raise ValueError(f'Unexpected log action: {log.action}')
333+
283334
def revert_log_actions(file_tree, reg, obj_cache, permissible_addons):
284335
logs_to_revert = get_logs_to_revert(reg)
336+
285337
if len(permissible_addons) > 1:
286338
logs_to_revert = logs_to_revert.exclude(action__in=PERMISSIBLE_BLACKLIST)
339+
287340
for log in list(logs_to_revert):
288-
try:
289-
file_obj = BaseFileNode.objects.get(_id=log.params['urls']['view'].split('/')[4])
290-
except KeyError:
291-
try:
292-
file_obj = BaseFileNode.objects.get(_id=log.params['source']['path'].rstrip('/').split('/')[-1])
293-
except BaseFileNode.DoesNotExist:
294-
# Bad log data
295-
file_obj = BaseFileNode.objects.get(_id=log.params['destination']['path'].rstrip('/').split('/')[-1])
341+
file_obj = get_file_obj_from_log(log, reg)
296342
assert file_obj.target in reg.registered_from.root.node_and_primary_descendants()
297-
if log.action == 'osf_storage_file_added':
298-
# Find and mark deleted
299-
logger.info(f'Reverting add {file_obj._id}:{file_obj.name} from {log.date}')
300-
file_tree, noop = modify_file_tree_recursive(reg._id, file_tree, file_obj, deleted=True, cached=bool(file_obj._id in obj_cache))
301-
elif log.action == 'osf_storage_file_removed':
302-
# Find parent and add to children, or undelete
303-
logger.info(f'Reverting delete {file_obj._id}:{file_obj.name} from {log.date}')
304-
file_tree, noop = modify_file_tree_recursive(reg._id, file_tree, file_obj, deleted=False, cached=bool(file_obj._id in obj_cache))
305-
elif log.action == 'osf_storage_file_updated':
306-
# Find file and revert version
307-
logger.info(f'Reverting update {file_obj._id}:{file_obj.name} from {log.date}')
308-
file_tree, noop = modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), revert=True)
309-
elif log.action == 'osf_storage_folder_created':
310-
# Find folder and mark deleted
311-
logger.info(f'Reverting folder {file_obj._id}:{file_obj.name} from {log.date}')
312-
file_tree, noop = modify_file_tree_recursive(reg._id, file_tree, file_obj, deleted=True, cached=bool(file_obj._id in obj_cache))
313-
elif log.action == 'addon_file_moved':
314-
logger.info(f'Reverting move {file_obj._id}:{file_obj.name} from {log.date}')
315-
parent = BaseFileNode.objects.get(_id__in=obj_cache, name='/{}'.format(log.params['source']['materialized']).rstrip('/').split('/')[-2])
316-
file_tree, noop = modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), move_under=parent)
317-
elif log.action == 'addon_file_renamed':
318-
logger.info('Reverting rename {}:{} -> {} from {}'.format(file_obj._id, log.params['source']['name'], file_obj.name, log.date))
319-
file_tree, noop = modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), rename=log.params['source']['name'])
320-
else:
321-
raise Exception(f'Unexpected log action: {log.action}')
343+
344+
file_tree, noop = handle_file_operation(file_tree, reg, file_obj, log, obj_cache)
322345
assert not noop, f'{reg._id}: Failed to revert action for NodeLog {log._id}'
346+
323347
if file_obj._id not in obj_cache:
324348
obj_cache.add(file_obj._id)
349+
325350
return file_tree
326351

327352
def build_file_tree(reg, node_settings, *args, **kwargs):
@@ -337,7 +362,20 @@ def _recurse(file_obj, node):
337362
'version': int(file_obj.versions.latest('created').identifier) if file_obj.versions.exists() else None
338363
}
339364
if not file_obj.is_file:
340-
serialized['children'] = [_recurse(child, node) for child in OsfStorageFileNode.objects.filter(target_object_id=node.id, target_content_type_id=ct_id, parent_id=file_obj.id)]
365+
nonlocal reg
366+
all_children = OsfStorageFileNode.objects.filter(
367+
target_object_id=node.id,
368+
target_content_type_id=ct_id,
369+
parent_id=file_obj.id
370+
).union(
371+
TrashedFileNode.objects.filter(
372+
target_object_id=node.id,
373+
target_content_type_id=ct_id,
374+
parent_id=file_obj.id,
375+
modified__gt=reg.archive_job.created,
376+
)
377+
)
378+
serialized['children'] = [_recurse(child, node) for child in all_children]
341379
return serialized
342380

343381
current_tree = _recurse(node_settings.get_root(), n)
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import pytest
2+
from django.utils import timezone
3+
4+
from addons.osfstorage.models import OsfStorageFile, OsfStorageFolder
5+
from osf.models import NodeLog, BaseFileNode
6+
from osf.models.files import TrashedFileNode, TrashedFolder
7+
from osf.management.commands.force_archive import get_file_obj_from_log, build_file_tree, DEFAULT_PERMISSIBLE_ADDONS
8+
from osf_tests.factories import NodeFactory, RegistrationFactory
9+
10+
11+
class TestGetFileObjFromLog:
12+
13+
@pytest.fixture
14+
def node(self):
15+
return NodeFactory(title='Test Node', category='project')
16+
17+
@pytest.fixture
18+
def reg(self, node):
19+
return RegistrationFactory(project=node, registered_date=timezone.now())
20+
21+
@pytest.mark.django_db
22+
def test_file_added(self, node, reg):
23+
file = OsfStorageFile.objects.create(target=node, name='file1.txt')
24+
file.save()
25+
log = NodeLog.objects.create(
26+
node=node,
27+
action='osf_storage_file_added',
28+
params={'urls': {'view': f'/{node._id}/files/osfstorage/{file._id}/'}},
29+
date=timezone.now(),
30+
)
31+
file_obj = get_file_obj_from_log(log, reg)
32+
assert isinstance(file_obj, BaseFileNode)
33+
assert file_obj == file
34+
35+
@pytest.mark.django_db
36+
def test_file_removed(self, node, reg):
37+
file = OsfStorageFile.create(target=node, name='trashed.txt')
38+
file.delete()
39+
log = NodeLog.objects.create(
40+
node=node,
41+
action='osf_storage_file_removed',
42+
params={'path': '/folder1/trashed.txt'},
43+
date=timezone.now(),
44+
)
45+
file_obj = get_file_obj_from_log(log, reg)
46+
assert isinstance(file_obj, TrashedFileNode)
47+
assert file_obj == file
48+
49+
folder = OsfStorageFolder.create(target=node, name='folder1')
50+
folder.delete()
51+
log = NodeLog.objects.create(
52+
node=node,
53+
action='osf_storage_file_removed',
54+
params={'path': '/folder1/'},
55+
date=timezone.now(),
56+
)
57+
file_obj = get_file_obj_from_log(log, reg)
58+
assert isinstance(file_obj, TrashedFolder)
59+
assert file_obj == folder
60+
61+
@pytest.mark.django_db
62+
def test_folder_created(self, node, reg):
63+
folder = OsfStorageFolder.create(target=node, name='folder1')
64+
folder.save()
65+
log = NodeLog.objects.create(
66+
node=node,
67+
action='osf_storage_folder_created',
68+
params={'path': '/folder1/'},
69+
date=timezone.now(),
70+
)
71+
file_obj = get_file_obj_from_log(log, reg)
72+
assert isinstance(file_obj, OsfStorageFolder)
73+
assert file_obj == folder
74+
75+
@pytest.mark.django_db
76+
def test_move_rename(self, node, reg):
77+
file = OsfStorageFile.create(target=node, name='file2.txt')
78+
file.save()
79+
log = NodeLog.objects.create(
80+
node=node,
81+
action='addon_file_renamed',
82+
params={
83+
'source': {'path': f'/{file._id}', 'name': 'file1.txt'},
84+
'destination': {'path': f'/{file._id}', 'name': 'file2.txt'}
85+
},
86+
date=timezone.now(),
87+
)
88+
file_obj = get_file_obj_from_log(log, reg)
89+
assert isinstance(file_obj, BaseFileNode)
90+
assert file_obj == file
91+
92+
@pytest.mark.django_db
93+
def test_generic_fallback(self, node, reg):
94+
file = OsfStorageFile.create(target=node, name='fallback.txt')
95+
file.save()
96+
log = NodeLog.objects.create(
97+
node=node,
98+
action='some_other_action',
99+
params={'path': '/fallback.txt'},
100+
date=timezone.now(),
101+
)
102+
file_obj = get_file_obj_from_log(log, reg)
103+
assert file_obj == file
104+
105+
106+
class TestBuildFileTree:
107+
108+
@pytest.fixture
109+
def node(self):
110+
return NodeFactory(title='Test Node', category='project')
111+
112+
@pytest.fixture
113+
def reg(self, node):
114+
return RegistrationFactory(project=node, registered_date=timezone.now())
115+
116+
@pytest.fixture
117+
def permissible_addons(self):
118+
return DEFAULT_PERMISSIBLE_ADDONS
119+
120+
@pytest.mark.django_db
121+
def test_empty_folder(self, node, reg, permissible_addons):
122+
folder = OsfStorageFolder.create(target=node, name='empty')
123+
folder.save()
124+
125+
class DummyNodeSettings:
126+
def get_root(self):
127+
return folder
128+
129+
node_settings = DummyNodeSettings()
130+
tree = build_file_tree(reg, node_settings, permissible_addons=permissible_addons)
131+
assert tree['object'] == folder
132+
assert tree['children'] == []
133+
134+
@pytest.mark.django_db
135+
def test_nested_folders(self, node, reg, permissible_addons):
136+
parent = OsfStorageFolder.create(target=node, name='parent')
137+
parent.save()
138+
139+
child = OsfStorageFolder.create(target=node, name='child')
140+
child.save()
141+
child.move_under(parent)
142+
143+
file = OsfStorageFile.objects.create(target=node, name='file1.txt')
144+
file.save()
145+
file.move_under(child)
146+
147+
class DummyNodeSettings:
148+
def get_root(self):
149+
return parent
150+
151+
node_settings = DummyNodeSettings()
152+
tree = build_file_tree(reg, node_settings, permissible_addons=permissible_addons)
153+
assert tree['object'] == parent
154+
155+
child_node = next((c for c in tree['children'] if c['object'] == child), None)
156+
assert child_node is not None
157+
assert any(grandchild['object'] == file for grandchild in child_node['children'])
158+
159+
@pytest.mark.django_db
160+
def test_active_and_trashed_children(self, node, reg, permissible_addons):
161+
folder = OsfStorageFolder.create(target=node, name='parent')
162+
folder.save()
163+
164+
file = OsfStorageFile.create(target=node, name='file1.txt')
165+
file.save()
166+
file.move_under(folder)
167+
168+
deleted_file = OsfStorageFile.create(target=node, name='file2.txt')
169+
deleted_file.save()
170+
deleted_file.move_under(folder)
171+
deleted_file.delete()
172+
173+
class DummyNodeSettings:
174+
def get_root(self):
175+
return folder
176+
177+
node_settings = DummyNodeSettings()
178+
tree = build_file_tree(reg, node_settings, permissible_addons=permissible_addons)
179+
assert tree['object'] == folder
180+
181+
names = [child['object'].name for child in tree['children']]
182+
assert 'file1.txt' in names
183+
assert 'file2.txt' in names
184+
185+
# make sure only valid thrashed file nodes are included
186+
new_reg = RegistrationFactory(project=node, registered_date=timezone.now())
187+
file.delete()
188+
189+
tree = build_file_tree(new_reg, node_settings, permissible_addons=permissible_addons)
190+
assert tree['object'] == folder
191+
192+
names = [child['object'].name for child in tree['children']]
193+
assert 'file1.txt' in names
194+
assert 'file2.txt' not in names

0 commit comments

Comments
 (0)