Skip to content

Commit 6259903

Browse files
authored
Add transformer for JSON strings (#7)
1 parent efb0b44 commit 6259903

File tree

4 files changed

+157
-0
lines changed

4 files changed

+157
-0
lines changed

localstack_snapshot/snapshots/prototype.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ def _transform_dict_to_parseable_values(self, original):
272272
self._transform_dict_to_parseable_values(v)
273273

274274
if isinstance(v, str) and v.startswith("{"):
275+
# Doesn't handle JSON arrays and nested JSON strings. See JsonStringTransformer.
276+
# TODO for the major release consider having JSON parsing in one place only: either here or in JsonStringTransformer
275277
try:
276278
json_value = json.loads(v)
277279
original[k] = json_value

localstack_snapshot/snapshots/transformer.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import copy
2+
import json
23
import logging
34
import os
45
import re
56
from datetime import datetime
7+
from json import JSONDecodeError
68
from re import Pattern
79
from typing import Any, Callable, Optional, Protocol
810

@@ -375,3 +377,78 @@ def replace_val(s):
375377
f"Registering text pattern '{self.text}' in snapshot with '{self.replacement}'"
376378
)
377379
return input_data
380+
381+
382+
class JsonStringTransformer:
    """
    Parses JSON string at the specified key.
    Additionally, attempts to parse any JSON strings inside the parsed JSON

    This transformer complements the default parsing of JSON strings in
    localstack_snapshot.snapshots.prototype.SnapshotSession._transform_dict_to_parseable_values

    Shortcomings of the default parser that this transformer addresses:
    - parsing of nested JSON strings '{"a": "{\\"b\\":42}"}'
    - parsing of JSON arrays at the specified key, e.g. '["a", "b"]'

    Such parsing allows applying transformations further to the elements of the parsed JSON - timestamps, ARNs, etc.

    Such parsing is not done by default because it's not a common use case.
    Whether to parse a JSON string or not should be decided by the user on a case by case basis.
    Limited general parsing that we already have is preserved for backwards compatibility.

    Implementation notes:
    - the key is matched at any depth of the input (inside nested dicts and lists)
    - dicts are updated in place and returned; lists are rebuilt as new lists
    - only strings whose first non-whitespace character is "{" or "[" are considered JSON candidates
    - a string that fails to parse is left untouched
    """

    # Dictionary key whose string value should be parsed as JSON.
    key: str

    def __init__(self, key: str):
        """
        :param key: key at which a JSON string is expected
        """
        self.key = key

    def transform(self, input_data: dict, *, ctx: Optional[TransformContext] = None) -> dict:
        """
        Replaces JSON strings found at ``self.key`` with their parsed value.

        :param input_data: dict to transform; it is modified in place
        :param ctx: transform context; only passed along to the helpers, not read here
        :return: the same ``input_data`` object
        """
        return self._transform_dict(input_data, ctx=ctx)

    def _transform(self, input_data: Any, ctx: Optional[TransformContext] = None) -> Any:
        """Dispatches on the value type; anything that is not a dict or a list is returned as is."""
        if isinstance(input_data, dict):
            return self._transform_dict(input_data, ctx=ctx)
        if isinstance(input_data, list):
            return self._transform_list(input_data, ctx=ctx)
        return input_data

    def _transform_dict(self, input_data: dict, ctx: Optional[TransformContext] = None) -> dict:
        """
        Parses the JSON string at ``self.key`` and recurses into every other value.

        Only values of existing keys are reassigned, so updating the dict while
        iterating over its items is safe.
        """
        for k, v in input_data.items():
            if k == self.key and isinstance(v, str) and v.strip().startswith(("{", "[")):
                try:
                    json_value = json.loads(v)
                except JSONDecodeError:
                    # The user explicitly asked for parsing at this key, so a failure is
                    # reported loudly; the original string value is kept.
                    SNAPSHOT_LOGGER.exception(
                        f'Value mapped to "{k}" key is not a valid JSON string and won\'t be transformed. Value: {v}'
                    )
                else:
                    # Logged only once parsing succeeded, i.e. when the value really is replaced.
                    SNAPSHOT_LOGGER.debug(f"Replacing string value of {k} with parsed JSON")
                    input_data[k] = self._transform_nested(json_value)
            else:
                input_data[k] = self._transform(v, ctx=ctx)
        return input_data

    def _transform_list(self, input_data: list, ctx: Optional[TransformContext] = None) -> list:
        """Returns a new list with every item transformed."""
        return [self._transform(item, ctx=ctx) for item in input_data]

    def _transform_nested(self, input_data: Any) -> Any:
        """
        Separate method from the main `_transform_dict` one because
        it checks every string while the main one attempts to load at specified key only.
        This one is implicit, best-effort attempt,
        while the main one is explicit about at which key transform should happen
        """
        if isinstance(input_data, list):
            return [self._transform_nested(item) for item in input_data]
        if isinstance(input_data, dict):
            for k, v in input_data.items():
                input_data[k] = self._transform_nested(v)
            return input_data
        if isinstance(input_data, str) and input_data.strip().startswith(("{", "[")):
            try:
                json_value = json.loads(input_data)
            except JSONDecodeError:
                # Best effort: a string that merely looks like JSON stays as it is.
                SNAPSHOT_LOGGER.debug(
                    f"The value is not a valid JSON string and won't be transformed. The value: {input_data}"
                )
                return input_data
            # The parsed value may itself contain further JSON strings.
            return self._transform_nested(json_value)
        return input_data

localstack_snapshot/snapshots/transformer_utility.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33

44
from localstack_snapshot.snapshots.transformer import (
55
JsonpathTransformer,
6+
JsonStringTransformer,
67
KeyValueBasedTransformer,
78
KeyValueBasedTransformerFunctionReplacement,
89
RegexTransformer,
10+
SortingTransformer,
911
TextTransformer,
1012
)
1113

@@ -109,3 +111,27 @@ def text(text: str, replacement: str):
109111
:return: TextTransformer
110112
"""
111113
return TextTransformer(text, replacement)
114+
115+
@staticmethod
def json_string(key: str) -> JsonStringTransformer:
    """Build a JsonStringTransformer bound to ``key``.

    When the value stored at ``key`` is a string holding valid JSON text,
    the transformer replaces it with the parsed object or array.

    :param key: key at which JSON string is expected

    :return: JsonStringTransformer
    """
    return JsonStringTransformer(key=key)
125+
126+
@staticmethod
def sorting(key: str, sorting_fn: Optional[Callable[..., Any]]) -> SortingTransformer:
    """Creates a new SortingTransformer.

    Sorts a list at `key` with the given `sorting_fn` (argument for `sorted(list, key=sorting_fn)`)

    :param key: key at which the list to sort is expected
    :param sorting_fn: sorting function; annotated as optional, it is handed to
        SortingTransformer unchanged

    :return: SortingTransformer
    """
    # `Callable[..., Any]` is the valid spelling for "callable with any arguments";
    # the former `Callable[[...], Any]` is not a valid Callable form.
    return SortingTransformer(key, sorting_fn)

tests/test_transformer.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from localstack_snapshot.snapshots.transformer import (
7+
JsonStringTransformer,
78
SortingTransformer,
89
TimestampTransformer,
910
TransformContext,
@@ -311,6 +312,57 @@ def test_text(self, value):
311312
output = sr(output)
312313
assert json.loads(output) == expected
313314

315+
@pytest.mark.parametrize(
    "input_value,transformed_value",
    [
        pytest.param('{"a": "b"}', {"a": "b"}, id="simple_json_object"),
        pytest.param('{\n "a": "b"\n}', {"a": "b"}, id="formatted_json_object"),
        pytest.param('\n {"a": "b"}', {"a": "b"}, id="json_with_whitespaces"),
        pytest.param('{"a": 42}malformed', '{"a": 42}malformed', id="malformed_json"),
        pytest.param('["a", "b"]', ["a", "b"], id="simple_json_list"),
        pytest.param('{"a": "{\\"b\\":42}"}', {"a": {"b": 42}}, id="nested_json_object"),
        pytest.param(
            '{"a": "\\n {\\n \\"b\\":42}"}',
            {"a": {"b": 42}},
            id="nested_formatted_json_object_with_whitespaces",
        ),
        pytest.param(
            '{"a": "[{\\"b\\":\\"c\\"}]"}', {"a": [{"b": "c"}]}, id="nested_json_list"
        ),
        pytest.param(
            '{"a": "{\\"b\\":42malformed}"}',
            {"a": '{"b":42malformed}'},
            id="malformed_nested_json",
        ),
        pytest.param("[]", [], id="empty_list"),
        pytest.param("{}", {}, id="empty_object"),
        pytest.param("", "", id="empty_string"),
    ],
)
def test_json_string(self, input_value, transformed_value):
    """The value stored at the configured key is replaced by its parsed form.

    Malformed JSON (top-level or nested) and the empty string must come back unchanged.
    """
    key = "key"

    # Transform a single-entry dict holding the parametrized value at the target key.
    output = JsonStringTransformer(key).transform({key: input_value}, ctx=TransformContext())

    assert output == {key: transformed_value}
353+
354+
def test_json_string_in_a_nested_key(self):
    """The target key is also found when it sits in a dict nested inside a list."""
    key = "nested-key-in-an-object-hidden-inside-a-list"
    payload = {"top-level-key": [{key: '{"a": "b"}'}]}

    result = JsonStringTransformer(key).transform(payload, ctx=TransformContext())

    assert result == {"top-level-key": [{key: {"a": "b"}}]}
365+
314366

315367
class TestTimestampTransformer:
316368
def test_generic_timestamp_transformer(self):

0 commit comments

Comments
 (0)