Skip to content

Commit 3e4024c

Browse files
committed
fix: Add valid UTF-8 and length filtering when recovering strings from code
1 parent 5a278f6 commit 3e4024c

File tree

1 file changed

+48
-16
lines changed

1 file changed

+48
-16
lines changed

binja_plugin/actions.py

Lines changed: 48 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -328,27 +328,59 @@ def run(self):
328328
if detailed_operand[0] == "src" and isinstance(
329329
detailed_operand[1], MediumLevelILConst
330330
):
331+
candidate_string_slice_data_addr = data_var.address
331332
candidate_string_slice_data = data_var.value
332-
candidate_string_slice_length = detailed_operand[
333+
candidate_string_slice_len = detailed_operand[
333334
1
334335
].value.value
335-
logger.log_info(
336-
f"Reference to data var at {data_var.address:#x} with value {candidate_string_slice_data} is followed by store of integer with value {candidate_string_slice_length}"
337-
)
338-
logger.log_info(
339-
f"Candidate string: {candidate_string_slice_data[:candidate_string_slice_length]}"
336+
logger.log_debug(
337+
f"Reference to candidate string in code at {code_ref.address:#x} with data at {candidate_string_slice_data_addr:#x} with value {candidate_string_slice_data} is followed by store of integer with value {candidate_string_slice_len}"
340338
)
341339

342-
self.bv.define_user_data_var(
343-
addr=data_var.address,
344-
var_type=Type.array(
345-
type=Type.char(),
346-
count=candidate_string_slice_length,
347-
),
348-
)
349-
logger.log_info(
350-
f"Defined string: {candidate_string_slice_data[:candidate_string_slice_length]}"
351-
)
340+
# Filter out any potential string slice which has length 0
341+
if candidate_string_slice_len == 0:
342+
continue
343+
# Filter out any potential string slice which is too long
344+
if (
345+
candidate_string_slice_len >= 0x1000
346+
): # TODO: maybe change this limit
347+
continue
348+
349+
# Attempt to read the pointed-to value as a string slice, using the length obtained above.
350+
try:
351+
candidate_string_slice = self.bv.read(
352+
addr=candidate_string_slice_data_addr,
353+
length=candidate_string_slice_len,
354+
)
355+
except Exception as err:
356+
logger.log_error(
357+
f"Failed to read from address {candidate_string_slice_data_addr} with length {candidate_string_slice_len}: {err}"
358+
)
359+
continue
360+
361+
# Sanity check whether the recovered string is valid UTF-8
362+
try:
363+
candidate_utf8_string = (
364+
candidate_string_slice.decode("utf-8")
365+
)
366+
367+
logger.log_info(
368+
f'Recovered string referenced in code at {code_ref.address:#x}, with data at addr {candidate_string_slice_data_addr:#x}, len {candidate_string_slice_len}: "{candidate_utf8_string}"'
369+
)
370+
371+
self.bv.define_user_data_var(
372+
addr=data_var.address,
373+
var_type=Type.array(
374+
type=Type.char(),
375+
count=candidate_string_slice_len,
376+
),
377+
)
378+
379+
except UnicodeDecodeError as err:
380+
logger.log_warn(
381+
f"Candidate string slice {candidate_string_slice} does not decode to a valid UTF-8 string; excluding from final results: {err}"
382+
)
383+
continue
352384

353385
self.bv.commit_undo_actions()
354386
self.bv.update_analysis()

0 commit comments

Comments
 (0)