Skip to content

Commit 383c8ef

Browse files
kpamnanyNHDalyIanButterworth
authored
Redact object data in heap snapshots, with option to opt-out (#55326)
The contents of strings can contain user data which may be proprietary and emitting them in the heap snapshot makes the heap snapshot a potential vulnerability rather than a useful debugging artifact. There are likely other tweaks necessary to make heap snapshots "safe", but this is one less. --------- Co-authored-by: Nathan Daly <[email protected]> Co-authored-by: Ian Butterworth <[email protected]>
1 parent 0c8641a commit 383c8ef

File tree

5 files changed

+44
-18
lines changed

5 files changed

+44
-18
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ Standard library changes
135135

136136
#### Profile
137137

138+
* `Profile.take_heap_snapshot` takes a new keyword argument, `redact_data::Bool`,
139+
that is `true` by default. When set, the contents of Julia objects are not emitted
140+
in the heap snapshot. This currently only applies to strings. ([#55326])
141+
138142
#### Random
139143

140144
#### REPL

src/gc-heap-snapshot.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ struct HeapSnapshot {
182182
// global heap snapshot, mutated by garbage collector
183183
// when snapshotting is on.
184184
int gc_heap_snapshot_enabled = 0;
185+
int gc_heap_snapshot_redact_data = 0;
185186
HeapSnapshot *g_snapshot = nullptr;
186187
// mutex for gc-heap-snapshot.
187188
jl_mutex_t heapsnapshot_lock;
@@ -195,7 +196,7 @@ void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT;
195196

196197

197198
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
198-
ios_t *strings, ios_t *json, char all_one)
199+
ios_t *strings, ios_t *json, char all_one, char redact_data)
199200
{
200201
HeapSnapshot snapshot;
201202
snapshot.nodes = nodes;
@@ -207,6 +208,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
207208

208209
// Enable snapshotting
209210
g_snapshot = &snapshot;
211+
gc_heap_snapshot_redact_data = redact_data;
210212
gc_heap_snapshot_enabled = true;
211213

212214
_add_synthetic_root_entries(&snapshot);
@@ -216,6 +218,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
216218

217219
// Disable snapshotting
218220
gc_heap_snapshot_enabled = false;
221+
gc_heap_snapshot_redact_data = 0;
219222
g_snapshot = nullptr;
220223

221224
jl_mutex_unlock(&heapsnapshot_lock);
@@ -328,7 +331,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
328331

329332
if (jl_is_string(a)) {
330333
node_type = "String";
331-
name = jl_string_data(a);
334+
name = gc_heap_snapshot_redact_data ? "<redacted>" : jl_string_data(a);
332335
self_size = jl_string_len(a);
333336
}
334337
else if (jl_is_symbol(a)) {

src/gc-heap-snapshot.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i
122122
// Functions to call from Julia to take heap snapshot
123123
// ---------------------------------------------------------------------
124124
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
125-
ios_t *strings, ios_t *json, char all_one);
125+
ios_t *strings, ios_t *json, char all_one, char redact_data);
126126

127127

128128
#ifdef __cplusplus

stdlib/Profile/src/Profile.jl

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,8 +1250,10 @@ end
12501250

12511251

12521252
"""
1253-
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false)
1254-
Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false)
1253+
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false;
1254+
redact_data::Bool=true, streaming::Bool=false)
1255+
Profile.take_heap_snapshot(all_one::Bool=false; redact_data:Bool=true,
1256+
dir::String=nothing, streaming::Bool=false)
12551257
12561258
Write a snapshot of the heap, in the JSON format expected by the Chrome
12571259
Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
@@ -1262,6 +1264,8 @@ full file path, or IO stream.
12621264
If `all_one` is true, then report the size of every object as one so they can be easily
12631265
counted. Otherwise, report the actual size.
12641266
1267+
If `redact_data` is true (default), then do not emit the contents of any object.
1268+
12651269
If `streaming` is true, we will stream the snapshot data out into four files, using filepath
12661270
as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
12671271
used for any setting where your memory is constrained. These files can then be reassembled
@@ -1277,28 +1281,28 @@ backwards-compatibility) and your process is killed, note that this will always
12771281
parts in the same directory as your provided filepath, so you can still reconstruct the
12781282
snapshot after the fact, via `assemble_snapshot()`.
12791283
"""
1280-
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false)
1284+
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false)
12811285
if streaming
1282-
_stream_heap_snapshot(filepath, all_one)
1286+
_stream_heap_snapshot(filepath, all_one, redact_data)
12831287
else
12841288
# Support the legacy, non-streaming mode, by first streaming the parts, then
12851289
# reassembling it after we're done.
12861290
prefix = filepath
1287-
_stream_heap_snapshot(prefix, all_one)
1291+
_stream_heap_snapshot(prefix, all_one, redact_data)
12881292
Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
12891293
Profile.HeapSnapshot.cleanup_streamed_files(prefix)
12901294
end
12911295
return filepath
12921296
end
1293-
function take_heap_snapshot(io::IO, all_one::Bool=false)
1297+
function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true)
12941298
# Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
12951299
# then reassembling it after we're done.
12961300
dir = tempdir()
12971301
prefix = joinpath(dir, "snapshot")
1298-
_stream_heap_snapshot(prefix, all_one)
1302+
_stream_heap_snapshot(prefix, all_one, redact_data)
12991303
Profile.HeapSnapshot.assemble_snapshot(prefix, io)
13001304
end
1301-
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
1305+
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool)
13021306
# Nodes and edges are binary files
13031307
open("$prefix.nodes", "w") do nodes
13041308
open("$prefix.edges", "w") do edges
@@ -1311,9 +1315,9 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
13111315
Base.@_lock_ios(json,
13121316
ccall(:jl_gc_take_heap_snapshot,
13131317
Cvoid,
1314-
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar),
1318+
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar, Cchar),
13151319
nodes.handle, edges.handle, strings.handle, json.handle,
1316-
Cchar(all_one))
1320+
Cchar(all_one), Cchar(redact_data))
13171321
)
13181322
)
13191323
)
@@ -1323,7 +1327,7 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
13231327
end
13241328
end
13251329
end
1326-
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString}
1330+
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) where {S <: AbstractString}
13271331
fname = "$(getpid())_$(time_ns()).heapsnapshot"
13281332
if isnothing(dir)
13291333
wd = pwd()
@@ -1338,7 +1342,7 @@ function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing)
13381342
else
13391343
fpath = joinpath(expanduser(dir), fname)
13401344
end
1341-
return take_heap_snapshot(fpath, all_one)
1345+
return take_heap_snapshot(fpath, all_one; kwargs...)
13421346
end
13431347

13441348
"""

stdlib/Profile/test/runtests.jl

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,16 +279,31 @@ end
279279

280280
@testset "HeapSnapshot" begin
281281
tmpdir = mktempdir()
282+
283+
# ensure that we can prevent redacting data
282284
fname = cd(tmpdir) do
283-
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String)
285+
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot(; redact_data=false))"`, String)
284286
end
285287

286288
@test isfile(fname)
287289

288-
open(fname) do fs
289-
@test readline(fs) != ""
290+
sshot = read(fname, String)
291+
@test sshot != ""
292+
@test contains(sshot, "redact_this")
293+
294+
rm(fname)
295+
296+
# ensure that string data is redacted by default
297+
fname = cd(tmpdir) do
298+
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot())"`, String)
290299
end
291300

301+
@test isfile(fname)
302+
303+
sshot = read(fname, String)
304+
@test sshot != ""
305+
@test !contains(sshot, "redact_this")
306+
292307
rm(fname)
293308
rm(tmpdir, force = true, recursive = true)
294309
end

0 commit comments

Comments
 (0)