Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/pyodide/helpers.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,17 @@ _REPLACEMENTS = [
"var tableBase=metadata.tableSize?wasmTable.length:0;" +
"Module.snapshotDebug && console.log('loadWebAssemblyModule', libName, memoryBase, tableBase);",
],
[
"function loadLibData(){",
"""
function loadLibData(){
var f = findLibraryFS(libName, flags.rpath);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

findLibraryFS probably doesn't exist in old Emscripten versions? I guess I added that in 4.X

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. My thought was to get it to work for 0.28.2 first and then work on 0.26 once that's working.

var libData = Module.patched_loadLibData(Module, f);
return flags.loadAsync ? Promise.resolve(libData) : libData;
}
function loadLibData1(){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well I'm trying to replace the definition of loadLibData. I'm replacing "function loadLibData(){" so after that there's a function body. I added my own function body but I need to define a new pointless function in order to discard the old function body.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. How about using a more meaningful name, such as loadLibDataPrivate, just in case?

""",
],
]

def _python_bundle(version, *, pyodide_asm_wasm = None, pyodide_asm_js = None, python_stdlib_zip = None, emscripten_setup_override = None):
Expand Down
9 changes: 3 additions & 6 deletions src/pyodide/internal/pool/builtin_wrappers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,12 +226,9 @@ function prepareStackTrace(
return [false, funcName];
}
return [
[
'loadModule',
'convertJsFunctionToWasm',
'generate',
'getPyEMCountArgsPtr',
].includes(funcName),
['convertJsFunctionToWasm', 'generate', 'getPyEMCountArgsPtr'].includes(
funcName
),
funcName,
];
} catch (e) {
Expand Down
20 changes: 20 additions & 0 deletions src/pyodide/internal/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import {
import { loadPackages } from 'pyodide-internal:loadPackage';
import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';
import { TRANSITIVE_REQUIREMENTS } from 'pyodide-internal:metadata';
import { getTrustedReadFunc } from 'pyodide-internal:readOnlyFS';

/**
* After running `instantiateEmscriptenModule` but before calling into any C
Expand Down Expand Up @@ -206,6 +207,24 @@ export function clearSignals(Module: Module): void {
}
}

/**
 * Compile a shared library stored in the Emscripten file system into a
 * WebAssembly module, refusing any file that does not live on a file system
 * with a registered trusted read function.
 *
 * @param Module The Emscripten module whose `FS` is used to resolve `path`.
 * @param path Path of the shared library inside the Emscripten file system.
 * @returns The module produced by `UnsafeEval.newWasmModule` from the file's bytes.
 * @throws Error if `getTrustedReadFunc` returns nothing for the resolved node.
 */
function patched_loadLibData(Module: Module, path: string): WebAssembly.Module {
const { node } = Module.FS.lookupPath(path);
// Get the trusted read function from our private Map, not from the node
// or filesystem object (which could have been tampered with by user code)
Comment on lines +212 to +213
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really something we should worry about? As long as users can access the Emscripten module object (I guess they can?), they can do anything.

I wonder if this additional barrier is really helpful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Users can access the Module object. However, they cannot access objects that are captured by closures. So patched_loadLibData can call UnsafeEval.newWasmModule(), but it is only willing to call it on read only data. The user could do all sorts of mischief with the file system, or even replace Module.patched_loadLibData with something else. But they can't import UnsafeEval into their own code, so they can only call it via the functions that call it that are attached to Module. As long as none of these functions are willing to compile arbitrary modules, we're okay.

const trustedRead = getTrustedReadFunc(node);
if (!trustedRead) {
// No trusted read function is registered for this node's file system, so
// its contents must not be compiled.
throw new Error(
'Can only load shared libraries from read only file systems.'
);
}
// The file size comes from the node's own getattr; it is used to size the
// buffer and as the number of bytes to read.
const stat = node.node_ops.getattr(node);
const buffer = new Uint8Array(stat.size);
// Create a minimal stream object and read using trusted read function
const stream = { node, position: 0 };
// NOTE(review): the value returned by trustedRead is ignored — confirm that a
// short read cannot happen here.
trustedRead(stream, buffer, 0, stat.size, 0);
return UnsafeEval.newWasmModule(buffer);
}

export function loadPyodide(
isWorkerd: boolean,
lockfile: PackageLock,
Expand All @@ -216,6 +235,7 @@ export function loadPyodide(
const Module = enterJaegerSpan('instantiate_emscripten', () =>
SetupEmscripten.getModule()
);
Module.patched_loadLibData = patched_loadLibData;
Module.API.config.jsglobals = globalThis;
if (isWorkerd) {
Module.API.config.indexURL = indexURL;
Expand Down
18 changes: 18 additions & 0 deletions src/pyodide/internal/readOnlyFS.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
/** Type of the `read` member of a file system's stream operations. */
type ReadFn<Info> = FSStreamOps<Info>['read'];

// When we load shared libraries we need to ensure they come from a read only file system.

// Map from each read-only filesystem object to the read function it was created with. We store
// the function itself to prevent attacks where user code modifies stream_ops.read after
// filesystem creation and tricks us into loading a dynamically generated .so file.
const TRUSTED_READ_FUNCS: Map<object, ReadFn<any>> = new Map();

/**
 * Look up the trusted read function for the file system that `node` is
 * mounted on.
 *
 * @param node A file system node; its `mount.type` is used as the lookup key.
 * @returns The read function stored in `TRUSTED_READ_FUNCS` for that file
 *   system, or `undefined` when none has been stored.
 */
export function getTrustedReadFunc<Info>(
  node: FSNode<Info>
): ReadFn<Info> | undefined {
  const fsType = node.mount.type;
  const trustedRead = TRUSTED_READ_FUNCS.get(fsType);
  return trustedRead;
}

export function createReadonlyFS<Info>(
FSOps: FSOps<Info>,
Module: Module
Expand Down Expand Up @@ -77,5 +92,8 @@ export function createReadonlyFS<Info>(
},
},
};
// Register this filesystem as read-only and store its trusted read function so that we can load
// .so files from it.
TRUSTED_READ_FUNCS.set(ReadOnlyFS, ReadOnlyFS.stream_ops.read);
return ReadOnlyFS;
}
1 change: 1 addition & 0 deletions src/pyodide/types/emscripten.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,5 @@ interface Module {
Py_EmscriptenSignalBuffer: Uint8Array;
_Py_EMSCRIPTEN_SIGNAL_HANDLING: number;
___memory_base: WebAssembly.Global<'i32'>;
patched_loadLibData: (Module: Module, path: string) => WebAssembly.Module;
}
21 changes: 20 additions & 1 deletion src/pyodide/types/filesystem.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,31 @@ interface TarFSInfo {
declare type MetadataDirInfo = Map<string, MetadataFSInfo>;
declare type MetadataFSInfo = MetadataDirInfo | number; // file infos are numbers and dir infos are maps

/**
 * Return type of `FS.lookupPath`. Only the `node` field is declared here.
 */
interface FSLookupResult<Info> {
// The file system node that the looked-up path resolved to.
node: FSNode<Info>;
}

interface FS {
mkdir: (dirname: string) => void;
mkdirTree: (dirname: string) => void;
writeFile: (fname: string, contents: Uint8Array, options: object) => void;
readFile: (fname: string) => Uint8Array;
readFile: (fname: string, options?: { encoding?: string }) => Uint8Array;
mount(fs: object, options: { info?: any }, path: string): void;
createNode<Info>(
parent: FSNode<Info> | null,
name: string,
mode: number
): FSNode<Info>;
lookupPath<Info>(path: string): FSLookupResult<Info>;
open<Info>(nodeOrPath: FSNode<Info> | string, flags?: number): FSStream<Info>;
read<Info>(
stream: FSStream<Info>,
buffer: Uint8Array,
offset: number,
length: number,
position: number
): number;
close<Info>(stream: FSStream<Info>): void;
isFile: (mode: number) => boolean;
readdir: (path: string) => string[];
genericErrors: { 44: Error };
Expand Down Expand Up @@ -86,13 +100,18 @@ interface FSStreamOps<Info> {
) => number;
}

/**
 * Mount record attached to a file system node (see `FSNode.mount`).
 */
interface FSMount {
// The file system implementation object for this mount; readOnlyFS.ts uses
// it as the key when looking up a trusted read function.
type: EmscriptenFS<any>;
}

interface FSNode<Info> {
id: number;
usedBytes: number;
mode: number;
modtime: number;
node_ops: FSNodeOps<Info>;
stream_ops: FSStreamOps<Info>;
mount: FSMount;
info: Info;
contentsOffset?: number | undefined;
tree?: MetadataDirInfo;
Expand Down
1 change: 1 addition & 0 deletions src/workerd/server/tests/python/pytest/pytest.wd-test
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const unitTests :Workerd.Config = (
(name = "tests/test_env.py", pythonModule = embed "pytest/tests/test_env.py"),
(name = "tests/test_fs.py", pythonModule = embed "pytest/tests/test_fs.py"),
(name = "tests/test_import_from_javascript.py", pythonModule = embed "pytest/tests/test_import_from_javascript.py"),
(name = "tests/test_dynlib_loading.py", pythonModule = embed "pytest/tests/test_dynlib_loading.py"),
%PYTHON_VENDORED_MODULES%
],
compatibilityFlags = [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from pathlib import Path

import pytest


def use(x):
    """Accept any value and do nothing with it; always returns None."""
    return None


def test_dynlib_loading(tmp_path, monkeypatch):
# Writes a tiny shared library to tmp_path, puts tmp_path on sys.path, and
# checks that importing it fails with the "read only file systems" error.
# fmt: off
Path(tmp_path / "a.so").write_bytes(
bytes(
[
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a comment about the content of this file?

# Contents of a.so: a minimal WebAssembly binary laid out as a dynamic
# library. Decoded section by section:
#   - magic "\0asm" and version 1
#   - custom section "dylink.0" with one subsection (id 1) holding four zeros
#   - type section: one function type, () -> ()
#   - import section: env.memory (memory), env.__memory_base and
#     env.__table_base (immutable i32 globals)
#   - function section: one function of type 0
#   - export section: function 0 exported as "__wasm_call_ctors"
#   - code section: one body with no locals that immediately ends
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x08, 0x64,
0x79, 0x6c, 0x69, 0x6e, 0x6b, 0x2e, 0x30, 0x01, 0x04, 0x00, 0x00, 0x00,
0x00, 0x01, 0x04, 0x01, 0x60, 0x00, 0x00, 0x02, 0x38, 0x03, 0x03, 0x65,
0x6e, 0x76, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x02, 0x00, 0x00,
0x03, 0x65, 0x6e, 0x76, 0x0d, 0x5f, 0x5f, 0x6d, 0x65, 0x6d, 0x6f, 0x72,
0x79, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x03, 0x7f, 0x00, 0x03, 0x65, 0x6e,
0x76, 0x0c, 0x5f, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x62, 0x61,
0x73, 0x65, 0x03, 0x7f, 0x00, 0x03, 0x02, 0x01, 0x00, 0x07, 0x15, 0x01,
0x11, 0x5f, 0x5f, 0x77, 0x61, 0x73, 0x6d, 0x5f, 0x63, 0x61, 0x6c, 0x6c,
0x5f, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x00, 0x00, 0x0a, 0x04, 0x01, 0x02,
0x00, 0x0b
]
)
)
# fmt: on
monkeypatch.syspath_prepend(tmp_path)
# The import is expected to be rejected; presumably tmp_path is on a writable
# file system, so no trusted read function exists for it — confirm.
with pytest.raises(
ImportError, match="Can only load shared libraries from read only file systems"
):
import a

# References the imported name; presumably only to keep linters from flagging
# the import as unused.
use(a)
Loading