diff --git a/.github/workflows/CargoPublish.yml b/.github/workflows/CargoPublish.yml index d8f165c38..b97800910 100644 --- a/.github/workflows/CargoPublish.yml +++ b/.github/workflows/CargoPublish.yml @@ -71,6 +71,18 @@ jobs: env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_PUBLISH_TOKEN }} + - name: Publish hyperlight-component-util + continue-on-error: ${{ inputs.dry_run }} + run: cargo publish --manifest-path ./src/hyperlight_component_util/Cargo.toml ${{ inputs.dry_run && '--dry-run' || '' }} + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_PUBLISH_TOKEN }} + + - name: Publish hyperlight-component-macro + continue-on-error: ${{ inputs.dry_run }} + run: cargo publish --manifest-path ./src/hyperlight_component_macro/Cargo.toml ${{ inputs.dry_run && '--dry-run' || '' }} + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_PUBLISH_TOKEN }} + # TODO: Do we want to publish hyperlight-guest-capi to crates.io given that it's not for Rust consumption? # - name: Publish hyperlight-guest-capi # # `--no-verify` is needed because build.rs writes to "include/hyperlight_guest.h", but since we exclude that directory in Cargo.toml, it should be fine. 
diff --git a/Cargo.lock b/Cargo.lock index 6b671b9ec..d3f4b30ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -995,6 +995,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" dependencies = [ "foldhash", + "serde", ] [[package]] @@ -1145,9 +1146,37 @@ dependencies = [ "flatbuffers", "hyperlight-testing", "log", + "spin 0.10.0", "tracing", ] +[[package]] +name = "hyperlight-component-macro" +version = "0.5.1" +dependencies = [ + "env_logger", + "hyperlight-component-util", + "itertools 0.14.0", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wasmparser", +] + +[[package]] +name = "hyperlight-component-util" +version = "0.5.1" +dependencies = [ + "itertools 0.14.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wasmparser", +] + [[package]] name = "hyperlight-fuzz" version = "0.0.0" @@ -1199,6 +1228,7 @@ dependencies = [ "gdbstub_arch", "goblin", "hyperlight-common", + "hyperlight-component-macro", "hyperlight-testing", "kvm-bindings", "kvm-ioctls", @@ -1445,6 +1475,7 @@ checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] @@ -3623,6 +3654,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasmparser" +version = "0.224.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04f17a5917c2ddd3819e84c661fae0d6ba29d7b9c1f0e96c708c65a9c4188e11" +dependencies = [ + "bitflags 2.9.1", + "hashbrown", + "indexmap", + "semver", + "serde", +] + [[package]] name = "web-sys" version = "0.3.77" diff --git a/Cargo.toml b/Cargo.toml index 828a07516..984aaca8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,14 +11,17 @@ members = [ "src/hyperlight_host", "src/hyperlight_guest_capi", "src/hyperlight_testing", - "fuzz", + "fuzz", "src/hyperlight_guest_bin", + "src/hyperlight_component_util", + "src/hyperlight_component_macro", ] # 
Guests have custom linker flags, so we need to exclude them from the workspace exclude = [ "src/tests/rust_guests/callbackguest", "src/tests/rust_guests/dummyguest", "src/tests/rust_guests/simpleguest", + "src/tests/rust_guests/witguest", ] [workspace.package] @@ -36,6 +39,8 @@ hyperlight-host = { path = "src/hyperlight_host", version = "0.5.1", default-fea hyperlight-guest = { path = "src/hyperlight_guest", version = "0.5.1", default-features = false } hyperlight-guest-bin = { path = "src/hyperlight_guest_bin", version = "0.5.1", default-features = false } hyperlight-testing = { path = "src/hyperlight_testing", default-features = false } +hyperlight-component-util = { path = "src/hyperlight_component_util" } +hyperlight-component-macro = { path = "src/hyperlight_component_macro" } [workspace.lints.rust] unsafe_op_in_unsafe_fn = "deny" diff --git a/Justfile b/Justfile index 78cc7ab5f..b491d1db7 100644 --- a/Justfile +++ b/Justfile @@ -12,6 +12,7 @@ default-target := "debug" simpleguest_source := "src/tests/rust_guests/simpleguest/target/x86_64-unknown-none" dummyguest_source := "src/tests/rust_guests/dummyguest/target/x86_64-unknown-none" callbackguest_source := "src/tests/rust_guests/callbackguest/target/x86_64-unknown-none" +witguest_source := "src/tests/rust_guests/witguest/target/x86_64-unknown-none" rust_guests_bin_dir := "src/tests/rust_guests/bin" ################ @@ -28,15 +29,21 @@ build target=default-target: # build testing guest binaries guests: build-and-move-rust-guests build-and-move-c-guests -build-rust-guests target=default-target: +witguest-wit: + cargo install --locked wasm-tools + cd src/tests/rust_guests/witguest && wasm-tools component wit guest.wit -w -o interface.wasm + +build-rust-guests target=default-target: (witguest-wit) cd src/tests/rust_guests/callbackguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }} cd src/tests/rust_guests/simpleguest && cargo build --profile={{ if target == "debug" { "dev" } else { 
target } }} cd src/tests/rust_guests/dummyguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }} + cd src/tests/rust_guests/witguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }} @move-rust-guests target=default-target: cp {{ callbackguest_source }}/{{ target }}/callbackguest* {{ rust_guests_bin_dir }}/{{ target }}/ cp {{ simpleguest_source }}/{{ target }}/simpleguest* {{ rust_guests_bin_dir }}/{{ target }}/ cp {{ dummyguest_source }}/{{ target }}/dummyguest* {{ rust_guests_bin_dir }}/{{ target }}/ + cp {{ witguest_source }}/{{ target }}/witguest* {{ rust_guests_bin_dir }}/{{ target }}/ build-and-move-rust-guests: (build-rust-guests "debug") (move-rust-guests "debug") (build-rust-guests "release") (move-rust-guests "release") build-and-move-c-guests: (build-c-guests "debug") (move-c-guests "debug") (build-c-guests "release") (move-c-guests "release") @@ -48,6 +55,8 @@ clean-rust: cd src/tests/rust_guests/simpleguest && cargo clean cd src/tests/rust_guests/dummyguest && cargo clean cd src/tests/rust_guests/callbackguest && cargo clean + cd src/tests/rust_guests/witguest && cargo clean + cd src/tests/rust_guests/witguest && rm -f interface.wasm git clean -fdx src/tests/c_guests/bin src/tests/rust_guests/bin ################ @@ -127,6 +136,7 @@ fmt-check: cargo +nightly fmt --manifest-path src/tests/rust_guests/callbackguest/Cargo.toml -- --check cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml -- --check cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml -- --check + cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml -- --check cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml -- --check check-license-headers: @@ -137,14 +147,16 @@ fmt-apply: cargo +nightly fmt --manifest-path src/tests/rust_guests/callbackguest/Cargo.toml cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml 
cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml + cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml -clippy target=default-target: +clippy target=default-target: (witguest-wit) cargo clippy --all-targets --all-features --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings -clippy-guests target=default-target: +clippy-guests target=default-target: (witguest-wit) cd src/tests/rust_guests/simpleguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings cd src/tests/rust_guests/callbackguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings + cd src/tests/rust_guests/witguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings clippy-apply-fix-unix: cargo clippy --fix --all diff --git a/src/hyperlight_common/Cargo.toml b/src/hyperlight_common/Cargo.toml index 8cdb66b3e..1afea5022 100644 --- a/src/hyperlight_common/Cargo.toml +++ b/src/hyperlight_common/Cargo.toml @@ -20,10 +20,12 @@ anyhow = { version = "1.0.98", default-features = false } log = "0.4.27" tracing = { version = "0.1.41", optional = true } arbitrary = {version = "1.4.1", optional = true, features = ["derive"]} +spin = "0.10.0" [features] default = ["tracing"] fuzzing = ["dep:arbitrary"] +std = [] [dev-dependencies] hyperlight-testing = { workspace = true } diff --git a/src/hyperlight_common/src/lib.rs b/src/hyperlight_common/src/lib.rs index ef907dfaa..e22cf6417 100644 --- a/src/hyperlight_common/src/lib.rs +++ b/src/hyperlight_common/src/lib.rs @@ -39,3 +39,6 @@ pub mod mem; /// cbindgen:ignore pub mod outb; + +/// cbindgen:ignore +pub mod resource; diff --git a/src/hyperlight_common/src/resource.rs b/src/hyperlight_common/src/resource.rs new file mode 100644 index 000000000..7ea758cbc --- /dev/null +++ 
b/src/hyperlight_common/src/resource.rs @@ -0,0 +1,172 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! Shared operations around resources + +// "Needless" lifetimes are useful for clarity +#![allow(clippy::needless_lifetimes)] + +use alloc::sync::Arc; + +#[cfg(feature = "std")] +extern crate std; +use core::marker::{PhantomData, Send}; +use core::ops::Deref; +#[cfg(feature = "std")] +use std::sync::{RwLock, RwLockReadGuard}; + +#[cfg(not(feature = "std"))] +use spin::{RwLock, RwLockReadGuard}; + +/// The semantics of component model resources are, pleasingly, +/// roughly compatible with those of Rust references, so we would like +/// to use them more-or-less directly in interfaces generated by +/// hyperlight_component_macro. Less pleasingly, it's not terribly +/// easy to show the semantic agreement statically. +/// +/// In particular, if the host calls into the guest and gives it a +/// borrow of a resource, reentrant host function calls that use that +/// borrow need to be able to resolve the original reference and use +/// it in an appropriately scoped manner, but it is not simple to do +/// this, because the core Hyperlight machinery doesn't offer an easy +/// way to augment the host's context for the span of time of a guest +/// function call. This may be worth revisiting at some time, but in +/// the meantime, it's easier to just do it dynamically. 
+/// +/// # Safety +/// Informally: this only creates SharedRead references, so having a +/// bunch of them going at once is fine. Safe Rust in the host can't +/// use any earlier borrows (potentially invalidating these) until +/// the borrow passed into [`ResourceEntry::lend`] has expired. Because +/// that borrow outlives the [`LentResourceGuard`], it will not expire +/// until that destructor is called. That destructor ensures that (a) +/// there are no outstanding [`BorrowedResourceGuard`]s alive (since +/// they would be holding the read side of the [`RwLock`] if they +/// were), and that (b) the shared flag has been set to false, so +/// [`ResourceEntry::borrow`] will never create another borrow. +pub enum ResourceEntry { + Empty, + Owned(T), + Borrowed(Arc>, *const T), +} +unsafe impl Send for ResourceEntry {} + +pub struct LentResourceGuard<'a> { + flag: Arc>, + already_revoked: bool, + _phantom: core::marker::PhantomData<&'a mut ()>, +} +impl<'a> LentResourceGuard<'a> { + pub fn revoke_nonblocking(&mut self) -> bool { + #[cfg(feature = "std")] + let Ok(mut flag) = self.flag.try_write() else { + return false; + }; + #[cfg(not(feature = "std"))] + let Some(mut flag) = self.flag.try_write() else { + return false; + }; + *flag = false; + self.already_revoked = true; + true + } +} +impl<'a> Drop for LentResourceGuard<'a> { + fn drop(&mut self) { + if !self.already_revoked { + #[allow(unused_mut)] // it isn't actually unused + let mut guard = self.flag.write(); + #[cfg(feature = "std")] + // If a mutex that is just protecting us from our own + // mistakes is poisoned, something is so seriously + // wrong that dying is a sensible response. 
+ #[allow(clippy::unwrap_used)] + { + *guard.unwrap() = false; + } + #[cfg(not(feature = "std"))] + { + *guard = false; + } + } + } +} +pub struct BorrowedResourceGuard<'a, T> { + _flag: Option>, + reference: &'a T, +} +impl<'a, T> Deref for BorrowedResourceGuard<'a, T> { + type Target = T; + fn deref(&self) -> &T { + self.reference + } +} +impl ResourceEntry { + pub fn give(x: T) -> ResourceEntry { + ResourceEntry::Owned(x) + } + pub fn lend<'a>(x: &'a T) -> (LentResourceGuard<'a>, ResourceEntry) { + let flag = Arc::new(RwLock::new(true)); + ( + LentResourceGuard { + flag: flag.clone(), + already_revoked: false, + _phantom: PhantomData {}, + }, + ResourceEntry::Borrowed(flag, x as *const T), + ) + } + pub fn borrow<'a>(&'a self) -> Option> { + match self { + ResourceEntry::Empty => None, + ResourceEntry::Owned(t) => Some(BorrowedResourceGuard { + _flag: None, + reference: t, + }), + ResourceEntry::Borrowed(flag, t) => { + let guard = flag.read(); + // If a mutex that is just protecting us from our own + // mistakes is poisoned, something is so seriously + // wrong that dying is a sensible response. 
+ #[allow(clippy::unwrap_used)] + let flag = { + #[cfg(feature = "std")] + { + guard.unwrap() + } + #[cfg(not(feature = "std"))] + { + guard + } + }; + if *flag { + Some(BorrowedResourceGuard { + _flag: Some(flag), + reference: unsafe { &**t }, + }) + } else { + None + } + } + } + } + pub fn take(&mut self) -> Option { + match core::mem::replace(self, ResourceEntry::Empty) { + ResourceEntry::Owned(t) => Some(t), + _ => None, + } + } +} diff --git a/src/hyperlight_component_macro/Cargo.toml b/src/hyperlight_component_macro/Cargo.toml new file mode 100644 index 000000000..5de63628a --- /dev/null +++ b/src/hyperlight_component_macro/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "hyperlight-component-macro" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true +readme.workspace = true +description = """ +Procedural macros to generate Hyperlight host and guest bindings from component types +""" + +[lib] +name = "hyperlight_component_macro" +proc-macro = true + +[dependencies] +wasmparser = { version = "0.224.0" } +quote = { version = "1.0.38" } +proc-macro2 = { version = "1.0.93" } +syn = { version = "2.0.96" } +itertools = { version = "0.14.0" } +prettyplease = { version = "0.2.31" } +hyperlight-component-util = { workspace = true } +env_logger = { version = "0.11.8" } \ No newline at end of file diff --git a/src/hyperlight_component_macro/src/lib.rs b/src/hyperlight_component_macro/src/lib.rs new file mode 100644 index 000000000..9c74cf5b4 --- /dev/null +++ b/src/hyperlight_component_macro/src/lib.rs @@ -0,0 +1,109 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! # Component-model bindgen macros +//! +//! These macros make it easy to use Wasm Component Model types +//! (e.g. those described by WIT) to describe the interface between a +//! Hyperlight host and guest. +//! +//! For both host and guest bindings, bindings generation takes in a +//! binary-encoded wasm component, which should have roughly the +//! structure of a binary-encoded WIT (in particular, component +//! import/export kebab-names should have `wit:package/name` namespace +//! structure, and the same two-level convention for wrapping a +//! component type into an actual component should be adhered to). If +//! you are using WIT as the input, it is easy to build such a file +//! via `wasm-tools component wit -w -o file.wasm file.wit`. +//! +//! Both macros can take the path to such a file as a parameter, or, +//! if one is not provided, will fall back to using the path in the +//! environment variable `$WIT_WORLD`. A relative path provided either way +//! will be resolved relative to `$CARGO_MANIFEST_DIR`. +//! +//! ## Debugging +//! +//! The generated code can be examined by setting the environment +//! variable `$HYPERLIGHT_COMPONENT_MACRO_DEBUG=/path/to/file.rs`, +//! which will result in the generated code being written to that +//! file, which is then included back into the Rust source. +//! +//! The macros also can be configured to output a great deal of debug +//! information about the internal elaboration and codegen +//! phases. This is logged via the `log` and `env_logger` crates, so +//! 
setting `RUST_LOG=debug` before running the compiler should +//! suffice to produce this output. + +extern crate proc_macro; + +use hyperlight_component_util::*; + +/// Create host bindings for the wasm component type in the file +/// passed in (or `$WIT_WORLD`, if nothing is passed in). This will +/// produce all relevant types and trait implementations for the +/// component type, as well as functions allowing the component to be +/// instantiated inside a sandbox. +/// +/// This includes both a primitive `register_host_functions`, which can +/// be used to directly register the host functions on any sandbox +/// (and which can easily be used with Hyperlight-Wasm), as well as an +/// `instantiate()` method on the component trait that makes +/// instantiating the sandbox particularly ergonomic in core +/// Hyperlight. +#[proc_macro] +pub fn host_bindgen(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + env_logger::init(); + let path: Option = syn::parse_macro_input!(input as Option); + let path = path + .map(|x| x.value().into()) + .unwrap_or_else(|| std::env::var_os("WIT_WORLD").unwrap()); + util::read_wit_type_from_file(path, |kebab_name, ct| { + let decls = emit::run_state(false, false, |s| { + rtypes::emit_toplevel(s, &kebab_name, ct); + host::emit_toplevel(s, &kebab_name, ct); + }); + util::emit_decls(decls).into() + }) +} + +/// Create the hyperlight_guest_init() function (which should be +/// called in hyperlight_main()) for the wasm component type in the +/// file passed in (or `$WIT_WORLD`, if nothing is passed in). This +/// function registers Hyperlight functions for component exports +/// (which are implemented by calling into the trait provided) and +/// implements the relevant traits for a trivial Host type (by calling +/// into the Hyperlight host). 
+#[proc_macro] +pub fn guest_bindgen(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + env_logger::init(); + let path: Option = syn::parse_macro_input!(input as Option); + let path = path + .map(|x| x.value().into()) + .unwrap_or_else(|| std::env::var_os("WIT_WORLD").unwrap()); + util::read_wit_type_from_file(path, |kebab_name, ct| { + let decls = emit::run_state(true, false, |s| { + // Emit type/trait definitions for all instances in the world + rtypes::emit_toplevel(s, &kebab_name, ct); + // Emit the host/guest function registrations + guest::emit_toplevel(s, &kebab_name, ct); + }); + // Use util::emit_decls() to choose between emitting the token + // stream directly and emitting an include!() pointing at a + // temporary file, depending on whether the user has requested + // a debug temporary file be created. + util::emit_decls(decls).into() + }) +} diff --git a/src/hyperlight_component_util/Cargo.toml b/src/hyperlight_component_util/Cargo.toml new file mode 100644 index 000000000..bfef6be27 --- /dev/null +++ b/src/hyperlight_component_util/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "hyperlight-component-util" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true +readme.workspace = true +description = """ +Shared implementation for the procedural macros that generate Hyperlight host and guest bindings from component types +""" + +[lib] +name = "hyperlight_component_util" + +[dependencies] +wasmparser = { version = "0.224.0" } +quote = { version = "1.0.38" } +proc-macro2 = { version = "1.0.93" } +syn = { version = "2.0.96" } +itertools = { version = "0.14.0" } +prettyplease = { version = "0.2.31" } +log = { version = "0.4" } \ No newline at end of file diff --git a/src/hyperlight_component_util/src/component.rs b/src/hyperlight_component_util/src/component.rs new file mode 100644 index 000000000..866419abc --- /dev/null +++ 
b/src/hyperlight_component_util/src/component.rs @@ -0,0 +1,159 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! Just enough component parsing support to get at the actual types + +use wasmparser::Payload::{ + ComponentAliasSection, ComponentExportSection, ComponentTypeSection, Version, +}; +use wasmparser::{ + ComponentAlias, ComponentExternalKind, ComponentOuterAliasKind, ComponentType, + ComponentTypeRef, Payload, +}; + +use crate::etypes::{Component, Ctx, Defined}; + +/// From [`wasmparser::ComponentExport`], elaborate a deftype_e as per +/// the specification. +fn raw_type_export_type<'p, 'a, 'c>( + ctx: &'c Ctx<'p, 'a>, + ce: &'c wasmparser::ComponentExport<'a>, +) -> &'c Defined<'a> { + match ce.ty { + Some(ComponentTypeRef::Component(n)) => match ctx.types.get(n as usize) { + Some(t) => t, + None => { + panic!("malformed component type export: ascription does not refer to a type"); + } + }, + Some(_) => { + panic!( + "malformed component type export: ascription does not refer to a component type" + ); + } + None => match ctx.types.get(ce.index as usize) { + Some(t) => t, + None => { + panic!("malformed component type export: does not refer to a type"); + } + }, + } +} + +/// Find the last exported type in a component, since in wasm-encoded +/// WIT this is typically the main world to use. 
This is a very +/// special case that just lets us pull a type out of a value-level +/// +/// Precondition: The given iterator is +/// - a component, whose +/// - encoding version is 0xd exactly, and who +/// - does not contain any value-level aliases, and whose +/// - final export is a component type +/// +/// Anything that is a "binary-encoded WIT" produced by a recent +/// toolchain should satisfy this. On violation, this function will +/// panic with an error message. +/// +/// The reason we look for the last export is that the WIT binary +/// encoding encodes any instance type imported/exported from the main +/// component (a/k/a WIT world) as a type export, followed by a final +/// type export for the type of the main component/world. +/// +/// TODO: Allow the user to specify a specific export to use (or a WIT +/// world name), since current WIT tooling can generate encoded +/// packages with multiple component types in them. +/// +/// TODO: Encode even more assumptions about WIT package structure +/// (which are already there in rtypes/host/guest) and allow looking +/// for a specific named world, instead of simply grabbing the last +/// export. +pub fn read_component_single_exported_type<'a>( + items: impl Iterator>>, +) -> Component<'a> { + let mut ctx = Ctx::new(None, false); + let mut last_idx = None; + for x in items { + match x { + Ok(Version { num, encoding, .. 
}) => { + if encoding != wasmparser::Encoding::Component { + panic!("wasm file is not a component") + } + if num != 0xd { + panic!("unknown component encoding version 0x{:x}\n", num); + } + } + Ok(ComponentTypeSection(ts)) => { + for t in ts { + match t { + Ok(ComponentType::Component(ct)) => { + let ct_ = ctx.elab_component(&ct); + ctx.types.push(Defined::Component(ct_.unwrap())); + } + _ => panic!("non-component type"), + } + } + } + Ok(ComponentExportSection(es)) => { + for e in es { + match e { + Err(_) => panic!("invalid export section"), + Ok(ce) => { + if ce.kind == ComponentExternalKind::Type { + last_idx = Some(ctx.types.len()); + ctx.types.push(raw_type_export_type(&ctx, &ce).clone()); + } + } + } + } + } + Ok(ComponentAliasSection(r#as)) => { + for a in r#as { + match a { + Ok(ComponentAlias::InstanceExport { + kind: ComponentExternalKind::Type, + .. + }) + | Ok(ComponentAlias::Outer { + kind: ComponentOuterAliasKind::Type, + .. + }) => { + panic!("Component outer type aliases are not supported") + } + // Anything else doesn't affect the index + // space that we are interested in, so we can + // safely ignore + _ => {} + } + } + } + + // No other component section should be terribly relevant + // for us. We would not generally expect to find them in + // a file that just represents a type like this, but it + // seems like there are/may be a whole bunch of debugging + // custom sections, etc that might show up, so for now + // let's just ignore anything. + _ => {} + } + } + match last_idx { + None => panic!("no exported type"), + Some(n) => match ctx.types.into_iter().nth(n) { + Some(Defined::Component(c)) => c, + _ => panic!("final export is not component"), + }, + } +} diff --git a/src/hyperlight_component_util/src/elaborate.rs b/src/hyperlight_component_util/src/elaborate.rs new file mode 100644 index 000000000..c38051725 --- /dev/null +++ b/src/hyperlight_component_util/src/elaborate.rs @@ -0,0 +1,712 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! Component type elaboration +//! +//! This is a pretty direct port of the relevant sections of the OCaml +//! reference interpreter, except that the approach to substitutions has +//! been changed significantly. (Although the core capture-avoiding +//! substitution routines are of course the same, the ways in which +//! substitutions are represented/constructed are quite different; see +//! substitute.rs for more details of the approach here). + +use wasmparser::{ + ComponentAlias, ComponentDefinedType, ComponentFuncResult, ComponentFuncType, + ComponentOuterAliasKind, ComponentType, ComponentTypeDeclaration, ComponentTypeRef, + ComponentValType, CompositeInnerType, CoreType, InstanceTypeDeclaration, ModuleTypeDeclaration, + OuterAliasKind, PrimitiveValType, TypeBounds, TypeRef, +}; + +use crate::etypes::{ + self, BoundedTyvar, Component, CoreDefined, CoreExportDecl, CoreExternDesc, CoreModule, + CoreOrComponentExternDesc, Ctx, Defined, ExternDecl, ExternDesc, FloatWidth, Func, Handleable, + Instance, IntWidth, Name, Param, QualifiedInstance, RecordField, Resource, ResourceId, + TypeBound, Tyvar, Value, VariantCase, +}; +use crate::substitute::{self, Substitution, Unvoidable}; +use crate::tv::ResolvedTyvar; +use crate::wf; + +mod basic_conversions { + //! Basic utility conversions between various spec and wasmparser + //! 
#[derive(Debug)]
#[allow(dead_code)]
/// Elaboration-specific errors
pub enum Error<'a> {
    /// Innerizing an outer alias failed; this usually means that the
    /// outer alias refers to a resource type or something like that.
    InvalidOuterAlias(substitute::InnerizeError),
    /// Innerizing an outer alias resulted in an ill-formed type; this
    /// often also means that the outer alias refers to a resource
    /// type or similar.
    IllFormedOuterAlias(wf::Error<'a>),
    /// The component type declarator should never have a resource
    /// type in it, even though this is allowed by the grammar, since
    /// there is no export (or instantiation) to generatively give it
    /// identity.
    ResourceInDeclarator,
    /// The typeidx inside an own/borrow handle should always point
    /// to a resource type (either a bare resource, or, more usually,
    /// an imported/exported type variable that is bounded by `(sub
    /// resource)`).
    HandleToNonResource,
    /// Complex valtypes are allowed to use indirect type indices to
    /// refer to another type, but the type index space is also used
    /// for instance types, bare resource types, etc. A malformed
    /// complex value type which refers to a non-value type will
    /// result in this error.
    ValTypeRefToNonVal(Defined<'a>),
    /// The finalisation/closing of a component or instance type
    /// failed. This usually means that an exported type is referring
    /// to a non-exported type variable or something along those
    /// lines, which makes it impossible for the exported type to be
    /// lifted out of the context.
    ClosingError(substitute::ClosingError),
    /// A finished component or instance type was ill-formed
    IllFormed(wf::Error<'a>),
}
+impl<'p, 'a> Ctx<'p, 'a> { + pub fn elab_component<'c>( + &'c mut self, + decls: &[ComponentTypeDeclaration<'a>], + ) -> Result, Error<'a>> { + let mut ctx = Ctx::new(Some(self), false); + let mut imports = Vec::new(); + let mut exports = Vec::new(); + for decl in decls { + let (import, export) = ctx.elab_component_decl(decl)?; + if let Some(import) = import { + imports.push(import); + } + if let Some(export) = export { + exports.push(export); + } + } + ctx.finish_component(&imports, &exports) + } + + fn elab_core_module_decl<'c>( + &'c mut self, + decl: &ModuleTypeDeclaration<'a>, + ) -> (Option>, Option>) { + match decl { + ModuleTypeDeclaration::Import(i) => (Some(*i), None), + ModuleTypeDeclaration::Type(rg) => { + let ct = self.elab_core_type_rec(rg); + self.core.types.push(ct); + (None, None) + } + ModuleTypeDeclaration::OuterAlias { + kind: OuterAliasKind::Type, + count, + index, + } => { + let ct = self.parents().nth(*count as usize).unwrap().core.types[*index as usize] + .clone(); + self.core.types.push(ct); + (None, None) + } + ModuleTypeDeclaration::Export { name, ty } => ( + None, + Some(CoreExportDecl { + name: Name { name }, + desc: match ty { + TypeRef::Func(n) => match &self.core.types[*n as usize] { + CoreDefined::Func(ft) => CoreExternDesc::Func(ft.clone()), + _ => panic!( + "internal invariant violation: WasmParser function TypeRef refers to non-function" + ), + }, + TypeRef::Table(tt) => CoreExternDesc::Table(*tt), + TypeRef::Memory(mt) => CoreExternDesc::Memory(*mt), + TypeRef::Global(gt) => CoreExternDesc::Global(*gt), + TypeRef::Tag(_) => panic!("core type tags are not supported"), + }, + }), + ), + } + } + + fn elab_core_module<'c>(&'c mut self, decls: &[ModuleTypeDeclaration<'a>]) -> CoreModule<'a> { + let mut ctx = Ctx::new(Some(self), false); + let mut imports = Vec::new(); + let mut exports = Vec::new(); + for decl in decls { + let (import, export) = ctx.elab_core_module_decl(decl); + if let Some(import) = import { + 
imports.push(import) + } + if let Some(export) = export { + exports.push(export) + } + } + CoreModule { + _imports: imports, + _exports: exports, + } + } + + fn elab_core_type_rec<'c>(&'c mut self, rg: &wasmparser::RecGroup) -> CoreDefined<'a> { + match &rg.types().nth(0).unwrap().composite_type.inner { + CompositeInnerType::Func(ft) => CoreDefined::Func(ft.clone()), + _ => panic!("GC core types are not presently supported"), + } + } + + fn elab_core_type<'c>(&'c mut self, ct: &wasmparser::CoreType<'a>) -> CoreDefined<'a> { + match ct { + CoreType::Rec(rg) => self.elab_core_type_rec(rg), + CoreType::Module(ds) => CoreDefined::Module(self.elab_core_module(ds)), + } + } + + /// This tries to handle pretty much everything involved in alias + /// resolution and well-formedness checking. Since both core and + /// component aliases are largely similar, it can handle both and + /// has to return a union of core/component extern descriptors + /// that does not exist in the spec. + fn resolve_alias<'c>( + &'c mut self, + alias: &ComponentAlias<'a>, + ) -> Result, Error<'a>> { + match alias { + ComponentAlias::InstanceExport { + kind, + instance_index, + name, + } => { + let it = &self.instances[*instance_index as usize]; + let ed = &it + .exports + .iter() + .find(|e| e.kebab_name == *name) + .unwrap() + .desc; + let sort = component_external_kind(*kind); + sort_matches_ed(sort, ed); + Ok(CoreOrComponentExternDesc::Component(ed.clone())) + } + ComponentAlias::CoreInstanceExport { + kind, + instance_index, + name, + } => { + let it = &self.core.instances[*instance_index as usize]; + let ed = &it + .exports + .iter() + .find(|e| e.name.name == *name) + .unwrap() + .desc; + let sort = external_kind(*kind); + sort_matches_core_ed(sort, ed); + Ok(CoreOrComponentExternDesc::Core(ed.clone())) + } + ComponentAlias::Outer { kind, count, index } => { + if *kind != ComponentOuterAliasKind::Type { + panic!("In types, only outer type aliases are allowed"); + } + // Walk through each of 
the contexts between us and + // the targeted type, so that we can innerize each one + let mut ctxs = self.parents().take(*count as usize + 1).collect::>(); + ctxs.reverse(); + let mut target_type = ctxs[0].types[*index as usize].clone(); + let mut ob_crossed = false; + for ctxs_ in ctxs.windows(2) { + ob_crossed |= ctxs_[1].outer_boundary; + let sub = substitute::Innerize::new(ctxs_[0], ctxs_[1].outer_boundary); + target_type = sub + .defined(&target_type) + .map_err(Error::InvalidOuterAlias)?; + } + if ob_crossed { + self.wf_defined(wf::DefinedTypePosition::export(), &target_type) + .map_err(Error::IllFormedOuterAlias)?; + } + Ok(CoreOrComponentExternDesc::Component(ExternDesc::Type( + target_type, + ))) + } + } + } + + /// Add a core extern descriptor to the context: whatever it + /// describes is added to the relevant index space + fn add_core_ed<'c>(&'c mut self, ed: CoreExternDesc) { + match ed { + CoreExternDesc::Func(ft) => self.core.funcs.push(ft), + CoreExternDesc::Table(tt) => self.core.tables.push(tt), + CoreExternDesc::Memory(mt) => self.core.mems.push(mt), + CoreExternDesc::Global(gt) => self.core.globals.push(gt), + } + } + + /// Add an extern descriptor to the context: whatever it describes + /// is added to the relevant index space. Note that this does not + /// handle stripping the type variables off of an instance type + /// (since `ExternDesc::Instance` doesn't have them); that should + /// have been done earlier. See for example the export instance + /// declarator case below, which converts the bound variables on + /// the instance type to context evars, and fixes them up in the + /// instance type, before calling add_ed. 
+ fn add_ed<'c>(&'c mut self, ed: &ExternDesc<'a>) { + match ed { + ExternDesc::CoreModule(cmd) => self.core.modules.push(cmd.clone()), + ExternDesc::Func(ft) => self.funcs.push(ft.clone()), + ExternDesc::Type(dt) => self.types.push(dt.clone()), + ExternDesc::Instance(it) => self.instances.push(it.clone()), + ExternDesc::Component(ct) => self.components.push(ct.clone()), + } + } + + fn add_core_or_component_ed<'c>(&'c mut self, ed: CoreOrComponentExternDesc<'a>) { + match ed { + CoreOrComponentExternDesc::Core(ced) => self.add_core_ed(ced), + CoreOrComponentExternDesc::Component(ed) => self.add_ed(&ed), + } + } + + fn elab_value<'c>(&'c mut self, ctr: &ComponentValType) -> Result, Error<'a>> { + match ctr { + ComponentValType::Type(n) => match &self.types[*n as usize] { + Defined::Value(vt) => Ok(vt.clone()), + dt @ Defined::Handleable(Handleable::Var(tv)) => match self.resolve_tyvar(tv) { + ResolvedTyvar::Definite(Defined::Value(vt)) => { + Ok(Value::Var(Some(tv.clone()), Box::new(vt))) + } + _ => Err(Error::ValTypeRefToNonVal(dt.clone())), + }, + dt => Err(Error::ValTypeRefToNonVal(dt.clone())), + }, + ComponentValType::Primitive(pt) => Ok(match pt { + PrimitiveValType::Bool => Value::Bool, + PrimitiveValType::S8 => Value::S(IntWidth::I8), + PrimitiveValType::U8 => Value::U(IntWidth::I8), + PrimitiveValType::S16 => Value::S(IntWidth::I16), + PrimitiveValType::U16 => Value::U(IntWidth::I16), + PrimitiveValType::S32 => Value::S(IntWidth::I32), + PrimitiveValType::U32 => Value::U(IntWidth::I32), + PrimitiveValType::S64 => Value::S(IntWidth::I64), + PrimitiveValType::U64 => Value::U(IntWidth::I64), + PrimitiveValType::F32 => Value::F(FloatWidth::F32), + PrimitiveValType::F64 => Value::F(FloatWidth::F64), + PrimitiveValType::Char => Value::Char, + PrimitiveValType::String => Value::String, + }), + } + } + + fn elab_defined_value<'c>( + &'c mut self, + vt: &ComponentDefinedType<'a>, + ) -> Result, Error<'a>> { + match vt { + ComponentDefinedType::Primitive(pvt) => { + 
self.elab_value(&ComponentValType::Primitive(*pvt)) + } + ComponentDefinedType::Record(rfs) => { + let rfs = rfs + .iter() + .map(|(name, ty)| { + Ok::<_, Error<'a>>(RecordField { + name: Name { name }, + ty: self.elab_value(ty)?, + }) + }) + .collect::, Error<'a>>>()?; + Ok(Value::Record(rfs)) + } + ComponentDefinedType::Variant(vcs) => { + let vcs = vcs + .iter() + .map(|vc| { + Ok(VariantCase { + name: Name { name: vc.name }, + ty: vc.ty.as_ref().map(|ty| self.elab_value(ty)).transpose()?, + refines: vc.refines, + }) + }) + .collect::, Error<'a>>>()?; + Ok(Value::Variant(vcs)) + } + ComponentDefinedType::List(vt) => Ok(Value::List(Box::new(self.elab_value(vt)?))), + ComponentDefinedType::Tuple(vts) => Ok(Value::Tuple( + vts.iter() + .map(|vt| self.elab_value(vt)) + .collect::, Error<'a>>>()?, + )), + ComponentDefinedType::Flags(ns) => { + Ok(Value::Flags(ns.iter().map(|n| Name { name: n }).collect())) + } + ComponentDefinedType::Enum(ns) => { + Ok(Value::Enum(ns.iter().map(|n| Name { name: n }).collect())) + } + ComponentDefinedType::Option(vt) => Ok(Value::Option(Box::new(self.elab_value(vt)?))), + ComponentDefinedType::Result { ok, err } => Ok(Value::Result( + Box::new(ok.map(|ok| self.elab_value(&ok)).transpose()?), + Box::new(err.map(|err| self.elab_value(&err)).transpose()?), + )), + ComponentDefinedType::Own(n) => match &self.types[*n as usize] { + Defined::Handleable(h) => Ok(Value::Own(h.clone())), + _ => Err(Error::HandleToNonResource), + }, + ComponentDefinedType::Borrow(n) => match &self.types[*n as usize] { + Defined::Handleable(h) => Ok(Value::Borrow(h.clone())), + _ => Err(Error::HandleToNonResource), + }, + ComponentDefinedType::Future(_) + | ComponentDefinedType::Stream(_) + | ComponentDefinedType::ErrorContext => panic!("async not yet supported"), + } + } + + fn elab_func<'c>(&'c mut self, ft: &ComponentFuncType<'a>) -> Result, Error<'a>> { + Ok(Func { + params: ft + .params + .iter() + .map(|(n, vt)| { + Ok(Param { + name: Name { name: n }, + 
ty: self.elab_value(vt)?, + }) + }) + .collect::, Error<'a>>>()?, + result: match &ft.results { + ComponentFuncResult::Unnamed(vt) => etypes::Result::Unnamed(self.elab_value(vt)?), + ComponentFuncResult::Named(rs) => etypes::Result::Named( + rs.iter() + .map(|(n, vt)| { + Ok(Param { + name: Name { name: n }, + ty: self.elab_value(vt)?, + }) + }) + .collect::, Error<'a>>>()?, + ), + }, + }) + } + + /// Elaborate an extern descriptor. This returns any evars that + /// are implied by the descriptor separately, to simplify + /// converting them to context e/u vars, which is usually what you + /// want to do. + fn elab_extern_desc<'c>( + &'c mut self, + ed: &ComponentTypeRef, + ) -> Result<(Vec>, ExternDesc<'a>), Error<'a>> { + match ed { + ComponentTypeRef::Module(i) => match &self.core.types[*i as usize] { + CoreDefined::Module(mt) => Ok((vec![], ExternDesc::CoreModule(mt.clone()))), + _ => { + panic!("internal invariant violation: bad sort for ComponentTypeRef to Module") + } + }, + ComponentTypeRef::Func(i) => match &self.types[*i as usize] { + Defined::Func(ft) => Ok((vec![], ExternDesc::Func(ft.clone()))), + _ => panic!("internal invariant violation: bad sort for ComponentTypeRef to Func"), + }, + ComponentTypeRef::Value(_) => panic!("First-class values are not yet supported"), + ComponentTypeRef::Type(tb) => { + let bound = match tb { + TypeBounds::Eq(i) => TypeBound::Eq(self.types[*i as usize].clone()), + TypeBounds::SubResource => TypeBound::SubResource, + }; + let dt = Defined::Handleable(Handleable::Var(Tyvar::Bound(0))); + Ok((vec![BoundedTyvar::new(bound)], ExternDesc::Type(dt))) + } + ComponentTypeRef::Instance(i) => match &self.types[*i as usize] { + Defined::Instance(qit) => Ok(( + qit.evars.clone(), + ExternDesc::Instance(qit.unqualified.clone()), + )), + _ => panic!( + "internal invariant violation: bad sort for ComponentTypeRef to Instance" + ), + }, + ComponentTypeRef::Component(i) => match &self.types[*i as usize] { + Defined::Component(ct) => 
Ok((vec![], ExternDesc::Component(ct.clone()))), + _ => panic!( + "internal invariant violation: bad sort for ComponentTypeRef to Component" + ), + }, + } + } + + fn elab_instance_decl<'c>( + &'c mut self, + decl: &InstanceTypeDeclaration<'a>, + ) -> Result>, Error<'a>> { + match decl { + InstanceTypeDeclaration::CoreType(ct) => { + let ct = self.elab_core_type(ct); + self.core.types.push(ct); + Ok(None) + } + InstanceTypeDeclaration::Type(t) => { + let t = self.elab_defined(t)?; + if let Defined::Handleable(_) = t { + return Err(Error::ResourceInDeclarator); + } + self.types.push(t); + Ok(None) + } + InstanceTypeDeclaration::Alias(a) => { + let ed = self.resolve_alias(a)?; + self.add_core_or_component_ed(ed); + Ok(None) + } + InstanceTypeDeclaration::Export { name, ty } => { + let (vs, ed) = self.elab_extern_desc(ty)?; + let sub = self.bound_to_evars(Some(name.0), &vs); + let ed = sub.extern_desc(&ed).not_void(); + self.add_ed(&ed); + Ok(Some(ExternDecl { + kebab_name: name.0, + desc: ed, + })) + } + } + } + + fn elab_instance<'c>( + &'c mut self, + decls: &[InstanceTypeDeclaration<'a>], + ) -> Result, Error<'a>> { + let mut ctx = Ctx::new(Some(self), false); + let mut exports = Vec::new(); + for decl in decls { + let export = ctx.elab_instance_decl(decl)?; + if let Some(export) = export { + exports.push(export); + } + } + ctx.finish_instance(&exports) + } + + /// Convert instance variables in the context into bound variables + /// in the type. This is pulled out separately from raising the + /// resulting type so that it can be shared between + /// [`Ctx::finish_instance`] and [`Ctx::finish_component`], which + /// have different requirements in that respect. 
+ fn finish_instance_evars( + self, + exports: &[ExternDecl<'a>], + ) -> Result, Error<'a>> { + let mut evars = Vec::new(); + let mut sub = substitute::Closing::new(false); + for (bound, _) in self.evars { + let bound = sub.bounded_tyvar(&bound)?; + evars.push(bound); + sub.next_e(); + } + let unqualified = sub.instance(&Instance { + exports: exports.to_vec(), + })?; + Ok(QualifiedInstance { evars, unqualified }) + } + + /// The equivalent of the \oplus in the spec. This has to deal + /// with more bookkeeping because of our variable representation: + /// the free variables in the exports need to be converted to + /// bound variables, and any free variables referring to upper + /// contexts need to have their parent/outer index reduced by one + /// to deal with this context ending. + fn finish_instance( + self, + exports: &[ExternDecl<'a>], + ) -> Result, Error<'a>> { + // When we do the well-formedness check in a minute, we need + // to use the parent ctx, because the closing substitution has + // already been applied. 
+ let fallback_parent = Ctx::new(None, false); + let parent_ctx = self.parent.unwrap_or(&fallback_parent); + + let qi = self.finish_instance_evars(exports)?; + let raise_u_sub = substitute::Closing::new(true); + let it = raise_u_sub.qualified_instance(&qi)?; + parent_ctx + .wf_qualified_instance(wf::DefinedTypePosition::internal(), &it) + .map_err(Error::IllFormed)?; + Ok(it) + } + + fn elab_component_decl<'c>( + &'c mut self, + decl: &ComponentTypeDeclaration<'a>, + ) -> Result<(Option>, Option>), Error<'a>> { + match decl { + ComponentTypeDeclaration::CoreType(ct) => { + let ct = self.elab_core_type(ct); + self.core.types.push(ct); + Ok((None, None)) + } + ComponentTypeDeclaration::Type(t) => { + let t = self.elab_defined(t)?; + if let Defined::Handleable(_) = t { + return Err(Error::ResourceInDeclarator); + } + self.types.push(t); + Ok((None, None)) + } + ComponentTypeDeclaration::Alias(a) => { + let ed = self.resolve_alias(a)?; + self.add_core_or_component_ed(ed); + Ok((None, None)) + } + ComponentTypeDeclaration::Export { name, ty, .. } => { + let (vs, ed) = self.elab_extern_desc(ty)?; + let sub = self.bound_to_evars(Some(name.0), &vs); + let ed = sub.extern_desc(&ed).not_void(); + self.add_ed(&ed); + Ok(( + None, + Some(ExternDecl { + kebab_name: name.0, + desc: ed, + }), + )) + } + ComponentTypeDeclaration::Import(i) => { + let (vs, ed) = self.elab_extern_desc(&i.ty)?; + let sub = self.bound_to_uvars(Some(i.name.0), &vs, true); + let ed = sub.extern_desc(&ed).not_void(); + self.add_ed(&ed); + Ok(( + Some(ExternDecl { + kebab_name: i.name.0, + desc: ed, + }), + None, + )) + } + } + } + + /// Similar to [`Ctx::finish_instance`], but for components; this + /// has to cover uvars as well as evars. 
+ fn finish_component( + self, + imports: &[ExternDecl<'a>], + exports: &[ExternDecl<'a>], + ) -> Result, Error<'a>> { + // When we do the well-formedness check in a minute, we need + // to use the parent ctx, because the closing substitution has + // already been applied. + let fallback_parent = Ctx::new(None, false); + let parent_ctx = self.parent.unwrap_or(&fallback_parent); + + let mut uvars = Vec::new(); + let mut sub = substitute::Closing::new(true); + for (bound, imported) in &self.uvars { + let bound = sub.bounded_tyvar(bound)?; + uvars.push(bound); + sub.next_u(*imported); + } + let imports = imports + .iter() + .map(|ed| sub.extern_decl(ed).map_err(Into::into)) + .collect::>, Error<'a>>>()?; + let instance = sub.qualified_instance(&self.finish_instance_evars(exports)?)?; + let ct = Component { + uvars, + imports, + instance, + }; + parent_ctx + .wf_component(wf::DefinedTypePosition::internal(), &ct) + .map_err(Error::IllFormed)?; + Ok(ct) + } + + fn elab_defined<'c>(&'c mut self, dt: &ComponentType<'a>) -> Result, Error<'a>> { + match dt { + ComponentType::Defined(vt) => Ok(Defined::Value(self.elab_defined_value(vt)?)), + ComponentType::Func(ft) => Ok(Defined::Func(self.elab_func(ft)?)), + ComponentType::Component(cds) => Ok(Defined::Component(self.elab_component(cds)?)), + ComponentType::Instance(ids) => Ok(Defined::Instance(self.elab_instance(ids)?)), + ComponentType::Resource { dtor, .. } => { + let rid = ResourceId { + id: self.rtypes.len() as u32, + }; + self.rtypes.push(Resource { _dtor: *dtor }); + Ok(Defined::Handleable(Handleable::Resource(rid))) + } + } + } +} diff --git a/src/hyperlight_component_util/src/emit.rs b/src/hyperlight_component_util/src/emit.rs new file mode 100644 index 000000000..447467aff --- /dev/null +++ b/src/hyperlight_component_util/src/emit.rs @@ -0,0 +1,762 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! A bunch of utilities used by the actual code emit functions +use std::collections::{BTreeMap, BTreeSet, VecDeque}; +use std::vec::Vec; + +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; +use syn::Ident; + +use crate::etypes::{BoundedTyvar, Defined, Handleable, ImportExport, TypeBound, Tyvar}; + +/// A representation of a trait definition that we will eventually +/// emit. This is used to allow easily adding onto the trait each time +/// we see an extern decl. +#[derive(Debug, Default)] +pub struct Trait { + /// A set of supertrait constraints, each associated with a + /// bindings module path + pub supertraits: BTreeMap, TokenStream>, + /// Keep track for each type variable of: + /// - The identifier that we use for it in the generated source + /// - Whether it comes from a component type variable, and if so, + /// which one. (Most do; the I: Imports on the main component + /// trait is the main one that doesn't). + /// - Whether there are any bounds on it + pub tvs: BTreeMap, TokenStream)>, + /// Raw tokens of the contents of the trait + pub items: TokenStream, +} +impl Trait { + pub fn new() -> Self { + Self { + supertraits: BTreeMap::new(), + tvs: BTreeMap::new(), + items: TokenStream::new(), + } + } + /// Collect the component tyvar indices that correspond to the + /// type variables on this trait. + /// + /// Precondition: all of the type + /// variables on this trait do correspond to component variables. 
+ pub fn tv_idxs(&self) -> Vec { + self.tvs.iter().map(|(_, (n, _))| n.unwrap()).collect() + } + /// See [`State::adjust_vars`]. + pub fn adjust_vars(&mut self, n: u32) { + for (_, (v, _)) in self.tvs.iter_mut() { + if let Some(v) = v.as_mut() { + *v += n; + } + } + } + /// Build a token stream of all type variables and trait bounds on + /// them, e.g. what you would put "inside" the <> in trait T<...>. + pub fn tv_toks_inner(&mut self) -> TokenStream { + let tvs = self + .tvs + .iter() + .map(|(k, (_, v))| { + let colon = if v.is_empty() { + quote! {} + } else { + quote! { : } + }; + quote! { #k #colon #v } + }) + .collect::>(); + quote! { #(#tvs),* } + } + /// Build a token stream for the type variable part of the trait + /// declaration + pub fn tv_toks(&mut self) -> TokenStream { + if !self.tvs.is_empty() { + let toks = self.tv_toks_inner(); + quote! { <#toks> } + } else { + quote! {} + } + } + /// Build a token stream for this entire trait definition + pub fn into_tokens(&mut self, n: Ident) -> TokenStream { + let trait_colon = if !self.supertraits.is_empty() { + quote! { : } + } else { + quote! {} + }; + let supertraits = self + .supertraits + .iter() + .map(|(is, ts)| { + quote! { #(#is)::*#ts } + }) + .collect::>(); + let tvs = self.tv_toks(); + let items = &self.items; + quote! { + pub trait #n #tvs #trait_colon #(#supertraits)+* { #items } + } + } +} + +/// A representation of a module definition that we will eventually +/// emit. This is used to allow easily adding onto the module each time +/// we see a relevant decl. 
+#[derive(Debug, Default)] +pub struct Mod { + pub submods: BTreeMap, + pub items: TokenStream, + pub traits: BTreeMap, + pub impls: BTreeMap<(Vec, Ident), TokenStream>, +} +impl Mod { + pub fn empty() -> Self { + Self { + submods: BTreeMap::new(), + items: TokenStream::new(), + traits: BTreeMap::new(), + impls: BTreeMap::new(), + } + } + /// Get a reference to a sub-module, creating it if necessary + pub fn submod<'a>(&'a mut self, i: Ident) -> &'a mut Self { + self.submods.entry(i).or_insert(Self::empty()) + } + /// Get an immutable reference to a sub-module + /// + /// Precondition: the named submodule must already exist + pub fn submod_immut<'a>(&'a self, i: Ident) -> &'a Self { + &self.submods[&i] + } + /// Get a reference to a trait definition in this module, creating + /// it if necessary + pub fn r#trait<'a>(&'a mut self, i: Ident) -> &'a mut Trait { + self.traits.entry(i).or_default() + } + /// Get an immutable reference to a trait definition in this module + /// + /// Precondition: the named trait must already exist + pub fn trait_immut<'a>(&'a self, i: Ident) -> &'a Trait { + &self.traits[&i] + } + /// Get a reference to an impl block that is in this module, + /// creating it if necessary. + /// + /// Currently, we don't track much information about these, so + /// it's just a mutable token stream. + pub fn r#impl<'a>(&'a mut self, t: Vec, i: Ident) -> &'a mut TokenStream { + self.impls.entry((t, i)).or_default() + } + /// See [`State::adjust_vars`]. + pub fn adjust_vars(&mut self, n: u32) { + self.submods + .iter_mut() + .map(|(_, m)| m.adjust_vars(n)) + .for_each(drop); + self.traits + .iter_mut() + .map(|(_, t)| t.adjust_vars(n)) + .for_each(drop); + } + /// Build a token stream for this entire module + pub fn into_tokens(self) -> TokenStream { + let mut tt = TokenStream::new(); + for (k, v) in self.submods { + let vt = v.into_tokens(); + tt.extend(quote! 
{ + pub mod #k { #vt } + }); + } + for (n, mut t) in self.traits { + tt.extend(t.into_tokens(n)); + } + tt.extend(self.items); + for ((ns, i), t) in self.impls { + tt.extend(quote! { + impl #(#ns)::* for #i { #t } + }) + } + tt + } +} + +/// A whole grab-bag of useful state to have while emitting Rust +#[derive(Debug)] +pub struct State<'a, 'b> { + /// A pointer to a [`Mod`] that everything we emit will end up in + pub root_mod: &'a mut Mod, + /// A cursor to the current submodule (under [`State::root_mod`]), + /// where decls that we are looking at right now should end up + pub mod_cursor: Vec, + /// If we are currently processing decls that should end up inside + /// a trait (representing an instance or a resource), this names + /// the trait where they should end up. + pub cur_trait: Option, + /// We use a "helper module" for auxiliary definitions: for + /// example, an instance represented by `InstanceTrait` would end + /// up with nominal definitions for its nontrivial types in + /// `instance_trait::Type`. This keeps track of the name of that + /// module, if it presently exists. + pub cur_helper_mod: Option, + /// Whether the trait/type definition that we are currently + /// emitting is in the helper module or the main module + /// corresponding directly to the wit package. This is important + /// to get references to other types correct. + pub is_helper: bool, + /// All the bound variables in the component type that we are + /// currently processing + pub bound_vars: &'a mut VecDeque>, + /// An offset into bound_vars from which any variable indices we + /// see in the source component type will be resolved; used to + /// deal with the fact that when we recurse down into a type in + /// the Eq bound of a type variable, its variables are offset from + /// ours (since we use de Bruijn indices). + pub var_offset: usize, + /// A path through instance import/export names from the root + /// component type to the type we are currently processing. 
This + /// is used with [`crate::etypes::TyvarOrigin`] to decide whether + /// a type variable we encounter is "locally defined", i.e. should + /// have a type definition emitted for it in this module. + pub origin: Vec>, + /// A set of type variables that we encountered while emitting the + /// type bound for a type variable. + pub cur_needs_vars: Option<&'a mut BTreeSet>, + /// A map from type variables to the type variables used in their + /// bounds, used to ensure that we are parametrized over the + /// things we need to be + pub vars_needs_vars: &'a mut VecDeque>, + /// The Rust type parameter used to represent the type that + /// implements the imports of a component + pub import_param_var: Option, + /// The Rust type parameter used to represent the current Rust + /// state type + pub self_param_var: Option, + /// Whether we are emitting an implementation of the component + /// interfaces, or just the types of the interface + pub is_impl: bool, + /// A namespace path and a name representing the Rust trait + /// generated for the root component that we started codegen from + pub root_component_name: Option<(TokenStream, &'a str)>, + /// Whether we are generating code for the Hyperlight host or the + /// Hyperlight guest + pub is_guest: bool, + /// A temporary hack to enable some special cases used by the + /// wasmtime guest emit. When that is refactored to use the host + /// guest emit, this can go away. 
+ pub is_wasmtime_guest: bool, +} + +/// Create a State with all of its &mut references pointing to +/// sensible things, run a function that emits code into the state, +/// and then generate a token stream representing everything emitted +pub fn run_state<'b, F: for<'a> FnMut(&mut State<'a, 'b>)>( + is_guest: bool, + is_wasmtime_guest: bool, + mut f: F, +) -> TokenStream { + let mut root_mod = Mod::empty(); + let mut bound_vars = std::collections::VecDeque::new(); + let mut vars_needs_vars = std::collections::VecDeque::new(); + { + let mut state = State::new( + &mut root_mod, + &mut bound_vars, + &mut vars_needs_vars, + is_guest, + is_wasmtime_guest, + ); + f(&mut state); + } + root_mod.into_tokens() +} + +impl<'a, 'b> State<'a, 'b> { + pub fn new( + root_mod: &'a mut Mod, + bound_vars: &'a mut VecDeque>, + vars_needs_vars: &'a mut VecDeque>, + is_guest: bool, + is_wasmtime_guest: bool, + ) -> Self { + Self { + root_mod, + mod_cursor: Vec::new(), + cur_trait: None, + cur_helper_mod: None, + is_helper: false, + bound_vars, + var_offset: 0, + origin: Vec::new(), + cur_needs_vars: None, + vars_needs_vars, + import_param_var: None, + self_param_var: None, + is_impl: false, + root_component_name: None, + is_guest, + is_wasmtime_guest, + } + } + pub fn clone<'c>(&'c mut self) -> State<'c, 'b> { + State { + root_mod: self.root_mod, + mod_cursor: self.mod_cursor.clone(), + cur_trait: self.cur_trait.clone(), + cur_helper_mod: self.cur_helper_mod.clone(), + is_helper: self.is_helper, + bound_vars: self.bound_vars, + var_offset: self.var_offset, + origin: self.origin.clone(), + cur_needs_vars: self.cur_needs_vars.as_deref_mut(), + vars_needs_vars: self.vars_needs_vars, + import_param_var: self.import_param_var.clone(), + self_param_var: self.self_param_var.clone(), + is_impl: self.is_impl, + root_component_name: self.root_component_name.clone(), + is_guest: self.is_guest, + is_wasmtime_guest: self.is_wasmtime_guest, + } + } + /// Obtain a reference to the [`Mod`] that we are 
currently + /// generating code in, creating it if necessary + pub fn cur_mod<'c>(&'c mut self) -> &'c mut Mod { + let mut m: &'c mut Mod = self.root_mod; + for i in &self.mod_cursor { + m = m.submod(i.clone()); + } + if self.is_helper { + m = m.submod(self.cur_helper_mod.clone().unwrap()); + } + m + } + /// Obtain an immutable reference to the [`Mod`] that we are + /// currently generating code in. + /// + /// Precondition: the module must already exist + pub fn cur_mod_immut<'c>(&'c self) -> &'c Mod { + let mut m: &'c Mod = self.root_mod; + for i in &self.mod_cursor { + m = m.submod_immut(i.clone()); + } + if self.is_helper { + m = m.submod_immut(self.cur_helper_mod.clone().unwrap()); + } + m + } + /// Copy the state, changing its module cursor to emit code into a + /// different module + pub fn with_cursor<'c>(&'c mut self, cursor: Vec) -> State<'c, 'b> { + let mut s = self.clone(); + s.mod_cursor = cursor; + s + } + /// Copy the state, replacing its [`State::cur_needs_vars`] reference, + /// allowing a caller to capture the vars referenced by any emit + /// run with the resultant state + pub fn with_needs_vars<'c>(&'c mut self, needs_vars: &'c mut BTreeSet) -> State<'c, 'b> { + let mut s = self.clone(); + s.cur_needs_vars = Some(needs_vars); + s + } + /// Record that an emit sequence needed a var, given an absolute + /// index for the var (i.e. 
ignoring [`State::var_offset`]) + pub fn need_noff_var(&mut self, n: u32) { + self.cur_needs_vars.as_mut().map(|vs| vs.insert(n)); + } + /// Use the [`State::cur_needs_vars`] set to populate + /// [`State::vars_needs_vars`] for a var that we presumably just + /// finished emitting a bound for + pub fn record_needs_vars(&mut self, n: u32) { + let un = n as usize; + if self.vars_needs_vars.len() < un + 1 { + self.vars_needs_vars.resize(un + 1, BTreeSet::new()); + } + let Some(ref mut cnvs) = self.cur_needs_vars else { + return; + }; + log::debug!("debug varref: recording {:?} for var {:?}", cnvs.iter(), un); + self.vars_needs_vars[un].extend(cnvs.iter()); + } + /// Get the set of all the variables needed by a var, given its absolute + /// index (i.e. ignoring [`State::var_offset`]) + pub fn get_noff_var_refs(&mut self, n: u32) -> BTreeSet { + let un = n as usize; + if self.vars_needs_vars.len() < un + 1 { + return BTreeSet::new(); + }; + log::debug!( + "debug varref: looking up {:?} for var {:?}", + self.vars_needs_vars[un].iter(), + un + ); + self.vars_needs_vars[un].clone() + } + /// Find the exported name which gave rise to a component type + /// variable, given its absolute index (i.e. ignoring + /// [`State::var_offset`]); panics if that variable has no recorded origin + pub fn noff_var_id(&self, n: u32) -> Ident { + let Some(n) = self.bound_vars[n as usize].origin.last_name() else { + panic!("missing origin on tyvar in rust emit") + }; + kebab_to_type(n) + } + /// Copy the state, changing it to emit into the helper module of + /// the current trait + pub fn helper<'c>(&'c mut self) -> State<'c, 'b> { + let mut s = self.clone(); + s.is_helper = true; + s + } + /// Construct a namespace token stream that can be emitted in the + /// current module to refer to a name in the root module + pub fn root_path(&self) -> TokenStream { + if self.is_impl { + return TokenStream::new(); + } + let mut s = self + .mod_cursor + .iter() + .map(|_| quote! { super }) + .collect::>(); + if self.is_helper { + s.push(quote! 
{ super }); + } + quote! { #(#s::)* } + } + /// Construct a namespace token stream that can be emitted in the + /// current module to refer to a name in the helper module + pub fn helper_path(&self) -> TokenStream { + if self.is_impl { + let c = &self.mod_cursor; + let helper = self.cur_helper_mod.clone().unwrap(); + let h = if !self.is_helper { + quote! { #helper:: } + } else { + TokenStream::new() + }; + quote! { #(#c::)*#h } + } else if self.is_helper { + quote! { self:: } + } else { + let helper = self.cur_helper_mod.clone().unwrap(); + quote! { #helper:: } + } + } + /// Emit a namespace token stream that can be emitted in the root + /// module to refer to the current trait + pub fn cur_trait_path(&self) -> TokenStream { + let tns = &self.mod_cursor; + let tid = self.cur_trait.clone().unwrap(); + quote! { #(#tns::)* #tid } + } + /// Add a supertrait constraint referring to a trait in the helper + /// module; primarily used to add a constraint for the trait + /// representing a resource type. + pub fn add_helper_supertrait(&mut self, r: Ident) { + let (Some(t), Some(hm)) = (self.cur_trait.clone(), &self.cur_helper_mod.clone()) else { + panic!("invariant violation") + }; + self.cur_mod() + .r#trait(t) + .supertraits + .insert(vec![hm.clone(), r], TokenStream::new()); + } + /// Obtain a reference to the [`Trait`] that we are currently + /// generating code in, creating it if necessary. + /// + /// Precondition: we are currently generating code in a trait + /// (i.e. [`State::cur_trait`] is not [`None`]) + pub fn cur_trait<'c>(&'c mut self) -> &'c mut Trait { + let n = self.cur_trait.as_ref().unwrap().clone(); + self.cur_mod().r#trait(n) + } + /// Obtain an immutable reference to the [`Trait`] that we are + /// currently generating code in. + /// + /// Precondition: we are currently generating code in a trait + /// (i.e. 
[`State::cur_trait`] is not [`None`]), and that trait has + /// already been created + pub fn cur_trait_immut<'c>(&'c self) -> &'c Trait { + let n = self.cur_trait.as_ref().unwrap().clone(); + self.cur_mod_immut().trait_immut(n) + } + /// Obtain a reference to the trait at the given module path and + /// name from the root module, creating it and any named modules + /// if necessary + pub fn r#trait<'c>(&'c mut self, namespace: &'c [Ident], name: Ident) -> &'c mut Trait { + let mut m: &'c mut Mod = self.root_mod; + for i in namespace { + m = m.submod(i.clone()); + } + m.r#trait(name) + } + /// Add an import/export to [`State::origin`], reflecting that we are now + /// looking at code underneath it + pub fn push_origin<'c>(&'c mut self, is_export: bool, name: &'b str) -> State<'c, 'b> { + let mut s = self.clone(); + s.origin.push(if is_export { + ImportExport::Export(name) + } else { + ImportExport::Import(name) + }); + s + } + /// Find out if a [`Defined`] type is actually a reference to a + /// locally defined type variable, returning its index and bound + /// if it is + pub fn is_var_defn(&self, t: &Defined<'b>) -> Option<(u32, TypeBound<'b>)> { + match t { + Defined::Handleable(Handleable::Var(tv)) => match tv { + Tyvar::Bound(n) => { + let bv = &self.bound_vars[self.var_offset + (*n as usize)]; + log::debug!("checking an origin {:?} {:?}", bv.origin, self.origin); + if bv.origin.matches(self.origin.iter()) { + Some((*n, bv.bound.clone())) + } else { + None + } + } + Tyvar::Free(_) => panic!("free tyvar in finished type"), + }, + _ => None, + } + } + /// Find out if a variable is locally-defined given its absolute + /// index, returning its origin and bound if it is + pub fn is_noff_var_local<'c>( + &'c self, + n: u32, + ) -> Option<(Vec>, TypeBound<'a>)> { + let bv = &self.bound_vars[n as usize]; + bv.origin + .is_local(self.origin.iter()) + .map(|path| (path, bv.bound.clone())) + } + /// Obtain an immutable reference to the trait at the specified + /// 
namespace path, either from the root module (if `absolute`) + /// or from the current module (otherwise) + /// + /// Precondition: `path` is non-empty and all named traits/modules must exist + pub fn resolve_trait_immut(&self, absolute: bool, path: &[Ident]) -> &Trait { + log::debug!("resolving trait {:?} {:?}", absolute, path); + let mut m = if absolute { + &*self.root_mod + } else { + self.cur_mod_immut() + }; + for x in &path[0..path.len() - 1] { + m = &m.submods[x]; + } + &m.traits[&path[path.len() - 1]] + } + /// Shift all of the type variable indices over, because we have + /// gone under some binders. Used when we switch from looking at + /// a component's import types (where type idxs are de Bruijn into + /// the component's uvar list) to a component's export types + /// (where type idxs are de Bruijn first into the evar list and + /// then the uvar list, as we go under the existential binders). + pub fn adjust_vars(&mut self, n: u32) { + self.vars_needs_vars + .iter_mut() + .enumerate() + .for_each(|(i, vs)| { + *vs = vs.iter().map(|v| v + n).collect(); + log::debug!("updated {:?} to {:?}", i, *vs); + }); + for _ in 0..n { + self.vars_needs_vars.push_front(BTreeSet::new()); + } + self.root_mod.adjust_vars(n); + } + /// Resolve a type variable as far as possible: either this ends + /// up with a definition, in which case, let's get that, or it + /// ends up with a resource type, in which case we return the + /// resource index + pub fn resolve_tv(&self, n: u32) -> (u32, Option>) { + match &self.bound_vars[self.var_offset + n as usize].bound { + TypeBound::Eq(Defined::Handleable(Handleable::Var(Tyvar::Bound(nn)))) => { + self.resolve_tv(n + 1 + nn) + } + TypeBound::Eq(t) => (n, Some(t.clone())), + TypeBound::SubResource => (n, None), + } + } + /// Construct a namespace path referring to the resource trait for + /// a resource with the given name + pub fn resource_trait_path(&self, r: Ident) -> Vec { + let mut path = self.mod_cursor.clone(); + let helper = self + .cur_helper_mod + 
.as_ref() + .expect("There should always be a helper mod to hold a resource trait") + .clone(); + path.push(helper); + path.push(r); + path + } +} + +/// A parsed representation of a WIT name, containing package +/// namespaces, an actual name, and possibly a SemVer version +#[derive(Debug, Clone)] +pub struct WitName<'a> { + pub namespaces: Vec<&'a str>, + pub name: &'a str, + pub _version: Vec<&'a str>, +} +impl<'a> WitName<'a> { + /// Extract a list of Rust module names corresponding to the WIT + /// namespace/package + pub fn namespace_idents(&self) -> Vec { + self.namespaces + .iter() + .map(|x| kebab_to_namespace(x)) + .collect::>() + } + /// Extract a token stream representing the Rust namespace path + /// corresponding to the WIT namespace/package + pub fn namespace_path(&self) -> TokenStream { + let ns = self.namespace_idents(); + quote! { #(#ns)::* } + } +} +/// Parse a kebab-name as a WIT name: `:`- and `/`-separated namespaces, then the name, then any `@`-separated version parts +pub fn split_wit_name(n: &str) -> WitName { + let mut namespaces = Vec::new(); + let mut colon_components = n.split(':').rev(); + let last = colon_components.next().unwrap(); + namespaces.extend(colon_components.rev()); + let mut slash_components = last.split('/').rev(); + let mut versioned_name = slash_components.next().unwrap().split('@'); + let name = versioned_name.next().unwrap(); + namespaces.extend(slash_components.rev()); + WitName { + namespaces, + name, + _version: versioned_name.collect(), + } +} + +fn kebab_to_snake(n: &str) -> Ident { + if n == "self" { + return format_ident!("self_"); + } + let mut ret = String::new(); + for c in n.chars() { + if c == '-' { + ret.push('_'); + continue; + } + ret.push(c); + } + format_ident!("r#{}", ret) +} + +fn kebab_to_camel(n: &str) -> Ident { + let mut word_start = true; + let mut ret = String::new(); + for c in n.chars() { + if c == '-' { + word_start = true; + continue; + } + if word_start { + ret.extend(c.to_uppercase()) + } else { + ret.push(c) + }; + word_start = false; + } + format_ident!("{}", ret) +} + 
+/// Convert a kebab name to something suitable for use as a +/// (value-level) variable +pub fn kebab_to_var(n: &str) -> Ident { + kebab_to_snake(n) +} +/// Convert a kebab name to something suitable for use as a +/// type constructor +pub fn kebab_to_cons(n: &str) -> Ident { + kebab_to_camel(n) +} +/// Convert a kebab name to something suitable for use as a getter +/// function name +pub fn kebab_to_getter(n: &str) -> Ident { + kebab_to_snake(n) +} +/// Convert a kebab name to something suitable for use as a type name +pub fn kebab_to_type(n: &str) -> Ident { + kebab_to_camel(n) +} +/// Convert a kebab name to something suitable for use as a module +/// name/namespace path entry +pub fn kebab_to_namespace(n: &str) -> Ident { + kebab_to_snake(n) +} +/// From a kebab name for a Component, derive something suitable for +/// use as the name of the imports trait for that component +pub fn kebab_to_imports_name(trait_name: &str) -> Ident { + format_ident!("{}Imports", kebab_to_type(trait_name)) +} +/// From a kebab name for a Component, derive something suitable for +/// use as the name of the exports trait for that component +pub fn kebab_to_exports_name(trait_name: &str) -> Ident { + format_ident!("{}Exports", kebab_to_type(trait_name)) +} + +/// The kinds of names that a function associated with a resource in +/// WIT can have +pub enum ResourceItemName { + Constructor, + Method(Ident), + Static(Ident), +} + +/// The kinds of names that a function in WIT can have +pub enum FnName { + Associated(Ident, ResourceItemName), + Plain(Ident), +} +/// Parse a kebab-name as a WIT function name, figuring out if it is +/// associated with a resource (`[constructor]`, `[method]` or `[static]` prefix) +pub fn kebab_to_fn(n: &str) -> FnName { + if let Some(n) = n.strip_prefix("[constructor]") { + return FnName::Associated(kebab_to_type(n), ResourceItemName::Constructor); + } + if let Some(n) = n.strip_prefix("[method]") { + let mut i = n.split('.'); + let r = i.next().unwrap(); + let n = i.next().unwrap(); + return 
FnName::Associated( + kebab_to_type(r), + ResourceItemName::Method(kebab_to_snake(n)), + ); + } + if let Some(n) = n.strip_prefix("[static]") { + let mut i = n.split('.'); + let r = i.next().unwrap(); + let n = i.next().unwrap(); + return FnName::Associated( + kebab_to_type(r), + ResourceItemName::Static(kebab_to_snake(n)), + ); + } + FnName::Plain(kebab_to_snake(n)) +} diff --git a/src/hyperlight_component_util/src/etypes.rs b/src/hyperlight_component_util/src/etypes.rs new file mode 100644 index 000000000..0cec42887 --- /dev/null +++ b/src/hyperlight_component_util/src/etypes.rs @@ -0,0 +1,506 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! Elaborated component model types +//! +//! This has the basic type definitions for the elaborated types. They +//! correspond roughly to the "Elaborated Types" section in the +//! specification. 
+use crate::structure::*; + +#[derive(Debug, Clone, PartialEq, Copy)] +pub struct Name<'a> { + pub name: &'a str, +} + +#[derive(Debug, Clone, PartialEq, Copy)] +pub enum IntWidth { + I8, + I16, + I32, + I64, +} +impl IntWidth { + pub fn width(self) -> u8 { + match self { + IntWidth::I8 => 8, + IntWidth::I16 => 16, + IntWidth::I32 => 32, + IntWidth::I64 => 64, + } + } +} + +#[derive(Debug, Clone, PartialEq, Copy)] +pub enum FloatWidth { + F32, + F64, +} +impl FloatWidth { + pub fn width(self) -> u8 { + match self { + FloatWidth::F32 => 32, + FloatWidth::F64 => 64, + } + } +} + +/// recordfield_e in the specification +#[derive(Debug, Clone)] +pub struct RecordField<'a> { + pub name: Name<'a>, + pub ty: Value<'a>, +} + +/// variantcase_e in the specification +#[derive(Debug, Clone)] +pub struct VariantCase<'a> { + pub name: Name<'a>, + pub ty: Option>, + pub refines: Option, +} + +/// valtype_e in the specification +#[derive(Debug, Clone)] +pub enum Value<'a> { + Bool, + S(IntWidth), + U(IntWidth), + F(FloatWidth), + Char, + String, + List(Box>), + Record(Vec>), + Tuple(Vec>), + Flags(Vec>), + Variant(Vec>), + Enum(Vec>), + Option(Box>), + Result(Box>>, Box>>), + Own(Handleable), + Borrow(Handleable), + /// This records that a type variable was once here, and is used + /// to enforce export namedness checks. + Var(Option, Box>), +} + +/// Global resource identifier +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct ResourceId { + pub(super) id: u32, +} + +/// To make certain substitutions easier, free type variables are +/// divided into Universal and Existential variables. Each is +/// represented by a pair of indices: the first index is an index into +/// [`Ctx::parents()`], specifying which parent context has the variable +/// definition in it, and the second is an index into that context's +/// [`Ctx::uvars`] or [`Ctx::evars`]. 
+#[derive(Debug, Clone)] +pub enum FreeTyvar { + U(u32, u32), + E(u32, u32), +} + +/// We explicitly distinguish between bound type variables, which +/// can only be present on types that are themselves inside a +/// [`QualifiedInstance`] or [`Component`], and free type variables +/// that are used while constructing or deconstructing such a type in +/// a [`Ctx`]. +#[derive(Debug, Clone)] +pub enum Tyvar { + /// A bound type variable as a de Bruijn index (0 is the innermost + /// binder) + Bound(u32), + /// A free type variable, whose bounds/other information are + /// stored in the context + Free(FreeTyvar), +} + +#[derive(Debug, Clone)] +pub struct Param<'a> { + pub name: Name<'a>, + pub ty: Value<'a>, +} + +#[derive(Debug, Clone)] +pub enum Result<'a> { + Unnamed(Value<'a>), + Named(Vec>), +} + +/// functype_e in the specification +#[derive(Debug, Clone)] +pub struct Func<'a> { + pub params: Vec>, + pub result: Result<'a>, +} + +/// In the spec, this does not exist, but a validation rule ensures an +/// invariant that certain `deftype_e`s are of this form. +#[derive(Debug, Clone)] +pub enum Handleable { + Var(Tyvar), + Resource(ResourceId), +} + +/// deftype_e in the specification +#[derive(Debug, Clone)] +pub enum Defined<'a> { + Handleable(Handleable), + Value(Value<'a>), + Func(Func<'a>), + Instance(QualifiedInstance<'a>), + Component(Component<'a>), +} + +/// typebound_e in the specification +#[derive(Debug, Clone)] +pub enum TypeBound<'a> { + Eq(Defined<'a>), + SubResource, +} + +/// The name of an import or export of the current +/// component/context. Not in the spec; only used for +/// [`BoundedTyvar::origin`] below. +/// +/// Any string present in one of these should also be present in an +/// [`ExternDecl::kebab_name`] in a relevant place. 
+#[derive(Debug, Clone, PartialEq)] +pub enum ImportExport<'a> { + Import(&'a str), + Export(&'a str), +} +impl<'a> ImportExport<'a> { + pub fn name(&self) -> &'a str { + match self { + ImportExport::Import(s) => s, + ImportExport::Export(s) => s, + } + } + pub fn imported(&self) -> bool { + match self { + ImportExport::Import(_) => true, + ImportExport::Export(_) => false, + } + } +} + +/// An (optional) path through the imports/exports of a current +/// component/context. Not in the spec; only used for +/// [`BoundedTyvar::origin`] below. +#[derive(Default, Debug, Clone, PartialEq)] +pub struct TyvarOrigin<'a> { + /// Note that the most recent (closest) element is last. NOTE(review): [`TyvarOrigin::is_imported`] indexes that last element unconditionally, so it panics on the empty path that [`TyvarOrigin::new`] creates + pub path: Option>>, +} + +impl<'a> TyvarOrigin<'a> { + pub fn new() -> Self { + TyvarOrigin { path: Some(vec![]) } + } + pub fn push(&self, x: Option>) -> Self { + match (&self.path, x) { + (None, _) => TyvarOrigin { path: None }, + (_, None) => self.clone(), + (Some(xs), Some(x)) => { + let mut xs = xs.clone(); + xs.push(x); + TyvarOrigin { path: Some(xs) } + } + } + } + pub fn matches>>(&self, path: I) -> bool { + self.path + .as_ref() + .map(|p| p.iter().rev().eq(path)) + .unwrap_or(false) + } + pub fn is_local< + I: DoubleEndedIterator> + + ExactSizeIterator>, + >( + &self, + path: I, + ) -> Option>> { + let other = path.rev().skip(1).rev(); + let path = self.path.as_ref()?; + let path = path.iter(); + let mut path = path.rev(); + for elem in other { + match path.next() { + None => break, + Some(oe) if oe != elem => return None, + _ => (), + } + } + Some(path.cloned().collect()) + } + pub fn last_name(&self) -> Option<&'a str> { + self.path + .as_ref() + .and_then(|x| x.first()) + .map(|ie| ie.name()) + } + pub fn is_imported(&self) -> bool { + let Some(p) = &self.path else { + return false; + }; + p[p.len() - 1].imported() + } +} + +/// boundedtyvar_e in the spec +/// +/// Because we use a de Bruijn representation of type indices, this is +/// only the type_bound - which variable it is 
binding is implicit in +/// its position in the list. +#[derive(Debug, Clone)] +pub struct BoundedTyvar<'a> { + /// This is not important for typechecking, but is used to keep + /// track of where a type variable originated from in order to + /// decide on a canonical name to be used in bindings + /// generation. + pub origin: TyvarOrigin<'a>, + pub bound: TypeBound<'a>, +} + +impl<'a> BoundedTyvar<'a> { + pub fn new(bound: TypeBound<'a>) -> Self { + BoundedTyvar { + origin: TyvarOrigin::new(), + bound, + } + } + pub fn push_origin(&self, x: Option>) -> Self { + BoundedTyvar { + origin: self.origin.push(x), + ..self.clone() + } + } +} + +/// externdesc_e in the specification +#[derive(Debug, Clone)] +pub enum ExternDesc<'a> { + CoreModule(CoreModule<'a>), + Func(Func<'a>), + /* TODO: First-class values (when the spec gets them) */ + Type(Defined<'a>), + /// This uses an [`Instance`] rather than a [`QualifiedInstance`] + /// because the instance's evars need to be propagated up to the + /// surrounding component/instance (so that e.g. `alias`ing them + /// and using them in another import/export is possible). + Instance(Instance<'a>), + Component(Component<'a>), +} + +/// Merely a convenience for [`Ctx::resolve_alias`] +#[derive(Debug, Clone)] +pub enum CoreOrComponentExternDesc<'a> { + Core(CoreExternDesc), + Component(ExternDesc<'a>), +} + +/// externdecl_e in the specification +#[derive(Debug, Clone)] +pub struct ExternDecl<'a> { + pub kebab_name: &'a str, + pub desc: ExternDesc<'a>, +} + +/// `instancetype_e` in the specification. +/// +/// An "opened" instance, whose existential variables are recorded in +/// some surrounding context. +#[derive(Debug, Clone)] +pub struct Instance<'a> { + pub exports: Vec>, +} + +/// This is an instance together with its existential variables. 
This +/// concept doesn't exist as a named syntax class in the specification, but +/// is the payload of the instance case of `deftype_e` and the output +/// of the instance declaration inference judgement. +#[derive(Debug, Clone)] +pub struct QualifiedInstance<'a> { + /// Existential variables produced by this instance (which may be + /// referred to by [`exports`](Instance::exports)). These are stored in + /// "outside-in" order that matches how they would be written on + /// paper: de Bruijn index Bound(0) in the exports is the last + /// element in the list, and later elements can depend on earlier + /// ones. + pub evars: Vec>, + pub unqualified: Instance<'a>, +} + +/// componenttype_e in the specification +#[derive(Debug, Clone)] +pub struct Component<'a> { + /// Universal variables over which this component is parameterized + /// (which may be referred to by `imports`). These are stored in + /// "outside-in" order that matches how they would be written on + /// paper: de Bruijn index Bound(0) in the imports is the last + /// element in the list, and later elements can depend on earlier + /// ones. + pub uvars: Vec>, + pub imports: Vec>, + /// Since we already have [`QualifiedInstance`], we use that to + /// keep track of both the evars and the actual instance, unlike + /// in the spec; this is quite natural, since during inference the + /// evars are generated by the exports. However, they conceptually + /// belong here as much as there: instantiating a component should + /// add them to the context as non-imported uvars and produce an + /// [`Instance`], rather than a [`QualifiedInstance`] directly. 
+ pub instance: QualifiedInstance<'a>, +} + +// core:importdecl in the specification is wasmparser::Import + +/// core:importdesc in the specification +#[derive(Debug, Clone)] +pub enum CoreExternDesc { + Func(wasmparser::FuncType), + Table(wasmparser::TableType), + Memory(wasmparser::MemoryType), + Global(wasmparser::GlobalType), +} + +/// core:exportdecl in the specification +#[derive(Debug, Clone)] +pub struct CoreExportDecl<'a> { + pub name: Name<'a>, + pub desc: CoreExternDesc, +} + +// core:functype is wasmparser::FuncType + +/// core:instancetype_e in the specification +#[derive(Debug, Clone)] +pub struct CoreInstance<'a> { + pub exports: Vec>, +} + +/// core:moduletype_e in the specification +#[derive(Debug, Clone)] +pub struct CoreModule<'a> { + pub _imports: Vec>, + pub _exports: Vec>, +} + +/// core:deftype_e in the specification +#[derive(Debug, Clone)] +pub enum CoreDefined<'a> { + Func(wasmparser::FuncType), + Module(CoreModule<'a>), +} + +/// gamma_c in the specification +#[derive(Default, Debug, Clone)] +pub struct CoreCtx<'a> { + pub types: Vec>, + pub funcs: Vec, + pub modules: Vec>, + pub instances: Vec>, + pub tables: Vec, + pub mems: Vec, + pub globals: Vec, +} + +impl<'a> CoreCtx<'a> { + pub fn new() -> Self { + CoreCtx { + types: Vec::new(), + funcs: Vec::new(), + modules: Vec::new(), + instances: Vec::new(), + tables: Vec::new(), + mems: Vec::new(), + globals: Vec::new(), + } + } +} + +/// resourcetype_e in the specification +#[derive(Debug, Clone)] +pub struct Resource { + // One day, there will be a `rep` field here... + pub _dtor: Option, +} + +/// gamma in the specification +#[derive(Debug, Clone)] +pub struct Ctx<'p, 'a> { + pub parent: Option<&'p Ctx<'p, 'a>>, + pub outer_boundary: bool, + pub core: CoreCtx<'a>, + /// Universally-quantified variables, specifying for each the + /// known bound and whether or not it was imported. 
Uvars can come + /// from imports or component instantiations; only the imported + /// ones can be allowed to escape in the type of a component's + /// exports/imports, since only those can be named outside of the + /// component itself. + pub uvars: Vec<(BoundedTyvar<'a>, bool)>, + /// Existentially-quantified variables, specifying for each the + /// known bound and, if it was locally defined, the type which + /// instantiates it. + pub evars: Vec<(BoundedTyvar<'a>, Option>)>, + pub rtypes: Vec, + pub types: Vec>, + pub components: Vec>, + pub instances: Vec>, + pub funcs: Vec>, +} + +impl<'p, 'a> Ctx<'p, 'a> { + pub fn new<'c>(parent: Option<&'p Ctx<'c, 'a>>, outer_boundary: bool) -> Self { + Ctx { + parent, + outer_boundary, + core: CoreCtx::new(), + uvars: Vec::new(), + evars: Vec::new(), + rtypes: Vec::new(), + types: Vec::new(), + components: Vec::new(), + instances: Vec::new(), + funcs: Vec::new(), + } + } +} + +pub struct CtxParentIterator<'i, 'p: 'i, 'a: 'i> { + ctx: Option<&'i Ctx<'p, 'a>>, +} +impl<'i, 'p, 'a> Iterator for CtxParentIterator<'i, 'p, 'a> { + type Item = &'i Ctx<'p, 'a>; + fn next(&mut self) -> Option { + match self.ctx { + Some(ctx) => { + self.ctx = ctx.parent; + Some(ctx) + } + None => None, + } + } +} + +impl<'p, 'a> Ctx<'p, 'a> { + pub fn parents<'i>(&'i self) -> CtxParentIterator<'i, 'p, 'a> { + CtxParentIterator { ctx: Some(self) } + } +} diff --git a/src/hyperlight_component_util/src/guest.rs b/src/hyperlight_component_util/src/guest.rs new file mode 100644 index 000000000..864f9cd0f --- /dev/null +++ b/src/hyperlight_component_util/src/guest.rs @@ -0,0 +1,366 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; + +use crate::emit::{ + FnName, ResourceItemName, State, WitName, kebab_to_exports_name, kebab_to_fn, kebab_to_getter, + kebab_to_imports_name, kebab_to_namespace, kebab_to_type, kebab_to_var, split_wit_name, +}; +use crate::etypes::{Component, Defined, ExternDecl, ExternDesc, Handleable, Instance, Tyvar}; +use crate::hl::{ + emit_fn_hl_name, emit_hl_marshal_param, emit_hl_marshal_result, emit_hl_unmarshal_param, + emit_hl_unmarshal_result, +}; +use crate::{resource, rtypes}; + +/// Emit (mostly via returning) code to be added to an `impl for Host {}` declaration that implements this extern +/// declaration in terms of Hyperlight host calls. +/// +/// For functions associated with a resource, this will instead mutate +/// `s` to directly add them to the resource trait implementation and +/// return an empty token stream. +fn emit_import_extern_decl<'a, 'b, 'c>( + s: &'c mut State<'a, 'b>, + ed: &'c ExternDecl<'b>, +) -> TokenStream { + match &ed.desc { + ExternDesc::CoreModule(_) => panic!("core module (im/ex)ports are not supported"), + ExternDesc::Func(ft) => { + let param_decls = ft + .params + .iter() + .map(|p| rtypes::emit_func_param(s, p)) + .collect::>(); + let result_decl = rtypes::emit_func_result(s, &ft.result); + let hln = emit_fn_hl_name(s, ed.kebab_name); + let ret = format_ident!("ret"); + let marshal = ft + .params + .iter() + .map(|p| { + let me = emit_hl_marshal_param(s, kebab_to_var(p.name.name), &p.ty); + quote! 
{ args.push(::hyperlight_common::flatbuffer_wrappers::function_types::ParameterValue::VecBytes(#me)); } + }) + .collect::>(); + let unmarshal = emit_hl_unmarshal_result(s, ret.clone(), &ft.result); + let fnname = kebab_to_fn(ed.kebab_name); + let n = match &fnname { + FnName::Plain(n) => quote! { #n }, + FnName::Associated(_, m) => match m { + ResourceItemName::Constructor => quote! { new }, + ResourceItemName::Method(mn) => quote! { #mn }, + ResourceItemName::Static(mn) => quote! { #mn }, + }, + }; + let decl = quote! { + fn #n(&mut self, #(#param_decls),*) -> #result_decl { + let mut args = ::alloc::vec::Vec::new(); + #(#marshal)* + let #ret = ::hyperlight_guest_bin::host_comm::call_host_function::<::alloc::vec::Vec>( + #hln, + Some(args), + ::hyperlight_common::flatbuffer_wrappers::function_types::ReturnType::VecBytes, + ); + let ::core::result::Result::Ok(#ret) = #ret else { panic!("bad return from guest {:?}", #ret) }; + #[allow(clippy::unused_unit)] + #unmarshal + } + }; + match fnname { + FnName::Plain(_) => decl, + FnName::Associated(r, _) => { + // if a resource type could depend on another + // tyvar, there might be some complexities + // here, but that is not the case at the + // moment. + let path = s.resource_trait_path(r); + s.root_mod.r#impl(path, format_ident!("Host")).extend(decl); + TokenStream::new() + } + } + } + ExternDesc::Type(t) => match t { + Defined::Handleable(Handleable::Var(Tyvar::Bound(b))) => { + // only resources need something emitted + let (b, None) = s.resolve_tv(*b) else { + return quote! {}; + }; + let rtid = format_ident!("HostResource{}", s.var_offset + b as usize); + let path = s.resource_trait_path(kebab_to_type(ed.kebab_name)); + s.root_mod + .r#impl(path, format_ident!("Host")) + .extend(quote! { + type T = #rtid; + }); + TokenStream::new() + } + _ => quote! 
{}, + }, + ExternDesc::Instance(it) => { + let wn = split_wit_name(ed.kebab_name); + emit_import_instance(s, wn.clone(), it); + + let getter = kebab_to_getter(wn.name); + let tn = kebab_to_type(wn.name); + quote! { + type #tn = Self; + #[allow(refining_impl_trait)] + fn #getter<'a>(&'a mut self) -> &'a mut Self { + self + } + } + } + ExternDesc::Component(_) => { + panic!("nested components not yet supported in rust bindings"); + } + } +} + +/// Emit (via mutating `s`) an `impl for Host {}` +/// declaration that implements this imported instance in terms of +/// hyperlight host calls +fn emit_import_instance<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, wn: WitName, it: &'c Instance<'b>) { + let mut s = s.with_cursor(wn.namespace_idents()); + s.cur_helper_mod = Some(kebab_to_namespace(wn.name)); + + let imports = it + .exports + .iter() + .map(|ed| emit_import_extern_decl(&mut s, ed)) + .collect::>(); + + let ns = wn.namespace_path(); + let nsi = wn.namespace_idents(); + let trait_name = kebab_to_type(wn.name); + let r#trait = s.r#trait(&nsi, trait_name.clone()); + let tvs = r#trait + .tvs + .iter() + .map(|(_, (tv, _))| tv.unwrap()) + .collect::>(); + let tvs = tvs + .iter() + .map(|tv| rtypes::emit_var_ref(&mut s, &Tyvar::Bound(*tv))) + .collect::>(); + s.root_mod.items.extend(quote! { + impl #ns::#trait_name <#(#tvs),*> for Host { + #(#imports)* + } + }); +} + +/// Emit (via returning) code to register this particular extern +/// definition with Hyperlight as a callable function. 
+fn emit_export_extern_decl<'a, 'b, 'c>(
+    s: &'c mut State<'a, 'b>,
+    path: Vec<String>,
+    ed: &'c ExternDecl<'b>,
+) -> TokenStream {
+    match &ed.desc {
+        ExternDesc::CoreModule(_) => panic!("core module (im/ex)ports are not supported"),
+        ExternDesc::Func(ft) => {
+            // Name under which the function is registered with Hyperlight.
+            let fname = emit_fn_hl_name(s, ed.kebab_name);
+            let n = match kebab_to_fn(ed.kebab_name) {
+                FnName::Plain(n) => n,
+                FnName::Associated(_, _) => {
+                    panic!("resources exported from wasm not yet supported")
+                }
+            };
+            // Every component-level parameter is passed as one `VecBytes`
+            // core Hyperlight argument.
+            let pts = ft
+                .params
+                .iter()
+                .map(|_| quote! { ::hyperlight_common::flatbuffer_wrappers::function_types::ParameterType::VecBytes })
+                .collect::<Vec<_>>();
+            // `pds`: statements binding each raw byte argument to a local.
+            // `pus`: expressions unmarshalling those locals into Rust values.
+            let (pds, pus) = ft
+                .params
+                .iter()
+                .enumerate()
+                .map(|(i, p)| {
+                    let id = kebab_to_var(p.name.name);
+                    let pd = quote! { let ::hyperlight_common::flatbuffer_wrappers::function_types::ParameterValue::VecBytes(#id) = &fc.parameters.as_ref().unwrap()[#i] else { panic!("invariant violation: host passed non-VecBytes core hyperlight argument"); }; };
+                    let pu = emit_hl_unmarshal_param(s, id, &p.ty);
+                    (pd, pu)
+                })
+                .unzip::<_, _, Vec<_>, Vec<_>>();
+            // Walk the instance path from the root guest state down to the
+            // instance that owns this function.
+            let get_instance = path
+                .iter()
+                .map(|export| {
+                    let n = kebab_to_getter(split_wit_name(export).name);
+                    // TODO: Check that name resolution here works
+                    // properly with nested instances (not yet supported
+                    // in WIT, so we need to use a raw component type to
+                    // check)
+                    quote! {
+                        let mut state = state.#n();
+                        let state = ::core::borrow::BorrowMut::borrow_mut(&mut state);
+                    }
+                })
+                .collect::<Vec<_>>();
+            let ret = format_ident!("ret");
+            let marshal_result = emit_hl_marshal_result(s, ret.clone(), &ft.result);
+            let trait_path = s.cur_trait_path();
+            // The emitted wrapper is generic over the user's `Guest` type `T`;
+            // the registration below instantiates it with the `T` that is in
+            // scope inside `hyperlight_guest_init`.
+            quote! {
+                fn #n<T: Guest>(fc: &::hyperlight_common::flatbuffer_wrappers::function_call::FunctionCall) -> ::hyperlight_guest::error::Result<::alloc::vec::Vec<u8>> {
+                    <T as Guest>::with_guest_state(|state| {
+                        #(#pds)*
+                        #(#get_instance)*
+                        let #ret = #trait_path::#n(state, #(#pus,)*);
+                        ::core::result::Result::Ok(::hyperlight_common::flatbuffer_wrappers::util::get_flatbuffer_result::<&[u8]>(&#marshal_result))
+                    })
+                }
+                ::hyperlight_guest_bin::guest_function::register::register_function(
+                    ::hyperlight_guest_bin::guest_function::definition::GuestFunctionDefinition::new(
+                        ::alloc::string::ToString::to_string(#fname),
+                        ::alloc::vec![#(#pts),*],
+                        ::hyperlight_common::flatbuffer_wrappers::function_types::ReturnType::VecBytes,
+                        #n::<T> as usize
+                    )
+                );
+            }
+        }
+        ExternDesc::Type(_) => {
+            // no runtime representation is needed for types
+            quote! {}
+        }
+        ExternDesc::Instance(it) => {
+            let wn = split_wit_name(ed.kebab_name);
+            // `path` is owned and not used again in this arm, so extend it
+            // in place instead of cloning it.
+            let mut path = path;
+            path.push(ed.kebab_name.to_string());
+            emit_export_instance(s, wn, path, it)
+        }
+        ExternDesc::Component(_) => {
+            panic!("nested components not yet supported in rust bindings");
+        }
+    }
+}
+
+/// Emit (via returning) code to register each export of the given
+/// instance with Hyperlight as a callable function.
+///
+/// - `wn`: the WIT name of the instance; its namespace becomes the
+///   module cursor and its base name the current helper module and
+///   trait while the exports are emitted.
+/// - `path`: the instance path (from the root component) where this
+///   definition may be found, used to locate the correct component of
+///   the guest state. This should already have been updated for this
+///   instance by the caller!
+/// - `it`: the instance whose exports are emitted.
+fn emit_export_instance<'a, 'b, 'c>(
+    s: &'c mut State<'a, 'b>,
+    wn: WitName,
+    path: Vec<String>,
+    it: &'c Instance<'b>,
+) -> TokenStream {
+    let mut s = s.with_cursor(wn.namespace_idents());
+    s.cur_helper_mod = Some(kebab_to_namespace(wn.name));
+    s.cur_trait = Some(kebab_to_type(wn.name));
+    let exports = it
+        .exports
+        .iter()
+        .map(|ed| emit_export_extern_decl(&mut s, path.clone(), ed))
+        .collect::<Vec<_>>();
+    quote! { #(#exports)* }
+}
+
+/// Emit (via mutating `s`):
+/// - a resource table for each resource exported by this component
+/// - impl T for Host for each relevant trait T
+///
+/// Emit (via returning):
+/// - Hyperlight guest function ABI wrapper for each guest function
+/// - Hyperlight guest function register calls for each guest function
+fn emit_component<'a, 'b, 'c>(
+    s: &'c mut State<'a, 'b>,
+    wn: WitName,
+    ct: &'c Component<'b>,
+) -> TokenStream {
+    let mut s = s.with_cursor(wn.namespace_idents());
+    let ns = wn.namespace_path();
+    let r#trait = kebab_to_type(wn.name);
+    let import_trait = kebab_to_imports_name(wn.name);
+    let export_trait = kebab_to_exports_name(wn.name);
+    s.import_param_var = Some(format_ident!("I"));
+    s.self_param_var = Some(format_ident!("S"));
+
+    let rtsid = format_ident!("{}Resources", r#trait);
+    resource::emit_tables(
+        &mut s,
+        rtsid.clone(),
+        quote! { #ns::#import_trait + ::core::marker::Send + 'static },
+        Some(quote! { #ns::#export_trait }),
+        true,
+    );
+    // One `HostResourceN` handle struct per bound type variable.
+    s.root_mod
+        .items
+        .extend(s.bound_vars.iter().enumerate().map(|(i, _)| {
+            let id = format_ident!("HostResource{}", i);
+            quote! {
+                pub struct #id { rep: u32 }
+            }
+        }));
+
+    // Imports are emitted with the variable offset set to the number of
+    // the instance's evars; it is reset to zero before emitting exports.
+    s.var_offset = ct.instance.evars.len();
+    s.cur_trait = Some(import_trait.clone());
+    let imports = ct
+        .imports
+        .iter()
+        .map(|ed| emit_import_extern_decl(&mut s, ed))
+        .collect::<Vec<_>>();
+
+    s.var_offset = 0;
+
+    let exports = ct
+        .instance
+        .unqualified
+        .exports
+        .iter()
+        .map(|ed| emit_export_extern_decl(&mut s, Vec::new(), ed))
+        .collect::<Vec<_>>();
+
+    s.root_mod.items.extend(quote! {
+        impl #ns::#import_trait for Host {
+            #(#imports)*
+        }
+    });
+    quote! {
+        #(#exports)*
+    }
+}
+
+/// In addition to the items emitted by [`emit_component`], mutate `s`
+/// to emit:
+/// - a dummy `Host` type to reflect host functions
+/// - a toplevel `Guest` trait that can be implemented to provide access to
+///   any guest state
+/// - a `hyperlight_guest_init` function that registers all guest
+///   functions when given a type that implements the `Guest` trait
+pub fn emit_toplevel<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, n: &str, ct: &'c Component<'b>) {
+    s.is_impl = true;
+    log::debug!("\n\n=== starting guest emit ===\n");
+    let wn = split_wit_name(n);
+
+    let ns = wn.namespace_path();
+    let export_trait = kebab_to_exports_name(wn.name);
+
+    let tokens = emit_component(s, wn, ct);
+
+    s.root_mod.items.extend(quote! {
+        pub struct Host {}
+
+        /// Because Hyperlight guest functions can't close over any
+        /// state, this function is used on each guest call to acquire
+        /// any state that the guest functions might need.
+        pub trait Guest: #ns::#export_trait {
+            fn with_guest_state<R, F: FnOnce(&mut Self) -> R>(f: F) -> R;
+        }
+        /// Register all guest functions.
+        pub fn hyperlight_guest_init<T: Guest>() {
+            #tokens
+        }
+    });
+}
diff --git a/src/hyperlight_component_util/src/hl.rs b/src/hyperlight_component_util/src/hl.rs
new file mode 100644
index 000000000..5a8e984f3
--- /dev/null
+++ b/src/hyperlight_component_util/src/hl.rs
@@ -0,0 +1,690 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+
+use itertools::Itertools;
+use proc_macro2::{Ident, TokenStream};
+use quote::{format_ident, quote};
+
+use crate::emit::{State, kebab_to_cons, kebab_to_var};
+use crate::etypes::{self, Defined, Handleable, TypeBound, Tyvar, Value};
+use crate::rtypes;
+
+/// Build the string used "on the wire" to identify a function between
+/// the guest and the host: every component of the current module
+/// cursor followed by the function's kebab-case name, joined with
+/// `::`. This should be replaced with an integer index so that we can
+/// dispatch less dynamically in the future.
+pub fn emit_fn_hl_name(s: &State, kebab: &str) -> String {
+    // Module path first, function name last.
+    let module_path = s.mod_cursor.iter().map(|segment| segment.to_string());
+    let leaf = std::iter::once(kebab.to_string());
+    module_path.chain(leaf).join("::")
+}
+
+/// Emit code to unmarshal a value into a toplevel type (i.e. types
+/// that cannot be represented inline in a valtype).
+/// - `id`: an ident of a slice that the code will unmarshal from; also
+///   used as the beginning of any other identifiers that this code
+///   declares (if only we had hygiene in stable rust...)
+/// - `tv`: the tyvar that we followed to get to this type
+/// - `vt`: the value type that we are unmarshalling
+///
+/// The token stream produced will be an expression which typechecks
+/// as a tuple whose first component is the Rust type (as defined by
+/// the [`crate::rtypes`] module) of the given value type and whose
+/// second component is an integer. The second component represents
+/// the number of bytes consumed from the `id` slice while
+/// unmarshalling.
+pub fn emit_hl_unmarshal_toplevel_value(
+    s: &mut State,
+    id: Ident,
+    tv: Tyvar,
+    vt: &Value,
+) -> TokenStream {
+    let tname = rtypes::emit_var_ref_value(s, &tv);
+    // Work on a private copy of the state so that the offset adjustment
+    // below does not leak back to the caller.
+    let mut s = s.clone();
+    let Tyvar::Bound(n) = tv else {
+        panic!("impossible tyvar")
+    };
+    s.var_offset += n as usize + 1;
+    let s = &mut s;
+    match vt {
+        Value::Record(rfs) => {
+            // Fields are laid out back to back; `cursor` tracks the running
+            // byte offset into `id`.
+            let cursor = format_ident!("{}_cursor", id);
+            let inid = format_ident!("{}_field", id);
+            let (decls, uses) = rfs
+                .iter()
+                .map(|rf| {
+                    let field_name = kebab_to_var(rf.name.name);
+                    let field_name_var = format_ident!("{}_field_{}", id, field_name);
+                    let vtun = emit_hl_unmarshal_value(s, inid.clone(), &rf.ty);
+                    (
+                        quote! {
+                            let #inid = &#id[#cursor..];
+                            let (#field_name_var, b) = { #vtun };
+                            #cursor += b;
+                        },
+                        quote! {
+                            #field_name: #field_name_var,
+                        },
+                    )
+                })
+                .unzip::<_, _, Vec<_>, Vec<_>>();
+            quote! {
+                let mut #cursor = 0;
+                #(#decls)*
+                (#tname { #(#uses)* }, #cursor)
+            }
+        }
+        Value::Flags(ns) => {
+            // One bit per flag, eight flags per byte, lowest bit first.
+            let bytes = usize::div_ceil(ns.len(), 8);
+            let fields = ns.iter().enumerate().map(|(i, n)| {
+                let byte_offset = i / 8;
+                let bit_offset = i % 8;
+                let fieldid = kebab_to_var(n.name);
+                quote! {
+                    #fieldid: (#id[#byte_offset] >> #bit_offset) & 0x1 == 1,
+                }
+            });
+            quote! {
+                (#tname { #(#fields)* }, #bytes)
+            }
+        }
+        Value::Variant(vcs) => {
+            // A 4-byte native-endian case index, then the payload (if any).
+            let inid = format_ident!("{}_body", id);
+            let vcs = vcs.iter().enumerate().map(|(i, vc)| {
+                let case_name = kebab_to_cons(vc.name.name);
+                let i = i as u32;
+                let case_name_var = format_ident!("{}_case_{}", id, case_name);
+                match &vc.ty {
+                    Some(ty) => {
+                        let vtun = emit_hl_unmarshal_value(s, inid.clone(), ty);
+                        quote! {
+                            #i => {
+                                let (#case_name_var, b) = { #vtun };
+                                (#tname::#case_name(#case_name_var), b + 4)
+                            }
+                        }
+                    }
+                    None => quote! {
+                        #i => (#tname::#case_name, 4)
+                    },
+                }
+            });
+            quote! {
+                let n = u32::from_ne_bytes(#id[0..4].try_into().unwrap());
+                let #inid = &#id[4..];
+                match n {
+                    #(#vcs,)*
+                    _ => panic!("invalid value for variant"),
+                }
+            }
+        }
+        Value::Enum(ns) => {
+            // A bare 4-byte native-endian case index.
+            let vcs = ns.iter().enumerate().map(|(i, n)| {
+                let case_name = kebab_to_cons(n.name);
+                let i = i as u32;
+                quote! { #i => ( #tname::#case_name, 4) }
+            });
+            quote! {
+                let n = u32::from_ne_bytes(#id[0..4].try_into().unwrap());
+                match n {
+                    #(#vcs,)*
+                    _ => panic!("invalid value for enum"),
+                }
+            }
+        }
+        // Everything else has an inline representation.
+        _ => emit_hl_unmarshal_value(s, id, vt),
+    }
+}
+
+/// Find the resource index that the given type variable refers to.
+///
+/// Precondition: this type variable does refer to a resource type
+fn resolve_tyvar_to_resource(s: &mut State, v: u32) -> u32 {
+    match s.bound_vars[v as usize].bound {
+        TypeBound::SubResource => v,
+        // An alias to another bound variable: follow it, rebasing the
+        // relative index onto `v`.
+        TypeBound::Eq(Defined::Handleable(Handleable::Var(Tyvar::Bound(vv)))) => {
+            resolve_tyvar_to_resource(s, v + vv + 1)
+        }
+        _ => panic!("impossible: resource var is not resource"),
+    }
+}
+/// Find the resource index that the given Handleable refers to.
+///
+/// Precondition: this type variable does refer to a resource type
+pub fn resolve_handleable_to_resource(s: &mut State, ht: &Handleable) -> u32 {
+    match ht {
+        Handleable::Var(Tyvar::Bound(vi)) => {
+            resolve_tyvar_to_resource(s, s.var_offset as u32 + *vi)
+        }
+        _ => panic!("impossible handleable in type"),
+    }
+}
+
+/// Emit code to unmarshal a value into an inline-able value type
+/// - `id`: an ident of a slice that the code will unmarshal from; also
+///   used as the beginning of any other identifiers that this code
+///   declares (if only we had hygiene in stable rust...)
+/// - `vt`: the value type that we are unmarshalling
+///
+/// The token stream produced will be an expression which typechecks
+/// as a tuple whose first component is the Rust type (as defined by
+/// the [`crate::rtypes`] module) of the given value type and whose
+/// second component is an integer. The second component represents
+/// the number of bytes consumed from the `id` slice while
+/// unmarshalling.
+///
+/// # Panics
+///
+/// Panics if `vt` is a record, flags, variant or enum (those are only
+/// valid at the top level of a valtype), or if a type variable cannot
+/// be resolved to a value type.
+pub fn emit_hl_unmarshal_value(s: &mut State, id: Ident, vt: &Value) -> TokenStream {
+    match vt {
+        Value::Bool => quote! { (#id[0] != 0, 1) },
+        Value::S(_) | Value::U(_) | Value::F(_) => {
+            let (tid, width) = rtypes::numeric_rtype(vt);
+            let blen = width as usize / 8;
+            quote! {
+                (#tid::from_ne_bytes(#id[0..#blen].try_into().unwrap()), #blen)
+            }
+        }
+        // The bytes come from the other side of the sandbox boundary, so
+        // the scalar value must be validated: constructing a `char` from an
+        // invalid `u32` without checking is undefined behaviour.
+        Value::Char => quote! {
+            (char::from_u32(u32::from_ne_bytes(
+                #id[0..4].try_into().unwrap())).expect("invalid unicode scalar value for char"), 4)
+        },
+        Value::String => quote! {
+            let n = u32::from_ne_bytes(#id[0..4].try_into().unwrap()) as usize;
+            let s = ::alloc::string::ToString::to_string(::core::str::from_utf8(&#id[4..4 + n]).unwrap()); // todo: better error handling
+            (s, n + 4)
+        },
+        Value::List(vt) => {
+            // A 4-byte element count followed by the elements back to back.
+            let retid = format_ident!("{}_list", id);
+            let inid = format_ident!("{}_elem", id);
+            let vtun = emit_hl_unmarshal_value(s, inid.clone(), vt);
+            quote! {
+                let n = u32::from_ne_bytes(#id[0..4].try_into().unwrap()) as usize;
+                let mut #retid = alloc::vec::Vec::new();
+                let mut cursor = 4;
+                for _ in 0..n {
+                    let #inid = &#id[cursor..];
+                    let (x, b) = { #vtun };
+                    cursor += b;
+                    #retid.push(x);
+                }
+                (#retid, cursor)
+            }
+        }
+        Value::Record(_) => panic!("record not at top level of valtype"),
+        Value::Tuple(vts) => {
+            // Elements are read in order; after each one the input slice is
+            // advanced and the total length accumulated.
+            let inid = format_ident!("{}_elem", id);
+            let len = format_ident!("{}_len", id);
+            let (ns, vtuns) = vts
+                .iter()
+                .enumerate()
+                .map(|(i, vt)| {
+                    let vtun = emit_hl_unmarshal_value(s, inid.clone(), vt);
+                    let retid = format_ident!("{}_elem{}", id, i);
+                    (
+                        retid.clone(),
+                        quote! {
+                            let (#retid, b) = { #vtun };
+                            #len += b;
+                            let #inid = &#inid[b..];
+                        },
+                    )
+                })
+                .unzip::<_, _, Vec<_>, Vec<_>>();
+            quote! {
+                let #inid = &#id[0..];
+                let mut #len = 0;
+                #(#vtuns)*
+                ((#(#ns),*), #len)
+            }
+        }
+        Value::Flags(_) => panic!("flags not at top level of valtype"),
+        Value::Variant(_) => panic!("variant not at top level of valtype"),
+        Value::Enum(_) => panic!("enum not at top level of valtype"),
+        Value::Option(vt) => {
+            // One tag byte (non-zero means `Some`), then the payload.
+            let inid = format_ident!("{}_body", id);
+            let vtun = emit_hl_unmarshal_value(s, inid.clone(), vt);
+            quote! {
+                let n = u8::from_ne_bytes(#id[0..1].try_into().unwrap());
+                if n != 0 {
+                    let #inid = &#id[1..];
+                    let (x, b) = { #vtun };
+                    (::core::option::Option::Some(x), b + 1)
+                } else {
+                    (::core::option::Option::None, 1)
+                }
+            }
+        }
+        Value::Result(vt1, vt2) => {
+            // One tag byte (zero means `Ok`), then the payload; a missing
+            // payload type unmarshals as unit and consumes no bytes.
+            let inid = format_ident!("{}_body", id);
+            let vtun1 = if let Some(ref vt1) = **vt1 {
+                emit_hl_unmarshal_value(s, inid.clone(), vt1)
+            } else {
+                quote! { ((), 0) }
+            };
+            let vtun2 = if let Some(ref vt2) = **vt2 {
+                emit_hl_unmarshal_value(s, inid.clone(), vt2)
+            } else {
+                quote! { ((), 0) }
+            };
+            quote! {
+                let i = u8::from_ne_bytes(#id[0..1].try_into().unwrap());
+                let #inid = &#id[1..];
+                if i == 0 {
+                    let (x, b) = { #vtun1 };
+                    (::core::result::Result::Ok(x), b + 1)
+                } else {
+                    let (x, b) = { #vtun2 };
+                    (::core::result::Result::Err(x), b + 1)
+                }
+            }
+        }
+        Value::Own(ht) => {
+            let vi = resolve_handleable_to_resource(s, ht);
+            log::debug!("resolved ht to r (1) {:?} {:?}", ht, vi);
+            if s.is_guest {
+                // On the guest, a host resource is just its 4-byte handle.
+                let rid = format_ident!("HostResource{}", vi);
+                if s.is_wasmtime_guest {
+                    quote! {
+                        let i = u32::from_ne_bytes(#id[0..4].try_into().unwrap());
+                        (::wasmtime::component::Resource::<#rid>::new_own(i), 4)
+                    }
+                } else {
+                    quote! {
+                        let i = u32::from_ne_bytes(#id[0..4].try_into().unwrap());
+                        (#rid { rep: i }, 4)
+                    }
+                }
+            } else {
+                // On the host, the handle indexes the resource table and the
+                // entry is taken out of it.
+                let rid = format_ident!("resource{}", vi);
+                quote! {
+                    let i = u32::from_ne_bytes(#id[0..4].try_into().unwrap());
+                    let Some(v) = rts.#rid[i as usize].take() else {
+                        // todo: better error handling
+                        panic!("");
+                    };
+                    (v, 4)
+                }
+            }
+        }
+        Value::Borrow(ht) => {
+            let vi = resolve_handleable_to_resource(s, ht);
+            log::debug!("resolved ht to r (2) {:?} {:?}", ht, vi);
+            if s.is_guest {
+                // NOTE(review): unlike the `Own` case above, this branch does
+                // not consult `s.is_wasmtime_guest` and always emits a
+                // wasmtime `Resource`; confirm that this is intended.
+                let rid = format_ident!("HostResource{}", vi);
+                quote! {
+                    let i = u32::from_ne_bytes(#id[0..4].try_into().unwrap());
+                    (::wasmtime::component::Resource::<#rid>::new_borrow(i), 4)
+                }
+            } else {
+                let rid = format_ident!("resource{}", vi);
+                quote! {
+                    let i = u32::from_ne_bytes(#id[0..4].try_into().unwrap());
+                    let Some(v) = rts.#rid[i as usize].borrow() else {
+                        // todo: better error handling
+                        panic!("");
+                    };
+                    (v, 4)
+                }
+            }
+        }
+        Value::Var(tv, _) => {
+            // Resolve the variable to its definition and unmarshal that as
+            // a toplevel type.
+            let Some(Tyvar::Bound(n)) = tv else {
+                panic!("impossible tyvar")
+            };
+            let (n, Some(Defined::Value(vt))) = s.resolve_tv(*n) else {
+                panic!("unresolvable tyvar (2)");
+            };
+            let vt = vt.clone();
+            emit_hl_unmarshal_toplevel_value(s, id, Tyvar::Bound(n), &vt)
+        }
+    }
+}
+
+/// Emit code to marshal a value from a toplevel type (i.e. types that
+/// cannot be represented inline in a valtype).
+/// - `id`: an ident of a Rust value of the Rust type (as defined by
+///   the [`crate::rtypes`] module) of the given value type that is
+///   being marshaled from
+/// - `tv`: the tyvar that we followed to get to this type
+/// - `vt`: the value type that we are marshaling
+///
+/// The token stream produced will be an expression which typechecks
+/// as `Vec<u8>`.
+pub fn emit_hl_marshal_toplevel_value(
+    s: &mut State,
+    id: Ident,
+    tv: Tyvar,
+    vt: &Value,
+) -> TokenStream {
+    let tname = rtypes::emit_var_ref_value(s, &tv);
+    // Work on a private copy of the state so that the offset adjustment
+    // below does not leak back to the caller.
+    let mut s = s.clone();
+    let Tyvar::Bound(n) = tv else {
+        panic!("impossible tyvar")
+    };
+    s.var_offset += n as usize + 1;
+    let s = &mut s;
+    match vt {
+        Value::Record(rfs) => {
+            // Fields are appended back to back in declaration order.
+            let retid = format_ident!("{}_record", id);
+            let fields = rfs
+                .iter()
+                .map(|rf| {
+                    let field_name = kebab_to_var(rf.name.name);
+                    let fieldid = format_ident!("{}_field_{}", id, field_name);
+                    let vtun = emit_hl_marshal_value(s, fieldid.clone(), &rf.ty);
+                    quote! {
+                        let #fieldid = #id.#field_name;
+                        #retid.extend({ #vtun });
+                    }
+                })
+                .collect::<Vec<_>>();
+            quote! {
+                let mut #retid = alloc::vec::Vec::new();
+                #(#fields)*
+                #retid
+            }
+        }
+        Value::Flags(ns) => {
+            // One bit per flag, eight flags per byte, lowest bit first.
+            let bytes = usize::div_ceil(ns.len(), 8);
+            let fields = ns
+                .iter()
+                .enumerate()
+                .map(|(i, n)| {
+                    let byte_offset = i / 8;
+                    let bit_offset = i % 8;
+                    let fieldid = kebab_to_var(n.name);
+                    quote! {
+                        bytes[#byte_offset] |= (if #id.#fieldid { 1 } else { 0 }) << #bit_offset;
+                    }
+                })
+                .collect::<Vec<_>>();
+            quote! {
+                let mut bytes = [0; #bytes];
+                #(#fields)*
+                alloc::vec::Vec::from(bytes)
+            }
+        }
+        Value::Variant(vcs) => {
+            // A 4-byte native-endian case index, then the payload (if any).
+            let retid = format_ident!("{}_ret", id);
+            let bodyid = format_ident!("{}_body", id);
+            let vcs = vcs
+                .iter()
+                .enumerate()
+                .map(|(i, vc)| {
+                    let i = i as u32;
+                    let case_name = kebab_to_cons(vc.name.name);
+                    match &vc.ty {
+                        Some(ty) => {
+                            let vtun = emit_hl_marshal_value(s, bodyid.clone(), ty);
+                            quote! {
+                                #tname::#case_name(#bodyid) => {
+                                    #retid.extend(u32::to_ne_bytes(#i));
+                                    #retid.extend({ #vtun })
+                                }
+                            }
+                        }
+                        None => {
+                            quote! {
+                                #tname::#case_name => {
+                                    #retid.extend(u32::to_ne_bytes(#i));
+                                }
+                            }
+                        }
+                    }
+                })
+                .collect::<Vec<_>>();
+            quote! {
+                let mut #retid = alloc::vec::Vec::new();
+                match #id {
+                    #(#vcs)*
+                }
+                #retid
+            }
+        }
+        Value::Enum(ns) => {
+            // A bare 4-byte native-endian case index.
+            let vcs = ns.iter().enumerate().map(|(i, n)| {
+                let case_name = kebab_to_cons(n.name);
+                let i = i as u32;
+                quote! { #tname::#case_name => #i }
+            });
+            quote! {
+                alloc::vec::Vec::from(u32::to_ne_bytes(match #id {
+                    #(#vcs,)*
+                }))
+            }
+        }
+        // Everything else has an inline representation.
+        _ => emit_hl_marshal_value(s, id, vt),
+    }
+}
+
+/// Emit code to marshal a value from an inline-able value type
+/// - `id`: an ident of a Rust value of the Rust type (as defined by
+///   the [`crate::rtypes`] module) of the given value type that is
+///   being marshaled from
+/// - `vt`: the value type that we are marshaling
+///
+/// The token stream produced will be an expression which typechecks
+/// as `Vec<u8>`.
+///
+/// # Panics
+///
+/// Panics if `vt` is a record, flags, variant or enum (those are only
+/// valid at the top level of a valtype), or if a type variable cannot
+/// be resolved to a value type.
+pub fn emit_hl_marshal_value(s: &mut State, id: Ident, vt: &Value) -> TokenStream {
+    match vt {
+        Value::Bool => quote! {
+            alloc::vec![if #id { 1u8 } else { 0u8 }]
+        },
+        Value::S(_) | Value::U(_) | Value::F(_) => {
+            let (tid, _) = rtypes::numeric_rtype(vt);
+            quote! { alloc::vec::Vec::from(#tid::to_ne_bytes(#id)) }
+        }
+        Value::Char => quote! {
+            alloc::vec::Vec::from((#id as u32).to_ne_bytes())
+        },
+        Value::String => {
+            // A 4-byte length prefix followed by the UTF-8 bytes.
+            let retid = format_ident!("{}_string", id);
+            let bytesid = format_ident!("{}_bytes", id);
+            quote! {
+                let mut #retid = alloc::vec::Vec::new();
+                let #bytesid = #id.into_bytes();
+                #retid.extend(alloc::vec::Vec::from(u32::to_ne_bytes(#bytesid.len() as u32)));
+                #retid.extend(#bytesid);
+                #retid
+            }
+        }
+        Value::List(vt) => {
+            // A 4-byte element count followed by the elements back to back.
+            let retid = format_ident!("{}_list", id);
+            let inid = format_ident!("{}_elem", id);
+            let vtun = emit_hl_marshal_value(s, inid.clone(), vt);
+            quote! {
+                let mut #retid = alloc::vec::Vec::new();
+                let n = #id.len();
+                #retid.extend(alloc::vec::Vec::from(u32::to_ne_bytes(n as u32)));
+                for #inid in #id {
+                    #retid.extend({ #vtun })
+                }
+                #retid
+            }
+        }
+        Value::Record(_) => panic!("record not at top level of valtype"),
+        Value::Tuple(vts) => {
+            let retid = format_ident!("{}_tuple", id);
+            let inid = format_ident!("{}_elem", id);
+            let vtuns = vts.iter().enumerate().map(|(i, vt)| {
+                // `syn::Index` renders as a bare tuple index (`.0`, `.1`, ...).
+                let i = syn::Index::from(i);
+                let vtun = emit_hl_marshal_value(s, inid.clone(), vt);
+                quote! {
+                    let #inid = #id.#i;
+                    #retid.extend({ #vtun });
+                }
+            });
+            quote! {
+                let mut #retid = alloc::vec::Vec::new();
+                #(#vtuns)*
+                #retid
+            }
+        }
+        Value::Flags(_) => panic!("flags not at top level of valtype"),
+        Value::Variant(_) => panic!("variant not at top level of valtype"),
+        Value::Enum(_) => panic!("enum not at top level of valtype"),
+        Value::Option(vt) => {
+            // One tag byte (1 for `Some`, 0 for `None`), then the payload.
+            let bodyid = format_ident!("{}_body", id);
+            let retid = format_ident!("{}_ret", id);
+            let vtun = emit_hl_marshal_value(s, bodyid.clone(), vt);
+            quote! {
+                match #id {
+                    ::core::option::Option::Some(#bodyid) => {
+                        let mut #retid = alloc::vec::Vec::from(u8::to_ne_bytes(1));
+                        #retid.extend({ #vtun });
+                        #retid
+                    },
+                    ::core::option::Option::None => alloc::vec::Vec::from(u8::to_ne_bytes(0))
+                }
+            }
+        }
+        Value::Result(vt1, vt2) => {
+            // One tag byte (0 for `Ok`, 1 for `Err`), then the payload; a
+            // missing payload type contributes no bytes.
+            let bodyid = format_ident!("{}_body", id);
+            let retid = format_ident!("{}_ret", id);
+            let vtun1 = if let Some(ref vt1) = **vt1 {
+                let vtun = emit_hl_marshal_value(s, bodyid.clone(), vt1);
+                quote! { #retid.extend({ #vtun }); }
+            } else {
+                quote! {}
+            };
+            let vtun2 = if let Some(ref vt2) = **vt2 {
+                let vtun = emit_hl_marshal_value(s, bodyid.clone(), vt2);
+                quote! { #retid.extend({ #vtun }); }
+            } else {
+                quote! {}
+            };
+            quote! {
+                match #id {
+                    ::core::result::Result::Ok(#bodyid) => {
+                        let mut #retid = alloc::vec::Vec::from(u8::to_ne_bytes(0));
+                        #vtun1
+                        #retid
+                    },
+                    ::core::result::Result::Err(#bodyid) => {
+                        let mut #retid = alloc::vec::Vec::from(u8::to_ne_bytes(1));
+                        #vtun2
+                        #retid
+                    },
+                }
+            }
+        }
+        Value::Own(ht) => {
+            let vi = resolve_handleable_to_resource(s, ht);
+            log::debug!("resolved ht to r (3) {:?} {:?}", ht, vi);
+            if s.is_guest {
+                // `rep` is emitted as a method call for wasmtime guests and
+                // as a plain field access otherwise.
+                let call = if s.is_wasmtime_guest {
+                    quote! { () }
+                } else {
+                    quote! {}
+                };
+                quote! {
+                    alloc::vec::Vec::from(u32::to_ne_bytes(#id.rep #call))
+                }
+            } else {
+                // On the host, the value is pushed into the resource table
+                // and its index is what gets marshalled.
+                let rid = format_ident!("resource{}", vi);
+                quote! {
+                    let i = rts.#rid.len();
+                    rts.#rid.push_back(::hyperlight_common::resource::ResourceEntry::give(#id));
+                    alloc::vec::Vec::from(u32::to_ne_bytes(i as u32))
+                }
+            }
+        }
+        Value::Borrow(ht) => {
+            let vi = resolve_handleable_to_resource(s, ht);
+            log::debug!("resolved ht to r (6) {:?} {:?}", ht, vi);
+            if s.is_guest {
+                let call = if s.is_wasmtime_guest {
+                    quote! { () }
+                } else {
+                    quote! {}
+                };
+                quote! {
+                    alloc::vec::Vec::from(u32::to_ne_bytes(#id.rep #call))
+                }
+            } else {
+                let rid = format_ident!("resource{}", vi);
+                quote! {
+                    let i = rts.#rid.len();
+                    rts.#rid.push_back(::hyperlight_common::resource::ResourceEntry::lend(#id));
+                    alloc::vec::Vec::from(u32::to_ne_bytes(i as u32))
+                }
+            }
+        }
+        Value::Var(tv, _) => {
+            // Resolve the variable to its definition and marshal that as a
+            // toplevel type.
+            let Some(Tyvar::Bound(n)) = tv else {
+                panic!("impossible tyvar")
+            };
+            let (n, Some(Defined::Value(vt))) = s.resolve_tv(*n) else {
+                panic!("unresolvable tyvar (2)");
+            };
+            let vt = vt.clone();
+            emit_hl_marshal_toplevel_value(s, id, Tyvar::Bound(n), &vt)
+        }
+    }
+}
+
+/// Emit code to unmarshal a parameter with value type `pt` from a
+/// slice named by `id`. The resultant token stream will be an
+/// expression which typechecks at the Rust type (as defined by the
+/// [`crate::rtypes`] module) of the given value type.
+pub fn emit_hl_unmarshal_param(s: &mut State, id: Ident, pt: &Value) -> TokenStream { + let toks = emit_hl_unmarshal_value(s, id, pt); + quote! { { #toks }.0 } +} + +/// Emit code to unmarshal the result of a function with result type +/// `rt` from a slice named by `id`. The resultant token stream +/// will be an expression which typechecks at the Rust type (as +/// defined by the [`crate::rtypes`] module) of the unnamed type of +/// the result, or unit if named results are used. +/// +/// Precondition: the result type must only be a named result if there +/// are no names in it (i.e. a unit type) +pub fn emit_hl_unmarshal_result(s: &mut State, id: Ident, rt: &etypes::Result) -> TokenStream { + match rt { + etypes::Result::Named(rs) if rs.is_empty() => quote! { () }, + etypes::Result::Unnamed(vt) => { + let toks = emit_hl_unmarshal_value(s, id, vt); + quote! { { #toks }.0 } + } + _ => panic!("named results not supported"), + } +} + +/// Emit code to marshal a parameter with value type `pt` from a +/// Rust value named by `id`. The resultant token stream will be an +/// expression which typechecks as `Vec`. +pub fn emit_hl_marshal_param(s: &mut State, id: Ident, pt: &Value) -> TokenStream { + let toks = emit_hl_marshal_value(s, id, pt); + quote! { { #toks } } +} + +/// Emit code to marshal the result of a function with result type +/// `rt` from a Rust value named by `id`. The resultant token stream +/// will be an expression that which typechecks as `Vec`. +/// +/// Precondition: the result type must only be a named result if there +/// are no names in it (a unit type) +pub fn emit_hl_marshal_result(s: &mut State, id: Ident, rt: &etypes::Result) -> TokenStream { + match rt { + etypes::Result::Named(rs) if rs.is_empty() => quote! { ::alloc::vec::Vec::new() }, + etypes::Result::Unnamed(vt) => { + let toks = emit_hl_marshal_value(s, id, vt); + quote! 
{ { #toks } } + } + _ => panic!("named results not supported"), + } +} diff --git a/src/hyperlight_component_util/src/host.rs b/src/hyperlight_component_util/src/host.rs new file mode 100644 index 000000000..0fba3ff33 --- /dev/null +++ b/src/hyperlight_component_util/src/host.rs @@ -0,0 +1,380 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +use proc_macro2::{Ident, TokenStream}; +use quote::{format_ident, quote}; + +use crate::emit::{ + FnName, ResourceItemName, State, WitName, kebab_to_exports_name, kebab_to_fn, kebab_to_getter, + kebab_to_imports_name, kebab_to_namespace, kebab_to_type, kebab_to_var, split_wit_name, +}; +use crate::etypes::{Component, ExternDecl, ExternDesc, Instance, Tyvar}; +use crate::hl::{ + emit_fn_hl_name, emit_hl_marshal_param, emit_hl_marshal_result, emit_hl_unmarshal_param, + emit_hl_unmarshal_result, +}; +use crate::{resource, rtypes}; + +/// Emit (via returning) code to be added to an `impl +/// for Guest {}` declaration that implements this extern declaration +/// in terms of Hyperlight guest calls +fn emit_export_extern_decl<'a, 'b, 'c>( + s: &'c mut State<'a, 'b>, + ed: &'c ExternDecl<'b>, +) -> TokenStream { + match &ed.desc { + ExternDesc::CoreModule(_) => panic!("core module (im/ex)ports are not supported"), + ExternDesc::Func(ft) => { + match kebab_to_fn(ed.kebab_name) { + FnName::Plain(n) => { + let param_decls = ft + .params + .iter() + .map(|p| rtypes::emit_func_param(s, p)) + .collect::>(); + 
let result_decl = rtypes::emit_func_result(s, &ft.result); + let hln = emit_fn_hl_name(s, ed.kebab_name); + let ret = format_ident!("ret"); + let marshal = ft + .params + .iter() + .map(|p| emit_hl_marshal_param(s, kebab_to_var(p.name.name), &p.ty)) + .collect::>(); + let unmarshal = emit_hl_unmarshal_result(s, ret.clone(), &ft.result); + quote! { + fn #n(&mut self, #(#param_decls),*) -> #result_decl { + let #ret = ::hyperlight_host::sandbox::Callable::call::<::std::vec::Vec::>(&mut self.sb, + #hln, + (#(#marshal,)*) + ); + let ::std::result::Result::Ok(#ret) = #ret else { panic!("bad return from guest {:?}", #ret) }; + #unmarshal + } + } + } + FnName::Associated(_, _) => + // this can be fixed when the guest wasm and + // general macros are split + { + panic!("guest resources are not currently supported") + } + } + } + ExternDesc::Type(_) => { + // no runtime representation is needed for types + quote! {} + } + ExternDesc::Instance(it) => { + let wn = split_wit_name(ed.kebab_name); + emit_export_instance(s, wn.clone(), it); + + let getter = kebab_to_getter(wn.name); + let tn = kebab_to_type(wn.name); + quote! 
{ + type #tn = Self; + #[allow(refining_impl_trait)] + fn #getter<'a>(&'a mut self) -> &'a mut Self { + self + } + } + } + ExternDesc::Component(_) => { + panic!("nested components not yet supported in rust bindings"); + } + } +} + +/// Emit (via mutating `s`) an `impl for Host {}` +/// declaration that implements this exported instance in terms of +/// hyperlight guest calls +fn emit_export_instance<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, wn: WitName, it: &'c Instance<'b>) { + let mut s = s.with_cursor(wn.namespace_idents()); + s.cur_helper_mod = Some(kebab_to_namespace(wn.name)); + + let exports = it + .exports + .iter() + .map(|ed| emit_export_extern_decl(&mut s, ed)) + .collect::>(); + + let ns = wn.namespace_path(); + let nsi = wn.namespace_idents(); + let trait_name = kebab_to_type(wn.name); + let r#trait = s.r#trait(&nsi, trait_name.clone()); + let tvs = r#trait + .tvs + .iter() + .map(|(_, (tv, _))| tv.unwrap()) + .collect::>(); + let tvs = tvs + .iter() + .map(|tv| rtypes::emit_var_ref(&mut s, &Tyvar::Bound(*tv))) + .collect::>(); + let (root_ns, root_base_name) = s.root_component_name.unwrap(); + let wrapper_name = kebab_to_wrapper_name(root_base_name); + let imports_name = kebab_to_imports_name(root_base_name); + s.root_mod.items.extend(quote! { + impl #ns::#trait_name <#(#tvs),*> for #wrapper_name { + #(#exports)* + } + }); +} + +/// Keep track of how to get the portion of the state that corresponds +/// to the instance that we are presently emitting +#[derive(Clone)] +struct SelfInfo { + orig_id: Ident, + type_id: Vec, + outer_id: Ident, + inner_preamble: TokenStream, + inner_id: Ident, +} +impl SelfInfo { + fn new(orig_id: Ident) -> Self { + let outer_id = format_ident!("captured_{}", orig_id); + let inner_id = format_ident!("slf"); + SelfInfo { + orig_id, + type_id: vec![format_ident!("I")], + inner_preamble: quote! 
{ + let mut #inner_id = #outer_id.lock().unwrap(); + let mut #inner_id = ::std::ops::DerefMut::deref_mut(&mut #inner_id); + }, + outer_id, + inner_id, + } + } + /// Adjust a [`SelfInfo`] to get the portion of the state for the + /// current instance via calling the given getter + fn with_getter(&self, tp: TokenStream, type_name: Ident, getter: Ident) -> Self { + let mut toks = self.inner_preamble.clone(); + let id = self.inner_id.clone(); + let mut type_id = self.type_id.clone(); + toks.extend(quote! { + let mut #id = #tp::#getter(::std::borrow::BorrowMut::<#(#type_id)::*>::borrow_mut(&mut #id)); + }); + type_id.push(type_name); + SelfInfo { + orig_id: self.orig_id.clone(), + type_id, + outer_id: self.outer_id.clone(), + inner_preamble: toks, + inner_id: id, + } + } +} + +/// Emit (via returning) code to register this particular extern definition with +/// Hyperlight as a host function +/// +/// - `get_self`: a [`SelfInfo`] that details how to get from the root +/// component implementation's state to the state for the +/// implementation of this instance. +fn emit_import_extern_decl<'a, 'b, 'c>( + s: &'c mut State<'a, 'b>, + get_self: SelfInfo, + ed: &'c ExternDecl<'b>, +) -> TokenStream { + match &ed.desc { + ExternDesc::CoreModule(_) => panic!("core module (im/ex)ports are not supported"), + ExternDesc::Func(ft) => { + let hln = emit_fn_hl_name(s, ed.kebab_name); + log::debug!("providing host function {}", hln); + let (pds, pus) = ft + .params + .iter() + .map(|p| { + let id = kebab_to_var(p.name.name); + ( + quote! { #id: ::std::vec::Vec }, + emit_hl_unmarshal_param(s, id, &p.ty), + ) + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + let tp = s.cur_trait_path(); + let callname = match kebab_to_fn(ed.kebab_name) { + FnName::Plain(n) => quote! { #tp::#n }, + FnName::Associated(r, m) => { + let hp = s.helper_path(); + match m { + ResourceItemName::Constructor => quote! { #hp #r::new }, + ResourceItemName::Method(mn) => quote! 
{ #hp #r::#mn }, + ResourceItemName::Static(mn) => quote! { #hp #r::#mn }, + } + } + }; + let SelfInfo { + orig_id, + type_id, + outer_id, + inner_preamble, + inner_id, + } = get_self; + let ret = format_ident!("ret"); + let marshal_result = emit_hl_marshal_result(s, ret.clone(), &ft.result); + quote! { + let #outer_id = #orig_id.clone(); + let captured_rts = rts.clone(); + sb.register_host_function(#hln, move |#(#pds),*| { + let mut rts = captured_rts.lock().unwrap(); + #inner_preamble + let #ret = #callname( + ::std::borrow::BorrowMut::<#(#type_id)::*>::borrow_mut( + &mut #inner_id + ), + #(#pus),* + ); + Ok(#marshal_result) + }) + .unwrap(); + } + } + ExternDesc::Type(_) => { + // no runtime representation is needed for types + quote! {} + } + ExternDesc::Instance(it) => { + let mut s = s.clone(); + let wn = split_wit_name(ed.kebab_name); + let type_name = kebab_to_type(wn.name); + let getter = kebab_to_getter(wn.name); + let tp = s.cur_trait_path(); + let get_self = get_self.with_getter(tp, type_name, getter); //quote! { #get_self let mut slf = &mut #tp::#getter(&mut *slf); }; + emit_import_instance(&mut s, get_self, wn.clone(), it) + } + ExternDesc::Component(_) => { + panic!("nested components not yet supported in rust bindings"); + } + } +} + +/// Emit (via returning) code to register each export of the given +/// instance with Hyperlight as a host function. +/// +/// - `get_self`: a [`SelfInfo`] that details how to get from the root +/// component implementation's state to the state for the +/// implementation of this instance. This should already have been +/// updated for this instance by the caller! 
+fn emit_import_instance<'a, 'b, 'c>( + s: &'c mut State<'a, 'b>, + get_self: SelfInfo, + wn: WitName, + it: &'c Instance<'b>, +) -> TokenStream { + let mut s = s.with_cursor(wn.namespace_idents()); + s.cur_helper_mod = Some(kebab_to_namespace(wn.name)); + s.cur_trait = Some(kebab_to_type(wn.name)); + + let imports = it + .exports + .iter() + .map(|ed| emit_import_extern_decl(&mut s, get_self.clone(), ed)) + .collect::>(); + + quote! { #(#imports)* } +} + +/// From a kebab name for a Component, derive something suitable for +/// use as the name of the wrapper struct that will implement its +/// exports in terms of guest function calls. +fn kebab_to_wrapper_name(trait_name: &str) -> Ident { + format_ident!("{}Sandbox", kebab_to_type(trait_name)) +} + +/// Emit (via mutating `s`): +/// - a resource table for each resource exported by this component +/// - a wrapper type encapsulating a sandbox and a wrapper table that +/// implements the relevant export trait +/// - an implementation of the component trait itself for Hyperlight's +/// `UninitializedSandbox` that makes it easy to instantiate +fn emit_component<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, wn: WitName, ct: &'c Component<'b>) { + let mut s = s.with_cursor(wn.namespace_idents()); + let ns = wn.namespace_path(); + let r#trait = kebab_to_type(wn.name); + let import_trait = kebab_to_imports_name(wn.name); + let export_trait = kebab_to_exports_name(wn.name); + let wrapper_name = kebab_to_wrapper_name(wn.name); + let import_id = format_ident!("imports"); + + let rtsid = format_ident!("{}Resources", r#trait); + s.import_param_var = Some(format_ident!("I")); + resource::emit_tables( + &mut s, + rtsid.clone(), + quote! 
{ #ns::#import_trait }, + None, + false, + ); + + s.var_offset = ct.instance.evars.len(); + s.cur_trait = Some(import_trait.clone()); + let imports = ct + .imports + .iter() + .map(|ed| emit_import_extern_decl(&mut s, SelfInfo::new(import_id.clone()), ed)) + .collect::>(); + s.var_offset = 0; + + s.root_component_name = Some((ns.clone(), wn.name)); + s.cur_trait = Some(export_trait.clone()); + s.import_param_var = Some(format_ident!("I")); + let exports = ct + .instance + .unqualified + .exports + .iter() + .map(|ed| emit_export_extern_decl(&mut s, ed)) + .collect::>(); + + s.root_mod.items.extend(quote! { + pub struct #wrapper_name { + pub(crate) sb: S, + pub(crate) rt: ::std::sync::Arc<::std::sync::Mutex<#rtsid>>, + } + pub(crate) fn register_host_functions(sb: &mut S, i: I) -> ::std::sync::Arc<::std::sync::Mutex<#rtsid>> { + use ::hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; + let rts = ::std::sync::Arc::new(::std::sync::Mutex::new(#rtsid::new())); + let #import_id = ::std::sync::Arc::new(::std::sync::Mutex::new(i)); + #(#imports)* + rts + } + impl #ns::#export_trait for #wrapper_name { + #(#exports)* + } + impl #ns::#r#trait for ::hyperlight_host::sandbox::UninitializedSandbox { + type Exports = #wrapper_name; + fn instantiate(mut self, i: I) -> Self::Exports { + let rts = register_host_functions(&mut self, i); + let noop = ::core::default::Default::default(); + let sb = ::hyperlight_host::sandbox_state::sandbox::EvolvableSandbox::evolve(self, noop).unwrap(); + let cc = ::hyperlight_host::func::call_ctx::MultiUseGuestCallContext::start(sb); + #wrapper_name { + sb: cc, + rt: rts, + } + } + } + }); +} + +/// See [`emit_component`] +pub fn emit_toplevel<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, n: &str, ct: &'c Component<'b>) { + s.is_impl = true; + log::debug!("\n\n=== starting host emit ===\n"); + let wn = split_wit_name(n); + emit_component(s, wn, ct) +} diff --git a/src/hyperlight_component_util/src/lib.rs b/src/hyperlight_component_util/src/lib.rs 
new file mode 100644 index 000000000..4d2d0c8bf --- /dev/null +++ b/src/hyperlight_component_util/src/lib.rs @@ -0,0 +1,43 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +// This, unlike the rest of hyperlight, isn't really a library (since +// it's only used by our own build-time tools), so the reasons not to +// panic don't really apply. +#![allow(clippy::unwrap_used)] +// "Needless" lifetimes are useful for clarity +#![allow(clippy::needless_lifetimes)] + +// Typechecking and elaboration +pub mod component; +pub mod elaborate; +pub mod etypes; +pub mod structure; +pub mod substitute; +pub mod subtype; +pub mod tv; +pub mod wf; + +// Generally useful for code emit +pub mod emit; +pub mod hl; +pub mod resource; +pub mod rtypes; +pub mod util; + +// Specific code emit +pub mod guest; +pub mod host; diff --git a/src/hyperlight_component_util/src/resource.rs b/src/hyperlight_component_util/src/resource.rs new file mode 100644 index 000000000..a84e46bd8 --- /dev/null +++ b/src/hyperlight_component_util/src/resource.rs @@ -0,0 +1,111 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +use proc_macro2::{Ident, TokenStream}; +use quote::{format_ident, quote}; + +use crate::emit::State; +use crate::etypes::{TypeBound, Tyvar}; +use crate::rtypes::emit_var_ref; + +/// Emit a structure definition for a resource table that keeps track +/// of resources lent/borrowed/given/taken to/from the other side of +/// the Hyperlight boundary. +/// - `rtsid`: The name of the struct to create +/// - `bound`: a bound to be used for a phantom type variable that +/// records the fact that these resource tables are only valid for a +/// component that has been instantiated with a particular +/// implementation of its imports +/// - `sv`: optionally a bound to be used for a phantom type variable +/// that records the fact that these resource tables are only valid +/// for a particular implementation of a component +pub fn emit_tables<'a, 'b, 'c>( + s: &'c mut State<'a, 'b>, + rtsid: Ident, + bound: TokenStream, + sv: Option, + is_guest: bool, +) { + let vs = s.bound_vars.clone(); + let (fields, inits) = vs + .iter() + .enumerate() + .map(|(i, v)| { + let field_name = format_ident!("resource{}", i); + let alloc_ns = if s.is_guest { + quote! { ::alloc } + } else { + quote! { ::std } + }; + match v.bound { + TypeBound::Eq(_) => (quote! { #field_name: () }, quote! { #field_name: () }), + TypeBound::SubResource => { + if v.origin.is_imported() ^ is_guest { + let t = emit_var_ref(s, &Tyvar::Bound(i as u32)); + ( + quote! { + #field_name: #alloc_ns::collections::VecDeque< + ::hyperlight_common::resource::ResourceEntry<#t> + > + }, + quote! 
{ #field_name: #alloc_ns::collections::VecDeque::new() }, + ) + } else { + // we don't need to keep track of anything for + // resources owned by the other side + ( + quote! { + #field_name: () + }, + quote! { #field_name: () }, + ) + } + } + } + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + let (sv, svs, sphantom, sphantominit) = if let Some(sv) = sv { + ( + quote! { , S: #sv }, + quote! { , S }, + quote! { _phantomS: ::core::marker::PhantomData, }, + quote! { _phantomS: ::core::marker::PhantomData, }, + ) + } else { + ( + TokenStream::new(), + TokenStream::new(), + TokenStream::new(), + TokenStream::new(), + ) + }; + s.root_mod.items.extend(quote! { + pub(crate) struct #rtsid { + #(#fields,)* + _phantomI: ::core::marker::PhantomData, + #sphantom + } + impl #rtsid { + fn new() -> Self { + #rtsid { + #(#inits,)* + _phantomI: ::core::marker::PhantomData, + #sphantominit + } + } + } + }); +} diff --git a/src/hyperlight_component_util/src/rtypes.rs b/src/hyperlight_component_util/src/rtypes.rs new file mode 100644 index 000000000..00a934e44 --- /dev/null +++ b/src/hyperlight_component_util/src/rtypes.rs @@ -0,0 +1,875 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! 
The Rust representation of a component type (etype) + +use std::collections::{BTreeMap, BTreeSet, VecDeque}; +use std::vec::Vec; + +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; +use syn::Ident; + +use crate::emit::{ + FnName, ResourceItemName, State, WitName, kebab_to_cons, kebab_to_exports_name, kebab_to_fn, + kebab_to_getter, kebab_to_imports_name, kebab_to_namespace, kebab_to_type, kebab_to_var, + split_wit_name, +}; +use crate::etypes::{ + Component, Defined, ExternDecl, ExternDesc, Func, Handleable, ImportExport, Instance, Param, + Result, TypeBound, Tyvar, Value, +}; + +/// When referring to an instance or resource trait, emit a token +/// stream that instantiates any types it is parametrized by with our +/// own best understanding of how to name the relevant type variables +fn emit_tvis(s: &mut State, tvs: Vec) -> TokenStream { + let tvs = tvs + .iter() + .map(|tv| emit_var_ref(s, &Tyvar::Bound(*tv))) + .collect::>(); + if !tvs.is_empty() { + quote! { <#(#tvs),*> } + } else { + TokenStream::new() + } +} + +/// Emit a token stream that references the type of a particular resource +/// +/// - `n`: the absolute index (i.e. ignoring [`State::var_offset`]) of +/// the component tyvar being referenced +/// - `path`: the origin path between the module where we are and the +/// module where the resource is defined. The existence of this +/// path implies that the var is "locally defined". +fn emit_resource_ref(s: &mut State, n: u32, path: Vec) -> TokenStream { + // todo: when the guest codegen is split into generic and wasm, + // this can go away, since an appropriate impl for the imports + // trait will be there + if s.is_guest && s.is_impl { + // Morally, this should check that the var is imported, but + // that information is gone by now (in the common prefix of + // the path that was chopped off), and we won't support + // resources exported from the guest until this whole special + // case is gone, so ignore it. 
+ let id = format_ident!("HostResource{}", n); + return quote! { #id }; + } + // There is always at least one element in the path, which names + // the thing we are referring to + let rtrait = kebab_to_type(path[path.len() - 1].name()); + + // Deal specially with being in the local instance, where there is + // no instance type & so it is not easy to resolve the + // path-from-the-root to the resource type trait in question + if path.len() == 1 { + let helper = s.cur_helper_mod.clone().unwrap(); + let rtrait = kebab_to_type(path[0].name()); + let t = s.resolve_trait_immut(false, &[helper.clone(), rtrait.clone()]); + let tvis = emit_tvis(s, t.tv_idxs()); + let mut sv = quote! { Self }; + if let Some(s) = &s.self_param_var { + sv = quote! { #s }; + }; + return quote! { <#sv as #helper::#rtrait #tvis>::T }; + }; + + // Generally speaking, the structure that we expect to see in + // `path` ends in an instance that exports the resource type, + // followed by the resource type itself. We locate the resource + // trait by using that final instance name directly; any other + // names are just used to get to the type that implements it + let instance = path[path.len() - 2].name(); + let iwn = split_wit_name(instance); + let extras = path[0..path.len() - 2] + .iter() + .map(|p| { + let wn = split_wit_name(p.name()); + kebab_to_type(wn.name) + }) + .collect::>(); + let extras = quote! { #(#extras::)* }; + let rp = s.root_path(); + let tns = iwn.namespace_path(); + let instance_mod = kebab_to_namespace(iwn.name); + let instance_type = kebab_to_type(iwn.name); + let mut sv = quote! { Self }; + if path[path.len() - 2].imported() { + if let Some(iv) = &s.import_param_var { + sv = quote! { #iv } + }; + } else if let Some(s) = &s.self_param_var { + sv = quote! 
{ #s } + }; + let mut trait_path = Vec::new(); + trait_path.extend(iwn.namespace_idents()); + trait_path.push(instance_mod.clone()); + trait_path.push(rtrait.clone()); + let t = s.resolve_trait_immut(true, &trait_path); + let tvis = emit_tvis(s, t.tv_idxs()); + quote! { <#sv::#extras #instance_type as #rp #tns::#instance_mod::#rtrait #tvis>::T } +} + +/// Try to find a way to refer to the given type variable from the +/// current module/trait. If this fails, the type must be coming from +/// a sibling package, so we will have to emit a parametrization that +/// the root (or at least someone higher up the tree) can instantiate. +/// - `n`: the absolute index (i.e. ignoring [`State::var_offset`]) of +/// the component tyvar being referenced +fn try_find_local_var_id( + s: &mut State, + // this should be an absolute var number (no noff) + n: u32, +) -> Option { + if let Some((path, bound)) = s.is_noff_var_local(n) { + let var_is_helper = match bound { + TypeBound::Eq(_) => true, + TypeBound::SubResource => false, + }; + if !var_is_helper { + // it is a resource type + if s.is_helper { + // but we're in that resource type, so that's ok + if path.len() == 1 && s.cur_trait == Some(kebab_to_type(path[0].name())) { + return Some(quote! 
{ Self::T }); + } + // otherwise, there is no way to reference that from here + return None; + } else { + let mut path_strs = vec!["".to_string(); path.len()]; + for (i, p) in path.iter().enumerate() { + path_strs[i] = p.name().to_string(); + } + let path = path + .into_iter() + .enumerate() + .map(|(i, p)| match p { + ImportExport::Import(_) => ImportExport::Import(&path_strs[i]), + ImportExport::Export(_) => ImportExport::Export(&path_strs[i]), + }) + .collect::>(); + return Some(emit_resource_ref(s, n, path)); + } + } + log::debug!("path is {:?}\n", path); + let mut path = path.iter().rev(); + let name = kebab_to_type(path.next().unwrap().name()); + let owner = path.next(); + if let Some(owner) = owner { + // if we have an instance type, use it + let wn = split_wit_name(owner.name()); + let rp = s.root_path(); + let tns = wn.namespace_path(); + let helper = kebab_to_namespace(wn.name); + Some(quote! { #rp #tns::#helper::#name }) + } else { + let hp = s.helper_path(); + Some(quote! { #hp #name }) + } + } else { + None + } +} + +/// Emit a token stream that references the given type variable in a +/// type context, either directly if it is locally defined or by +/// adding a parameter to the current type/trait/etc if necessary. +/// - `tv`: the variable to reference +/// +/// Precondition: `tv` must be a [`Tyvar::Bound`] tyvar +pub fn emit_var_ref(s: &mut State, tv: &Tyvar) -> TokenStream { + let Tyvar::Bound(n) = tv else { + panic!("free tyvar in rust emit") + }; + emit_var_ref_noff(s, n + s.var_offset as u32, false) +} +/// Emit a token stream that references the given type variable in a +/// value context (e.g. a constructor), either directly if it is +/// locally defined or by adding a parameter to the current +/// type/trait/etc if necessary. 
+/// - `tv`: the variable to reference +/// +/// Precondition: `tv` must be a [`Tyvar::Bound`] tyvar +pub fn emit_var_ref_value(s: &mut State, tv: &Tyvar) -> TokenStream { + let Tyvar::Bound(n) = tv else { + panic!("free tyvar in rust emit") + }; + emit_var_ref_noff(s, n + s.var_offset as u32, true) +} +/// Emit a token stream that references the given bound type variable, +/// either directly if it is locally defined or by adding a parameter +/// to the current type/trait/etc if necessary. +/// - `n`: the absolute index (i.e. ignoring [`State::var_offset`]) of +/// the bound variable being referenced +/// - `is_value`: whether this is a value (e.g. constructor) or type context. +pub fn emit_var_ref_noff(s: &mut State, n: u32, is_value: bool) -> TokenStream { + log::debug!("var_ref {:?} {:?}", &s.bound_vars[n as usize], s.origin); + // if the variable was defined locally, try to reference it directly + let id = try_find_local_var_id(s, n); + let id = match id { + Some(id) => { + // if we are referencing the local one, we need to give it + // the variables it wants + let vs = s.get_noff_var_refs(n); + let vs = vs + .iter() + .map(|n| emit_var_ref_noff(s, *n, false)) + .collect::>(); + let vs_toks = if !vs.is_empty() { + if is_value { + quote! { ::<#(#vs),*> } + } else { + quote! { <#(#vs),*> } + } + } else { + TokenStream::new() + }; + + quote! { #id #vs_toks } + } + None => { + // otherwise, record that whatever type is referencing it needs to + // have it in scope + s.need_noff_var(n); + let id = s.noff_var_id(n); + quote! { #id } + } + }; + quote! { #id } +} + +/// Format the name of the rust type corresponding to a component +/// numeric type. 
+/// +/// Precondition: `vt` is a numeric type (`S`, `U`, `F`) +pub fn numeric_rtype(vt: &Value) -> (Ident, u8) { + match vt { + Value::S(w) => (format_ident!("i{}", w.width()), w.width()), + Value::U(w) => (format_ident!("u{}", w.width()), w.width()), + Value::F(w) => (format_ident!("f{}", w.width()), w.width()), + _ => panic!("numeric_rtype: internal invariant violation"), + } +} + +/// Emit a Rust type corresponding to a given value type. The +/// resultant token stream will parse as a Rust type. +/// +/// Precondition: `vt` is an inline-able value type. +pub fn emit_value(s: &mut State, vt: &Value) -> TokenStream { + match vt { + Value::Bool => quote! { bool }, + Value::S(_) | Value::U(_) | Value::F(_) => { + let (id, _) = numeric_rtype(vt); + quote! { #id } + } + Value::Char => quote! { char }, + Value::String => quote! { alloc::string::String }, + Value::List(vt) => { + let vt = emit_value(s, vt); + quote! { alloc::vec::Vec<#vt> } + } + Value::Record(_) => panic!("record not at top level of valtype"), + Value::Tuple(vts) => { + let vts = vts.iter().map(|vt| emit_value(s, vt)).collect::>(); + quote! { (#(#vts),*) } + } + Value::Flags(_) => panic!("flags not at top level of valtype"), + Value::Variant(_) => panic!("flags not at top level of valtype"), + Value::Enum(_) => panic!("enum not at top level of valtype"), + Value::Option(vt) => { + let vt = emit_value(s, vt); + quote! { ::core::option::Option<#vt> } + } + Value::Result(vt1, vt2) => { + let unit = Value::Tuple(Vec::new()); + let vt1 = emit_value(s, vt1.as_ref().as_ref().unwrap_or(&unit)); + let vt2 = emit_value(s, vt2.as_ref().as_ref().unwrap_or(&unit)); + quote! { ::core::result::Result<#vt1, #vt2> } + } + Value::Own(ht) => match ht { + Handleable::Resource(_) => panic!("bare resource in type"), + Handleable::Var(tv) => { + if s.is_guest { + let wrap = if s.is_wasmtime_guest { + |toks| quote! 
{ ::wasmtime::component::Resource<#toks> } + } else { + |toks| toks + }; + if !s.is_impl { + wrap(emit_var_ref(s, tv)) + } else { + let n = crate::hl::resolve_handleable_to_resource(s, ht); + log::debug!("resolved ht to r (4) {:?} {:?}", ht, n); + let id = format_ident!("HostResource{}", n); + wrap(quote! { #id }) + } + } else { + emit_var_ref(s, tv) + } + } + }, + Value::Borrow(ht) => match ht { + Handleable::Resource(_) => panic!("bare resource in type"), + Handleable::Var(tv) => { + if s.is_guest { + let wrap = if s.is_wasmtime_guest { + |toks| quote! { ::wasmtime::component::Resource<#toks> } + } else { + |toks| quote! { &#toks } + }; + if !s.is_impl { + wrap(emit_var_ref(s, tv)) + } else { + let n = crate::hl::resolve_handleable_to_resource(s, ht); + log::debug!("resolved ht to r (5) {:?} {:?}", ht, n); + let id = format_ident!("HostResource{}", n); + wrap(quote! { #id }) + } + } else { + let vr = emit_var_ref(s, tv); + quote! { ::hyperlight_common::resource::BorrowedResourceGuard<#vr> } + } + } + }, + Value::Var(Some(tv), _) => emit_var_ref(s, tv), + Value::Var(None, _) => panic!("value type with recorded but unknown var"), + } +} + +/// Emit a Rust type corresponding to a given toplevel value type. The +/// resultant token stream will parse as a Rust type declaration that +/// defines a type named `id`. +fn emit_value_toplevel(s: &mut State, v: Option, id: Ident, vt: &Value) -> TokenStream { + let is_wasmtime_guest = s.is_wasmtime_guest; + match vt { + Value::Record(rfs) => { + let (vs, toks) = gather_needed_vars(s, v, |s| { + let rfs = rfs + .iter() + .map(|rf| { + let orig_name = rf.name.name; + let id = kebab_to_var(orig_name); + let derives = if s.is_wasmtime_guest { + quote! { #[component(name = #orig_name)] } + } else { + TokenStream::new() + }; + let ty = emit_value(s, &rf.ty); + quote! { #derives pub #id: #ty } + }) + .collect::>(); + quote! 
{ #(#rfs),* } + }); + let vs = emit_type_defn_var_list(s, vs); + let derives = if s.is_wasmtime_guest { + quote! { + #[derive(::wasmtime::component::ComponentType)] + #[derive(::wasmtime::component::Lift)] + #[derive(::wasmtime::component::Lower)] + #[component(record)] + } + } else { + TokenStream::new() + }; + quote! { + #derives + #[derive(Debug, Clone, PartialEq)] + pub struct #id #vs { #toks } + } + } + Value::Flags(ns) => { + let (vs, toks) = gather_needed_vars(s, v, |_| { + let ns = ns + .iter() + .map(|n| { + let orig_name = n.name; + let id = kebab_to_var(orig_name); + quote! { pub #id: bool } + }) + .collect::>(); + quote! { #(#ns),* } + }); + let vs = emit_type_defn_var_list(s, vs); + quote! { + #[derive(Debug, Clone, PartialEq)] + pub struct #id #vs { #toks } + } + } + Value::Variant(vcs) => { + let (vs, toks) = gather_needed_vars(s, v, |s| { + let vcs = vcs + .iter() + .map(|vc| { + let orig_name = vc.name.name; + let id = kebab_to_cons(orig_name); + let derives = if s.is_wasmtime_guest { + quote! { #[component(name = #orig_name)] } + } else { + TokenStream::new() + }; + match &vc.ty { + Some(ty) => { + let ty = emit_value(s, ty); + quote! { #derives #id(#ty) } + } + None => quote! { #derives #id }, + } + }) + .collect::>(); + quote! { #(#vcs),* } + }); + let vs = emit_type_defn_var_list(s, vs); + let derives = if s.is_wasmtime_guest { + quote! { + #[derive(::wasmtime::component::ComponentType)] + #[derive(::wasmtime::component::Lift)] + #[derive(::wasmtime::component::Lower)] + #[component(variant)] + } + } else { + TokenStream::new() + }; + quote! { + #derives + #[derive(Debug, Clone, PartialEq)] + pub enum #id #vs { #toks } + } + } + Value::Enum(ns) => { + let (vs, toks) = gather_needed_vars(s, v, |_| { + let ns = ns + .iter() + .map(|n| { + let orig_name = n.name; + let id = kebab_to_cons(orig_name); + let derives = if is_wasmtime_guest { + quote! { #[component(name = #orig_name)] } + } else { + TokenStream::new() + }; + quote! 
{ #derives #id } + }) + .collect::>(); + quote! { #(#ns),* } + }); + let vs = emit_type_defn_var_list(s, vs); + let derives = if s.is_wasmtime_guest { + quote! { + #[derive(::wasmtime::component::ComponentType)] + #[derive(::wasmtime::component::Lift)] + #[derive(::wasmtime::component::Lower)] + #[derive(::core::clone::Clone)] + #[derive(::core::marker::Copy)] + #[component(enum)] + #[repr(u8)] // todo: should this always be u8? + } + } else { + TokenStream::new() + }; + quote! { + #derives + #[derive(Debug, Clone, PartialEq)] + pub enum #id #vs { #toks } + } + } + _ => emit_type_alias(s, v, id, |s| emit_value(s, vt)), + } +} + +/// Emit a Rust type corresponding to a defined type. The token stream +/// will parse as a Rust type declaration that defines a type named `id`. +/// +/// Precondition: `dt` is not an instance or component, which we +/// cannot deal with as first-class at the moment, or a bare resource +/// type. +fn emit_defined(s: &mut State, v: Option, id: Ident, dt: &Defined) -> TokenStream { + match dt { + // the lack of trait aliases makes emitting a name for an + // instance/component difficult in rust + Defined::Instance(_) | Defined::Component(_) => TokenStream::new(), + // toplevel vars should have been handled elsewhere + Defined::Handleable(Handleable::Resource(_)) => panic!("bare resource in type"), + Defined::Handleable(Handleable::Var(tv)) => { + emit_type_alias(s, v, id, |s| emit_var_ref(s, tv)) + } + Defined::Value(vt) => emit_value_toplevel(s, v, id, vt), + Defined::Func(ft) => emit_type_alias(s, v, id, |s| emit_func(s, ft)), + } +} + +/// Emit a Rust argument declaration, suitable for placing in the +/// argument list of a function, for a given component function type +/// parameter. +pub fn emit_func_param(s: &mut State, p: &Param) -> TokenStream { + let name = kebab_to_var(p.name.name); + let ty = emit_value(s, &p.ty); + quote! { #name: #ty } +} + +/// Emit a Rust version of a component function return type. 
+/// +/// Precondition: the result type must only be a named result if there +/// are no names in it (i.e. a unit type) +pub fn emit_func_result(s: &mut State, r: &Result) -> TokenStream { + match r { + Result::Unnamed(vt) => emit_value(s, vt), + Result::Named(rs) if rs.is_empty() => quote! { () }, + _ => panic!("multiple named function results are not currently supported"), + } +} + +/// Emit a Rust typeversion of a component function type. This is only +/// used for defining certain type aliases of functions, and so it +/// truly is a Rust type-level function type, not a value-level +/// declaration. +fn emit_func(s: &mut State, ft: &Func) -> TokenStream { + let params = ft + .params + .iter() + .map(|p| emit_func_param(s, p)) + .collect::>(); + let result = emit_func_result(s, &ft.result); + quote! { fn(#(#params),*) -> #result } +} + +/// Gather the vars that are referenced when running `f`. If `v` is +/// [`Some(vn)`], also record this as the set of vars needed by the +/// bound tyvar with absolute index `vn`. +fn gather_needed_vars TokenStream>( + s: &mut State, + v: Option, + f: F, +) -> (BTreeSet, TokenStream) { + let mut needs_vars = BTreeSet::new(); + let mut sv = s.with_needs_vars(&mut needs_vars); + let toks = f(&mut sv); + if let Some(vn) = v { + sv.record_needs_vars(vn); + } + drop(sv); + (needs_vars, toks) +} +/// Emit a Rust type parameter list that can be affixed to a type +/// definition, given a set `vs` of the component-level bound tyvars +/// that the type references but are not locally-defined. +fn emit_type_defn_var_list(s: &mut State, vs: BTreeSet) -> TokenStream { + if vs.is_empty() { + TokenStream::new() + } else { + let vs = vs + .iter() + .map(|n| { + if s.is_guest { + let t = s.noff_var_id(*n); + quote! { #t: 'static } + } else { + let t = s.noff_var_id(*n); + quote! { #t } + } + }) + .collect::>(); + quote! 
{ <#(#vs),*> } + } +} +/// Emit a type alias declaration, allowing one to name an anonymous +/// Rust type without creating a new nominal type. +/// +/// - `v`: If [`Some(vn)`], the component-level bound tyvar absolute +/// index that this declaration corresponds to +/// - `id`: The name of the alias to produce +/// - `f`: A function which produces a token stream that parses as a +/// Rust type, to use as the body of the alias +fn emit_type_alias TokenStream>( + s: &mut State, + v: Option, + id: Ident, + f: F, +) -> TokenStream { + let (vs, toks) = gather_needed_vars(s, v, f); + let vs = emit_type_defn_var_list(s, vs); + quote! { pub type #id #vs = #toks; } +} + +/// Emit (via returning) a Rust trait item corresponding to this +/// extern decl +fn emit_extern_decl<'a, 'b, 'c>( + is_export: bool, + s: &'c mut State<'a, 'b>, + ed: &'c ExternDecl<'b>, +) -> TokenStream { + log::debug!(" emitting decl {:?}", ed.kebab_name); + match &ed.desc { + ExternDesc::CoreModule(_) => panic!("core module (im/ex)ports are not supported"), + ExternDesc::Func(ft) => { + let mut s = s.push_origin(is_export, ed.kebab_name); + match kebab_to_fn(ed.kebab_name) { + FnName::Plain(n) => { + let params = ft + .params + .iter() + .map(|p| emit_func_param(&mut s, p)) + .collect::>(); + let result = emit_func_result(&mut s, &ft.result); + quote! { + fn #n(&mut self, #(#params),*) -> #result; + } + } + FnName::Associated(r, n) => { + let mut s = s.helper(); + s.cur_trait = Some(r.clone()); + let mut needs_vars = BTreeSet::new(); + let mut sv = s.with_needs_vars(&mut needs_vars); + let params = ft + .params + .iter() + .map(|p| emit_func_param(&mut sv, p)) + .collect::>(); + match n { + ResourceItemName::Constructor => { + sv.cur_trait().items.extend(quote! { + fn new(&mut self, #(#params),*) -> Self::T; + }); + } + ResourceItemName::Method(n) => { + let result = emit_func_result(&mut sv, &ft.result); + sv.cur_trait().items.extend(quote! 
{ + fn #n(&mut self, #(#params),*) -> #result; + }); + } + ResourceItemName::Static(n) => { + let result = emit_func_result(&mut sv, &ft.result); + sv.cur_trait().items.extend(quote! { + fn #n(&mut self, #(#params),*) -> #result; + }); + } + } + for v in needs_vars { + let id = s.noff_var_id(v); + s.cur_trait().tvs.insert(id, (Some(v), TokenStream::new())); + } + quote! {} + } + } + } + ExternDesc::Type(t) => { + fn go_defined<'a, 'b, 'c>( + s: &'c mut State<'a, 'b>, + ed: &'c ExternDecl<'b>, + t: &'c Defined<'b>, + v: Option, + ) -> TokenStream { + let id = kebab_to_type(ed.kebab_name); + let mut s = s.helper(); + + let t = emit_defined(&mut s, v, id, t); + s.cur_mod().items.extend(t); + TokenStream::new() + } + let edn: &'b str = ed.kebab_name; + let mut s: State<'_, 'b> = s.push_origin(is_export, edn); + if let Some((n, bound)) = s.is_var_defn(t) { + match bound { + TypeBound::Eq(t) => { + // ensure that when go_defined() looks up vars + // that might occur in the type, they resolve + // properly + let noff = s.var_offset as u32 + n; + s.var_offset += n as usize + 1; + go_defined(&mut s, ed, &t, Some(noff)) + } + TypeBound::SubResource => { + let rn = kebab_to_type(ed.kebab_name); + s.add_helper_supertrait(rn.clone()); + let mut s = s.helper(); + s.cur_trait = Some(rn.clone()); + s.cur_trait().items.extend(quote! { + type T: ::core::marker::Send; + }); + quote! {} + } + } + } else { + go_defined(&mut s, ed, t, None) + } + } + ExternDesc::Instance(it) => { + let mut s = s.push_origin(is_export, ed.kebab_name); + let wn = split_wit_name(ed.kebab_name); + emit_instance(&mut s, wn.clone(), it); + + let nsids = wn.namespace_idents(); + let repr = s.r#trait(&nsids, kebab_to_type(wn.name)); + let vs = if !repr.tvs.is_empty() { + let vs = repr.tvs.clone(); + let tvs = vs + .iter() + .map(|(_, (tv, _))| emit_var_ref(&mut s, &Tyvar::Bound(tv.unwrap()))); + quote! 
{ <#(#tvs),*> } + } else { + TokenStream::new() + }; + + let getter = kebab_to_getter(wn.name); + let rp = s.root_path(); + let tns = wn.namespace_path(); + let tn = kebab_to_type(wn.name); + quote! { + type #tn: #rp #tns::#tn #vs; + fn #getter(&mut self) -> impl ::core::borrow::BorrowMut; + } + } + ExternDesc::Component(_) => { + panic!("nested components not yet supported in rust bindings"); + } + } +} + +/// Emit (via mutating `s`) a Rust trait declaration corresponding to +/// this instance type +fn emit_instance<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, wn: WitName, it: &'c Instance<'b>) { + log::debug!("emitting instance {:?}", wn); + let mut s = s.with_cursor(wn.namespace_idents()); + + let name = kebab_to_type(wn.name); + + s.cur_helper_mod = Some(kebab_to_namespace(wn.name)); + s.cur_trait = Some(name.clone()); + if !s.cur_trait().items.is_empty() { + // Temporary hack: we have visited this wit:package/instance + // before, so bail out instead of adding duplicates of + // everything. Since we don't really have strong semantic + // guarantees that the exact same contents will be in each + // occurrence of a wit:package/instance (and indeed they may + // well be stripped down to the essentials in each + // occurrence), this is NOT sound, and will need to be + // revisited. The correct approach here is to change + // emit_extern_decl to create function/resource items in a + // Trait that can be merged properly, instead of directly + // emitting tokens. 
+ return; + } + + let mut needs_vars = BTreeSet::new(); + let mut sv = s.with_needs_vars(&mut needs_vars); + + let exports = it + .exports + .iter() + .map(|ed| emit_extern_decl(true, &mut sv, ed)) + .collect::>(); + + // instantiations for the supertraits + + let mut stvs = BTreeMap::new(); + let _ = sv.cur_trait(); // make sure it exists + let t = sv.cur_trait_immut(); + for (ti, _) in t.supertraits.iter() { + let t = sv.resolve_trait_immut(false, ti); + stvs.insert(ti.clone(), t.tv_idxs()); + } + // hack to make the local-definedness check work properly, since + // it usually should ignore the last origin component + sv.origin.push(ImportExport::Export("self")); + let mut stis = BTreeMap::new(); + for (id, tvs) in stvs.into_iter() { + stis.insert(id, emit_tvis(&mut sv, tvs)); + } + for (id, ts) in stis.into_iter() { + sv.cur_trait().supertraits.get_mut(&id).unwrap().extend(ts); + } + + drop(sv); + log::debug!("after exports, ncur_needs_vars is {:?}", needs_vars); + for v in needs_vars { + let id = s.noff_var_id(v); + s.cur_trait().tvs.insert(id, (Some(v), TokenStream::new())); + } + + s.cur_trait().items.extend(quote! { #(#exports)* }); +} + +/// Emit (via mutating `s`) a set of Rust trait declarations +/// corresponding to this component. 
This includes an `Imports` and an +/// `Exports` trait, as well as a main trait with an `instantiate()` +/// function that maps from an implementer of the imports to an +/// implementor of the exports +fn emit_component<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, wn: WitName, ct: &'c Component<'b>) { + let mut s = s.with_cursor(wn.namespace_idents()); + + let base_name = kebab_to_type(wn.name); + + s.cur_helper_mod = Some(kebab_to_namespace(wn.name)); + + let import_name = kebab_to_imports_name(wn.name); + *s.bound_vars = ct + .uvars + .iter() + .rev() + .map(Clone::clone) + .collect::>(); + s.cur_trait = Some(import_name.clone()); + let imports = ct + .imports + .iter() + .map(|ed| emit_extern_decl(false, &mut s, ed)) + .collect::>(); + s.cur_trait().items.extend(quote! { #(#imports)* }); + + s.adjust_vars(ct.instance.evars.len() as u32); + + s.import_param_var = Some(format_ident!("I")); + + let export_name = kebab_to_exports_name(wn.name); + *s.bound_vars = ct + .instance + .evars + .iter() + .rev() + .chain(ct.uvars.iter().rev()) + .map(Clone::clone) + .collect::>(); + s.cur_trait = Some(export_name.clone()); + let exports = ct + .instance + .unqualified + .exports + .iter() + .map(|ed| emit_extern_decl(true, &mut s, ed)) + .collect::>(); + s.cur_trait().tvs.insert( + format_ident!("I"), + (None, quote! { #import_name + ::core::marker::Send }), + ); + s.cur_trait().items.extend(quote! { #(#exports)* }); + + s.cur_helper_mod = None; + s.cur_trait = None; + + s.cur_mod().items.extend(quote! { + pub trait #base_name { + type Exports: #export_name; + // todo: can/should this 'static bound be avoided? 
+ // it is important right now because this is closed over in host functions + fn instantiate(self, imports: I) -> Self::Exports; + } + }); +} + +/// See [`emit_component`] +pub fn emit_toplevel<'a, 'b, 'c>(s: &'c mut State<'a, 'b>, n: &str, ct: &'c Component<'b>) { + let wn = split_wit_name(n); + emit_component(s, wn, ct); +} diff --git a/src/hyperlight_component_util/src/structure.rs b/src/hyperlight_component_util/src/structure.rs new file mode 100644 index 000000000..45ec92366 --- /dev/null +++ b/src/hyperlight_component_util/src/structure.rs @@ -0,0 +1,48 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +/// core:sort in the specification +#[derive(Debug, Clone, Copy)] +pub enum CoreSort { + Func, + Table, + Memory, + Global, + Type, + Module, + Instance, +} + +/// sort in the specification +#[derive(Debug, Clone, Copy)] +pub enum Sort { + Core(CoreSort), + Func, + Value, + Type, + Component, + Instance, +} + +/// sortidx in the specification +#[derive(Debug, Clone, Copy)] +pub struct SortIdx { + pub sort: Sort, + pub idx: u32, +} + +/// funcidx in the specification +pub type FuncIdx = u32; diff --git a/src/hyperlight_component_util/src/substitute.rs b/src/hyperlight_component_util/src/substitute.rs new file mode 100644 index 000000000..94f95cf7b --- /dev/null +++ b/src/hyperlight_component_util/src/substitute.rs @@ -0,0 +1,671 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! Capture-avoiding substitution + +use std::primitive::u32; + +use crate::etypes::{ + BoundedTyvar, Component, Ctx, Defined, ExternDecl, ExternDesc, FreeTyvar, Func, Handleable, + Instance, Param, QualifiedInstance, RecordField, TypeBound, Tyvar, Value, VariantCase, +}; +use crate::tv::ResolvedTyvar; + +/// A substitution +/// +/// This trait can be implemented by specific structures that have +/// specific substitution behavior, which only need to define how the +/// act on bound/existential/universal variables. The implemented +/// methods on the trait will then allow applying that substitution in +/// a capture-avoiding manner to any relevant term. +/// +/// The [`Shiftable`] bound is required because the implementation of +/// substitution for components and instances needs to be able to +/// shift the substitution in order to make substitution +/// capture-avoiding. +pub trait Substitution<'a> +where + Self: Shiftable<'a>, +{ + /// Some, but not all, substitutions are fallible (i.e. may reveal + /// latent misbehaviour in the type they are being applied to), so + /// any given [`Substitution`] can provide its own + /// [`Substitution::Error`] type. + /// + /// An infallible substitution can use [`Void`] to reflect + /// the fact that error is impossible, and callers can use + /// [`Unvoidable::not_void`] to eliminate the impossible case of + /// the result neatly. 
+ type Error: From<<>::Inner as Substitution<'a>>::Error>; + /// Any substitution should define whether a given bound variable + /// should be substituted, and if so with what. + fn subst_bvar(&self, i: u32) -> Result>, Self::Error>; + /// Any substitution should define whether a given existential variable + /// should be substituted, and if so with what. + fn subst_evar(&self, o: u32, i: u32) -> Result>, Self::Error>; + /// Any substitution should define whether a given universal variable + /// should be substituted, and if so with what. + fn subst_uvar(&self, o: u32, i: u32) -> Result>, Self::Error>; + + fn record_fields(&self, rfs: &[RecordField<'a>]) -> Result>, Self::Error> { + rfs.iter() + .map(|rf| { + Ok(RecordField { + name: rf.name, + ty: self.value(&rf.ty)?, + }) + }) + .collect() + } + + fn variant_cases(&self, vcs: &[VariantCase<'a>]) -> Result>, Self::Error> { + vcs.iter() + .map(|vc| { + Ok(VariantCase { + name: vc.name, + ty: self.value_option(&vc.ty)?, + refines: vc.refines, + }) + }) + .collect() + } + + fn value_option(&self, vt: &Option>) -> Result>, Self::Error> { + vt.as_ref().map(|ty| self.value(ty)).transpose() + } + + fn value(&self, vt: &Value<'a>) -> Result, Self::Error> { + Ok(match vt { + Value::Bool => Value::Bool, + Value::S(w) => Value::S(*w), + Value::U(w) => Value::U(*w), + Value::F(w) => Value::F(*w), + Value::Char => Value::Char, + Value::String => Value::String, + Value::List(vt) => Value::List(Box::new(self.value(vt)?)), + Value::Record(rfs) => Value::Record(self.record_fields(rfs)?), + Value::Variant(vcs) => Value::Variant(self.variant_cases(vcs)?), + Value::Flags(ns) => Value::Flags(ns.clone()), + Value::Enum(ns) => Value::Enum(ns.clone()), + Value::Option(vt) => Value::Option(Box::new(self.value(vt)?)), + Value::Tuple(vts) => Value::Tuple( + vts.iter() + .map(|vt| self.value(vt)) + .collect::>, Self::Error>>()?, + ), + Value::Result(vt1, vt2) => Value::Result( + Box::new(self.value_option(vt1)?), + 
Box::new(self.value_option(vt2)?), + ), + Value::Own(h) => Value::Own(self.handleable_(h)?), + Value::Borrow(h) => Value::Borrow(self.handleable_(h)?), + Value::Var(tv, vt) => Value::Var( + tv.as_ref().and_then(|tv| match self.var(tv) { + Ok(Some(Defined::Handleable(Handleable::Var(tv)))) => Some(tv), + Ok(None) => Some(tv.clone()), + _ => None, + }), + Box::new(self.value(vt)?), + ), + }) + } + + fn param(&self, pt: &Param<'a>) -> Result, Self::Error> { + Ok(Param { + name: pt.name, + ty: self.value(&pt.ty)?, + }) + } + + fn params(&self, pts: &Vec>) -> Result>, Self::Error> { + pts.iter().map(|pt| self.param(pt)).collect() + } + + fn result( + &self, + rt: &crate::etypes::Result<'a>, + ) -> Result, Self::Error> { + Ok(match rt { + crate::etypes::Result::Unnamed(vt) => crate::etypes::Result::Unnamed(self.value(vt)?), + crate::etypes::Result::Named(pts) => crate::etypes::Result::Named(self.params(pts)?), + }) + } + + fn func(&self, ft: &Func<'a>) -> Result, Self::Error> { + Ok(Func { + params: self.params(&ft.params)?, + result: self.result(&ft.result)?, + }) + } + + fn var(&self, tv: &Tyvar) -> Result>, Self::Error> { + match tv { + Tyvar::Bound(i) => self.subst_bvar(*i), + Tyvar::Free(FreeTyvar::U(o, i)) => self.subst_uvar(*o, *i), + Tyvar::Free(FreeTyvar::E(o, i)) => self.subst_evar(*o, *i), + } + } + + fn handleable(&self, h: &Handleable) -> Result, Self::Error> { + let hh = Defined::Handleable(h.clone()); + match h { + Handleable::Resource(_) => Ok(hh), + Handleable::Var(tv) => Ok(self.var(tv)?.unwrap_or(hh)), + } + } + + fn handleable_(&self, h: &Handleable) -> Result { + match self.handleable(h)? 
{ + Defined::Handleable(h_) => Ok(h_), + _ => panic!("internal invariant a violation: owned/borrowed var is not resource"), + } + } + + fn defined(&self, dt: &Defined<'a>) -> Result, Self::Error> { + Ok(match dt { + Defined::Handleable(h) => self.handleable(h)?, + Defined::Value(vt) => Defined::Value(self.value(vt)?), + Defined::Func(ft) => Defined::Func(self.func(ft)?), + Defined::Instance(it) => Defined::Instance(self.qualified_instance(it)?), + Defined::Component(ct) => Defined::Component(self.component(ct)?), + }) + } + + fn type_bound(&self, tb: &TypeBound<'a>) -> Result, Self::Error> { + Ok(match tb { + TypeBound::Eq(dt) => TypeBound::Eq(self.defined(dt)?), + TypeBound::SubResource => TypeBound::SubResource, + }) + } + + fn bounded_tyvar(&self, btv: &BoundedTyvar<'a>) -> Result, Self::Error> { + Ok(BoundedTyvar { + origin: btv.origin.clone(), + bound: self.type_bound(&btv.bound)?, + }) + } + + fn extern_desc(&self, ed: &ExternDesc<'a>) -> Result, Self::Error> { + Ok(match ed { + ExternDesc::CoreModule(cmt) => ExternDesc::CoreModule(cmt.clone()), + ExternDesc::Func(ft) => ExternDesc::Func(self.func(ft)?), + ExternDesc::Type(dt) => ExternDesc::Type(self.defined(dt)?), + ExternDesc::Instance(it) => ExternDesc::Instance(self.instance(it)?), + ExternDesc::Component(ct) => ExternDesc::Component(self.component(ct)?), + }) + } + + fn extern_decl(&self, ed: &ExternDecl<'a>) -> Result, Self::Error> { + Ok(ExternDecl { + kebab_name: ed.kebab_name, + desc: self.extern_desc(&ed.desc)?, + }) + } + + fn instance(&self, it: &Instance<'a>) -> Result, Self::Error> { + let exports = it + .exports + .iter() + .map(|ed| self.extern_decl(ed)) + .collect::, Self::Error>>()?; + Ok(Instance { exports }) + } + + fn qualified_instance( + &self, + qit: &QualifiedInstance<'a>, + ) -> Result, Self::Error> { + let mut evars = Vec::new(); + let mut sub = self.shifted(); + for evar in &qit.evars { + evars.push(sub.bounded_tyvar(evar)?); + sub.bshift(1); + sub.rbshift(1); + } + let it = 
sub.instance(&qit.unqualified)?; + Ok(QualifiedInstance { + evars, + unqualified: it, + }) + } + + fn component(&self, ct: &Component<'a>) -> Result, Self::Error> { + let mut uvars = Vec::new(); + let mut sub = self.shifted(); + for uvar in &ct.uvars { + uvars.push(sub.bounded_tyvar(uvar)?); + sub.bshift(1); + sub.rbshift(1); + } + let imports = ct + .imports + .iter() + .map(|ed| sub.extern_decl(ed).map_err(Into::into)) + .collect::>, Self::Error>>()?; + let instance = sub.qualified_instance(&ct.instance)?; + Ok(Component { + uvars, + imports, + instance, + }) + } +} + +/// A substitution that shifts bound variables up by a defined offset. +/// This will generally be accessed through [`Shifted`] below. It is +/// important to ensure that a bound variable produced by a +/// substitution is not captured. +struct RBShift { + rbshift: i32, +} +impl<'a> Shiftable<'a> for RBShift { + type Inner = Self; + fn shifted<'b>(&'b self) -> Shifted<'b, Self::Inner> { + Shifted::new(self) + } +} +impl<'a> Substitution<'a> for RBShift { + type Error = Void; + fn subst_bvar(&self, i: u32) -> Result>, Self::Error> { + Ok(Some(Defined::Handleable(Handleable::Var(Tyvar::Bound( + i.checked_add_signed(self.rbshift).unwrap(), + ))))) + } + fn subst_evar(&self, _o: u32, _i: u32) -> Result>, Self::Error> { + Ok(None) + } + fn subst_uvar(&self, _o: u32, _i: u32) -> Result>, Self::Error> { + Ok(None) + } +} + +/// A substitution that can be converted into a [`Shifted`] +/// substitution. All types other than [`Shifted`] itself should +/// implement this with the obvious option of +/// ``` +/// impl<'a> Shiftable<'a> for A { +/// type Inner = Self; +/// fn shifted<'b>(&'b self) -> Shifted<'b, Self::Inner> { Shifted::new(self) } +/// } +/// ``` +/// Unfortunately, it is not reasonably possible to provide this +/// automatically without specialization. 
+pub trait Shiftable<'a> { + type Inner: ?Sized + Substitution<'a>; + fn shifted<'c>(&'c self) -> Shifted<'c, Self::Inner>; +} + +/// A "shifted" version of a substitution, used internally to assure +/// that substitution is capture-avoiding. +pub struct Shifted<'b, A: ?Sized> { + /// The substitution which is being shifted + underlying: &'b A, + /// The offset to apply to bound variables before querying the + /// original substitution + bshift: i32, + /// The offset to apply to outer instance indices before + /// querying the original substitution + oshift: i32, + /// The offset to apply to free evar indices before + /// querying the original substitution + eshift: i32, + /// The offset to apply to free uvar indices before + /// querying the original substitution + ushift: i32, + /// The offset to apply to bound variables in the result of the + /// original substitution + rbshift: i32, +} +impl<'b, A: ?Sized> Clone for Shifted<'b, A> { + fn clone(&self) -> Self { + Self { + underlying: self.underlying, + bshift: self.bshift, + oshift: self.oshift, + eshift: self.eshift, + ushift: self.ushift, + rbshift: self.rbshift, + } + } +} +impl<'a, 'b, A: ?Sized + Substitution<'a>> Shiftable<'a> for Shifted<'b, A> { + type Inner = A; + fn shifted<'c>(&'c self) -> Shifted<'c, Self::Inner> { + self.clone() + } +} + +impl<'a, 'b, A: ?Sized + Substitution<'a>> Shifted<'b, A> { + fn new(s: &'b A) -> Self { + Self { + underlying: s, + bshift: 0, + oshift: 0, + eshift: 0, + ushift: 0, + rbshift: 0, + } + } + fn bshift(&mut self, bshift: i32) { + self.bshift += bshift; + } + #[allow(unused)] + fn oshift(&mut self, oshift: i32) { + self.oshift += oshift; + } + #[allow(unused)] + fn ushift(&mut self, ushift: i32) { + self.ushift += ushift; + } + #[allow(unused)] + fn eshift(&mut self, eshift: i32) { + self.eshift += eshift; + } + fn rbshift(&mut self, rbshift: i32) { + self.rbshift += rbshift; + } + + fn sub_rbshift( + &self, + dt: Result>, >::Error>, + ) -> Result>, >::Error> { + 
match dt { + Ok(Some(dt)) => { + let rbsub = RBShift { + rbshift: self.rbshift, + }; + Ok(Some(rbsub.defined(&dt).not_void())) + } + _ => dt, + } + } +} + +impl<'a, 'b, A: ?Sized + Substitution<'a>> Substitution<'a> for Shifted<'b, A> { + type Error = A::Error; + fn subst_bvar(&self, i: u32) -> Result>, Self::Error> { + match i.checked_add_signed(-self.bshift) { + Some(i) => self.sub_rbshift(self.underlying.subst_bvar(i)), + _ => Ok(None), + } + } + fn subst_evar(&self, o: u32, i: u32) -> Result>, Self::Error> { + match ( + o.checked_add_signed(-self.oshift), + i.checked_add_signed(-self.eshift), + ) { + (Some(o), Some(i)) => self.sub_rbshift(self.underlying.subst_evar(o, i)), + _ => Ok(None), + } + } + fn subst_uvar(&self, o: u32, i: u32) -> Result>, Self::Error> { + match ( + o.checked_add_signed(-self.oshift), + i.checked_add_signed(-self.ushift), + ) { + (Some(o), Some(i)) => self.sub_rbshift(self.underlying.subst_uvar(o, i)), + _ => Ok(None), + } + } +} + +/// Innerizing can fail because a type variable needs to be taken +/// through an `outer_boundary` but cannot be resolved to a concrete +/// type that can be copied. +#[derive(Debug)] +pub enum InnerizeError { + IndefiniteTyvar, +} +/// An innerize substitution is used to bring an outer type alias +/// inwards through one context. +pub struct Innerize<'c, 'p, 'a> { + /// What ctx was this type originally in? + ctx: &'c Ctx<'p, 'a>, + /// Are we crossing an outer_boundary? + outer_boundary: bool, +} +impl<'c, 'p, 'a> Shiftable<'a> for Innerize<'c, 'p, 'a> { + type Inner = Self; + fn shifted<'d>(&'d self) -> Shifted<'d, Self::Inner> { + Shifted::new(self) + } +} +impl<'c, 'p, 'a> Substitution<'a> for Innerize<'c, 'p, 'a> { + type Error = InnerizeError; + fn subst_bvar(&self, _i: u32) -> Result>, Self::Error> { + Ok(None) + } + // Note that even if the variables resolve, what they resolve to + // needs to itself be innerized, since it was also designed for + // this context. 
+ fn subst_evar(&self, o: u32, i: u32) -> Result>, Self::Error> { + if !self.outer_boundary { + Ok(Some(Defined::Handleable(Handleable::Var(Tyvar::Free( + FreeTyvar::E(o + 1, i), + ))))) + } else { + match self.ctx.resolve_tyvar(&Tyvar::Free(FreeTyvar::E(o, i))) { + ResolvedTyvar::Definite(dt) => Ok(Some(self.defined(&dt)?)), + _ => Err(InnerizeError::IndefiniteTyvar), + } + } + } + fn subst_uvar(&self, o: u32, i: u32) -> Result>, Self::Error> { + if !self.outer_boundary { + Ok(Some(Defined::Handleable(Handleable::Var(Tyvar::Free( + FreeTyvar::U(o + 1, i), + ))))) + } else { + match self.ctx.resolve_tyvar(&Tyvar::Free(FreeTyvar::U(o, i))) { + ResolvedTyvar::Definite(dt) => Ok(Some(self.defined(&dt)?)), + _ => Err(InnerizeError::IndefiniteTyvar), + } + } + } +} +impl<'c, 'p, 'a> Innerize<'c, 'p, 'a> { + pub fn new(ctx: &'c Ctx<'p, 'a>, outer_boundary: bool) -> Innerize<'c, 'p, 'a> { + Innerize { + ctx, + outer_boundary, + } + } +} + +/// The empty (void) type +pub enum Void {} + +/// Things that you can call [`not_void`](Unvoidable::not_void) on +pub trait Unvoidable { + type Result; + fn not_void(self) -> Self::Result; +} + +/// Eliminate a Result<_, Void> +impl Unvoidable for Result { + type Result = A; + fn not_void(self) -> A { + match self { + Ok(x) => x, + Err(v) => match v {}, + } + } +} + +/// An opening substitution is used to map bound variables into +/// free variables. Note that because of the differences in ordering +/// for bound variable indices (inside out) and context variables +/// (left to right, but variables are inserted in outside-in order), +/// `Bound(0)` gets mapped to `Free(0, base + n)`. +pub struct Opening { + /// Whether to produce E or U free variables + is_universal: bool, + /// At what index in the context are the free variables being + /// inserted? 
+ free_base: u32, + /// How many bound variables are being shifted to the context + how_many: u32, +} +impl<'a> Shiftable<'a> for Opening { + type Inner = Self; + fn shifted<'d>(&'d self) -> Shifted<'d, Self::Inner> { + Shifted::new(self) + } +} +impl<'a> Substitution<'a> for Opening { + type Error = Void; + fn subst_bvar(&self, i: u32) -> Result>, Void> { + let mk = |i| { + let fi = self.free_base + self.how_many - i - 1; + if self.is_universal { + FreeTyvar::U(0, fi) + } else { + FreeTyvar::E(0, fi) + } + }; + Ok(if i < self.how_many { + Some(Defined::Handleable(Handleable::Var(Tyvar::Free(mk(i))))) + } else { + None + }) + } + fn subst_evar(&self, _o: u32, _i: u32) -> Result>, Void> { + Ok(None) + } + fn subst_uvar(&self, _o: u32, _i: u32) -> Result>, Void> { + Ok(None) + } +} +impl Opening { + pub fn new(is_universal: bool, free_base: u32) -> Self { + Opening { + is_universal, + free_base, + how_many: 0, + } + } + pub fn next(&mut self) { + self.how_many += 1; + } +} + +/// A closing substitution is used to map free variables into bound +/// variables when converting a type being built in a context to a +/// closed(ish) type that is above that context. +/// +/// Like [`Opening`], a given [`Closing`] substitution either affects +/// only existential variables or affects only universal variables, as +/// these are closed at different times. +pub struct Closing { + /// If this substitution applies to universal variables, this + /// keeps track of which ones are imported and which are + /// not. Non-imported universal variables may not be referred to + /// in types. + /// + /// Invariant: If this is provided, its length must be equal to + /// self.how_many + universal_imported: Option>, + /// How many of the relevant (u/e) free vars are valid at this point. 
+ how_many: u32, +} +impl Closing { + pub fn new(is_universal: bool) -> Self { + let universal_imported = if is_universal { Some(Vec::new()) } else { None }; + Closing { + universal_imported, + how_many: 0, + } + } + fn is_universal(&self) -> bool { + self.universal_imported.is_some() + } + pub fn next_u(&mut self, imported: bool) { + let Some(ref mut importeds) = self.universal_imported else { + panic!("next_u called on existential Closing"); + }; + importeds.push(imported); + self.how_many += 1; + } + pub fn next_e(&mut self) { + if self.is_universal() { + panic!("next_e called on universal Closing"); + }; + self.how_many += 1; + } + fn subst_uevar<'a>( + &self, + ue_is_u: bool, + o: u32, + i: u32, + ) -> Result>, ClosingError> { + if self.is_universal() ^ ue_is_u { + return Ok(None); + } + let mk_ue = |o, i| { + if self.is_universal() { + Tyvar::Free(FreeTyvar::U(o, i)) + } else { + Tyvar::Free(FreeTyvar::E(o, i)) + } + }; + let mk = |v| Ok(Some(Defined::Handleable(Handleable::Var(v)))); + if o > 0 { + return mk(mk_ue(o - 1, i)); + } + if i >= self.how_many { + return Err(ClosingError::UnknownVar(false, i)); + } + let bidx = if let Some(imported) = &self.universal_imported { + if !imported[i as usize] { + return Err(ClosingError::UnimportedVar(i)); + } + imported[i as usize..].iter().filter(|x| **x).count() as u32 - 1 + } else { + self.how_many - i - 1 + }; + mk(Tyvar::Bound(bidx)) + } +} +impl<'a> Shiftable<'a> for Closing { + type Inner = Self; + fn shifted<'d>(&'d self) -> Shifted<'d, Self::Inner> { + Shifted::new(self) + } +} +/// Closing can fail for a few reasons: +#[derive(Debug)] +#[allow(unused)] +pub enum ClosingError { + /// A variable was encountered that isn't currently being moved to + /// a bound variable. This is an internal invariant violation in + /// the typechecker, not an issue of a malformed input type. + UnknownVar(bool, u32), + /// A universal variable wasn't imported. 
This is probably an + /// internal invariant violation in the typechecker. + UnimportedVar(u32), +} +impl<'a> Substitution<'a> for Closing { + type Error = ClosingError; + fn subst_bvar(&self, _: u32) -> Result>, ClosingError> { + Ok(None) + } + fn subst_evar(&self, o: u32, i: u32) -> Result>, ClosingError> { + self.subst_uevar(false, o, i) + } + fn subst_uvar(&self, o: u32, i: u32) -> Result>, ClosingError> { + self.subst_uevar(true, o, i) + } +} diff --git a/src/hyperlight_component_util/src/subtype.rs b/src/hyperlight_component_util/src/subtype.rs new file mode 100644 index 000000000..cb39eea4d --- /dev/null +++ b/src/hyperlight_component_util/src/subtype.rs @@ -0,0 +1,256 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +use itertools::Itertools; + +use crate::etypes::{ + Component, Ctx, Defined, Func, Handleable, Name, QualifiedInstance, ResourceId, TypeBound, + Tyvar, Value, +}; +use crate::tv::ResolvedTyvar; + +/// The various ways in which a value can fail to be a subtype of another +#[derive(Debug)] +#[allow(dead_code)] +pub enum Error<'r> { + /// An unnamed value that was expected was missing (e.g. 
in a + /// tuple or variant case) + MissingValue(Value<'r>), + /// A record field that was expected was missing + MissingRecordField(Name<'r>), + /// A variant case that was expected was missing + MissingVariantCase(Name<'r>), + /// A value type was present, but incompatible with its expected type + MismatchedValue(Value<'r>, Value<'r>), + /// A defined type was present, but incompatible with its expected type + MismatchedDefined(Defined<'r>, Defined<'r>), + /// A resource was present, but was not the same resource as was expected + MismatchedResources(ResourceId, ResourceId), + /// A type variable could not be resolved to be the same as the + /// expected one + MismatchedVars(Tyvar, Tyvar), + /// A resource was expected but a non-resource tyvar was found, or + /// vice versa + MismatchedResourceVar(Tyvar, ResourceId), + /// A handle was taken to something that wasn't a + /// resource. Strictly speaking, this might be a well-formedness + /// error on one side or the other rather than a subtyping error + NotResource(Handleable), +} + +/// # Subtyping +/// +/// Most of this is a very direct translation of the subset of the +/// OCaml reference interpreter that we need here. Most of the bits +/// with variables and instantiation that require being quite careful +/// are not involved here, since during the elaboration that we are +/// doing we never need to fully subtype entire component types, which +/// makes this quite a bit simpler. 
+impl<'p, 'a> Ctx<'p, 'a> { + pub fn subtype_value<'r>( + &self, + vt1: &'r Value<'a>, + vt2: &'r Value<'a>, + ) -> Result<(), Error<'a>> { + use Value::*; + use itertools::EitherOrBoth::*; + match (vt1, vt2) { + (Bool, Bool) => Ok(()), + (S(w1), S(w2)) if w1 == w2 => Ok(()), + (U(w1), U(w2)) if w1 == w2 => Ok(()), + (F(w1), F(w2)) if w1 == w2 => Ok(()), + (Char, Char) => Ok(()), + (String, String) => Ok(()), + (List(vt1), List(vt2)) => self.subtype_value(vt1, vt2), + (Record(rfs1), Record(rfs2)) => { + for rf2 in rfs2.iter() { + match rfs1.iter().find(|rf| rf2.name.name == rf.name.name) { + None => return Err(Error::MissingRecordField(rf2.name)), + Some(rf1) => self.subtype_value(&rf1.ty, &rf2.ty)?, + } + } + Ok(()) + } + (Tuple(vts1), Tuple(vts2)) => { + vts1.iter() + .zip_longest(vts2.iter()) + .try_for_each(|vs| match vs { + Both(vt1, vt2) => self.subtype_value(vt1, vt2), + Left(_) => Ok(()), + Right(vt2) => Err(Error::MissingValue(vt2.clone())), + }) + } + (Flags(ns1), Flags(ns2)) => ns2 + .iter() + .find(|n2| !ns1.iter().any(|n| n.name == n2.name)) + .map_or(Ok(()), |n| Err(Error::MissingRecordField(*n))), + (Variant(vcs1), Variant(vcs2)) => { + for vc1 in vcs1.iter() { + match vcs2.iter().find(|vc| vc1.name.name == vc.name.name) { + None => return Err(Error::MissingVariantCase(vc1.name)), + Some(vc2) => self.subtype_value_option(&vc1.ty, &vc2.ty)?, + } + } + Ok(()) + } + (Enum(ns1), Enum(ns2)) => ns1 + .iter() + .find(|n1| !ns2.iter().any(|n| n.name == n1.name)) + .map_or(Ok(()), |n| Err(Error::MissingVariantCase(*n))), + (Option(vt1), Option(vt2)) => self.subtype_value(vt1, vt2), + (Result(vt11, vt12), Result(vt21, vt22)) => self + .subtype_value_option(vt11, vt21) + .and(self.subtype_value_option(vt12, vt22)), + (Own(ht1), Own(ht2)) | (Borrow(ht1), Borrow(ht2)) => { + self.subtype_handleable_is_resource(ht1)?; + self.subtype_handleable_is_resource(ht2)?; + self.subtype_handleable(ht1, ht2) + } + (Var(_, vt1), vt2) => self.subtype_value(vt1, vt2), + (vt1, 
Var(_, vt2)) => self.subtype_value(vt1, vt2), + _ => Err(Error::MismatchedValue(vt1.clone(), vt2.clone())), + } + } + pub fn subtype_value_option<'r>( + &self, + vt1: &'r Option>, + vt2: &'r Option>, + ) -> Result<(), Error<'a>> { + match (vt1, vt2) { + (None, None) => Ok(()), + (None, Some(vt2)) => Err(Error::MissingValue(vt2.clone())), + (Some(_), None) => Ok(()), + (Some(vt1), Some(vt2)) => self.subtype_value(vt1, vt2), + } + } + pub fn subtype_var_var<'r>(&self, v1: &'r Tyvar, v2: &'r Tyvar) -> Result<(), Error<'a>> { + match (self.resolve_tyvar(v1), self.resolve_tyvar(v2)) { + (ResolvedTyvar::Definite(dt1), ResolvedTyvar::Definite(dt2)) => { + self.subtype_defined(&dt1, &dt2) + } + (ResolvedTyvar::E(o1, i1, _), ResolvedTyvar::E(o2, i2, _)) if o1 == o2 && i1 == i2 => { + Ok(()) + } + (ResolvedTyvar::U(o1, i1, _), ResolvedTyvar::U(o2, i2, _)) if o1 == o2 && i1 == i2 => { + Ok(()) + } + (ResolvedTyvar::Bound(_), _) | (_, ResolvedTyvar::Bound(_)) => { + panic!("internal invariant violation: stray bvar in subtype_var_var") + } + _ => Err(Error::MismatchedVars(v1.clone(), v2.clone())), + } + } + pub fn subtype_var_resource<'r>( + &self, + v1: &'r Tyvar, + rid2: &'r ResourceId, + ) -> Result<(), Error<'a>> { + match self.resolve_tyvar(v1) { + ResolvedTyvar::Definite(Defined::Handleable(Handleable::Resource(rid1))) + if rid1 == *rid2 => + { + Ok(()) + } + _ => Err(Error::MismatchedResourceVar(v1.clone(), *rid2)), + } + } + pub fn subtype_resource_var<'r>( + &self, + rid1: &'r ResourceId, + v2: &'r Tyvar, + ) -> Result<(), Error<'a>> { + match self.resolve_tyvar(v2) { + ResolvedTyvar::Definite(Defined::Handleable(Handleable::Resource(rid2))) + if *rid1 == rid2 => + { + Ok(()) + } + _ => Err(Error::MismatchedResourceVar(v2.clone(), *rid1)), + } + } + pub fn subtype_handleable<'r>( + &self, + ht1: &'r Handleable, + ht2: &'r Handleable, + ) -> Result<(), Error<'a>> { + match (ht1, ht2) { + (Handleable::Var(v1), Handleable::Var(v2)) => self.subtype_var_var(v1, v2), + 
(Handleable::Var(v1), Handleable::Resource(rid2)) => { + self.subtype_var_resource(v1, rid2) + } + (Handleable::Resource(rid1), Handleable::Var(v2)) => { + self.subtype_resource_var(rid1, v2) + } + (Handleable::Resource(rid1), Handleable::Resource(rid2)) => { + if rid1 == rid2 { + Ok(()) + } else { + Err(Error::MismatchedResources(*rid1, *rid2)) + } + } + } + } + pub fn subtype_func<'r>( + &self, + _ft1: &'r Func<'a>, + _ft2: &'r Func<'a>, + ) -> Result<(), Error<'a>> { + panic!("func <: func should be impossible to encounter during type elaboration") + } + pub fn subtype_qualified_instance<'r>( + &self, + _qi1: &'r QualifiedInstance<'a>, + _qi2: &'r QualifiedInstance<'a>, + ) -> Result<(), Error<'a>> { + panic!("qinstance <: qinstance should be impossible to encounter during type elaboration") + } + pub fn subtype_component<'r>( + &self, + _ct1: &'r Component<'a>, + _ct2: &'r Component<'a>, + ) -> Result<(), Error<'a>> { + panic!("component <: component should be impossible to encounter during type elaboration") + } + pub fn subtype_defined<'r>( + &self, + dt1: &'r Defined<'a>, + dt2: &'r Defined<'a>, + ) -> Result<(), Error<'a>> { + match (dt1, dt2) { + (Defined::Handleable(ht1), Defined::Handleable(ht2)) => { + self.subtype_handleable(ht1, ht2) + } + (Defined::Value(vt1), Defined::Value(vt2)) => self.subtype_value(vt1, vt2), + (Defined::Func(ft1), Defined::Func(ft2)) => self.subtype_func(ft1, ft2), + (Defined::Instance(it1), Defined::Instance(it2)) => { + self.subtype_qualified_instance(it1, it2) + } + (Defined::Component(ct1), Defined::Component(ct2)) => self.subtype_component(ct1, ct2), + _ => Err(Error::MismatchedDefined(dt1.clone(), dt2.clone())), + } + } + pub fn subtype_handleable_is_resource<'r>(&self, ht: &'r Handleable) -> Result<(), Error<'a>> { + match ht { + Handleable::Resource(_) => Ok(()), + Handleable::Var(tv) => match self.resolve_tyvar(tv) { + ResolvedTyvar::Definite(Defined::Handleable(Handleable::Resource(_))) => Ok(()), + ResolvedTyvar::E(_, 
_, TypeBound::SubResource) => Ok(()), + ResolvedTyvar::U(_, _, TypeBound::SubResource) => Ok(()), + _ => Err(Error::NotResource(ht.clone())), + }, + } + } +} diff --git a/src/hyperlight_component_util/src/tv.rs b/src/hyperlight_component_util/src/tv.rs new file mode 100644 index 000000000..00318f36b --- /dev/null +++ b/src/hyperlight_component_util/src/tv.rs @@ -0,0 +1,119 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +use crate::etypes::{ + BoundedTyvar, Ctx, Defined, FreeTyvar, Handleable, ImportExport, TypeBound, Tyvar, +}; +use crate::substitute::{self, Substitution, Unvoidable}; + +/// The most information we possibly have about a type variable +pub enum ResolvedTyvar<'a> { + /// Invariant: the head of this [`Defined`] is not `[Defined::Handleable]([Handleable::Var](...))` + Definite(Defined<'a>), + /// It's just some bound var... so there is no way to look it up.
+ #[allow(unused)] + Bound(u32), + /// Invariant: the `TypeBound` is not `TypeBound::Eq` + E(u32, u32, TypeBound<'a>), + /// Invariant: the `TypeBound` is not `TypeBound::Eq` + U(u32, u32, TypeBound<'a>), +} + +impl<'p, 'a> Ctx<'p, 'a> { + /// Look up a universal variable in the context, panicking if it doesn't exist + fn lookup_uvar<'c>(&'c self, o: u32, i: u32) -> &'c (BoundedTyvar<'a>, bool) { + // unwrap because failure is an internal invariant violation + &self.parents().nth(o as usize).unwrap().uvars[i as usize] + } + /// Look up an existential variable in the context, panicking if it doesn't exist + fn lookup_evar<'c>(&'c self, o: u32, i: u32) -> &'c (BoundedTyvar<'a>, Option>) { + // unwrap because failure is an internal invariant violation + &self.parents().nth(o as usize).unwrap().evars[i as usize] + } + /// Find a bound for the given free tyvar. Panics if given a + /// `Tyvar::Bound`; by the time you call this, you should have used + /// `bound_to_evars` or `bound_to_uvars`. + pub fn var_bound<'c>(&'c self, tv: &Tyvar) -> &'c TypeBound<'a> { + match tv { + Tyvar::Bound(_) => panic!("Requested bound for Bound tyvar"), + Tyvar::Free(FreeTyvar::U(o, i)) => &self.lookup_uvar(*o, *i).0.bound, + Tyvar::Free(FreeTyvar::E(o, i)) => &self.lookup_evar(*o, *i).0.bound, + } + } + /// Try really hard to resolve a tyvar to a definite type or a + /// descriptive bound.
+ pub fn resolve_tyvar<'c>(&'c self, v: &Tyvar) -> ResolvedTyvar<'a> { + let check_deftype = |dt: &Defined<'a>| match dt { + Defined::Handleable(Handleable::Var(v_)) => self.resolve_tyvar(v_), + _ => ResolvedTyvar::Definite(dt.clone()), + }; + match *v { + Tyvar::Bound(i) => ResolvedTyvar::Bound(i), + Tyvar::Free(FreeTyvar::E(o, i)) => { + let (tv, def) = self.lookup_evar(o, i); + match (&tv.bound, def) { + (TypeBound::Eq(dt), _) => check_deftype(dt), + (_, Some(dt)) => check_deftype(dt), + (tb, _) => ResolvedTyvar::E(o, i, tb.clone()), + } + } + Tyvar::Free(FreeTyvar::U(o, i)) => { + let (tv, _) = self.lookup_uvar(o, i); + match &tv.bound { + TypeBound::Eq(dt) => check_deftype(dt), + tb => ResolvedTyvar::U(o, i, tb.clone()), + } + } + } + } + /// Modify the context to move the given variables into it as + /// existential variables and compute a substitution + /// that replaces bound variable references to them with free + /// variable references + pub fn bound_to_evars( + &mut self, + origin: Option<&'a str>, + vs: &[BoundedTyvar<'a>], + ) -> substitute::Opening { + let mut sub = substitute::Opening::new(false, self.evars.len() as u32); + for var in vs { + let var = var.push_origin(origin.map(ImportExport::Export)); + let bound = sub.bounded_tyvar(&var).not_void(); + self.evars.push((bound, None)); + sub.next(); + } + sub + } + /// Modify the context to move the given variables into it as + /// universal variables and compute a substitution that replaces + /// bound variable references to them with free variable + /// references + pub fn bound_to_uvars( + &mut self, + origin: Option<&'a str>, + vs: &[BoundedTyvar<'a>], + imported: bool, + ) -> substitute::Opening { + let mut sub = substitute::Opening::new(true, self.uvars.len() as u32); + for var in vs { + let var = var.push_origin(origin.map(ImportExport::Import)); + let bound = sub.bounded_tyvar(&var).not_void(); + self.uvars.push((bound, imported)); + sub.next(); + } + sub + } +} diff --git 
a/src/hyperlight_component_util/src/util.rs b/src/hyperlight_component_util/src/util.rs new file mode 100644 index 000000000..8f9b853ae --- /dev/null +++ b/src/hyperlight_component_util/src/util.rs @@ -0,0 +1,69 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! General utilities for bindgen macros +use crate::etypes; + +/// Read and parse a WIT type encapsulated in a wasm file from the +/// given filename, relative to the cargo manifest directory. +pub fn read_wit_type_from_file R>( + filename: impl AsRef, + mut cb: F, +) -> R { + let path = std::path::Path::new(&filename); + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let manifest_dir = std::path::Path::new(&manifest_dir); + let path = manifest_dir.join(path); + + let bytes = std::fs::read(path).unwrap(); + let i = wasmparser::Parser::new(0).parse_all(&bytes); + let ct = crate::component::read_component_single_exported_type(i); + + // because of the two-level encapsulation scheme, we need to look + // for the single export of the component type that we just read + if !ct.uvars.is_empty() + || !ct.imports.is_empty() + || !ct.instance.evars.is_empty() + || ct.instance.unqualified.exports.len() != 1 + { + panic!("malformed component type container for wit type"); + }; + let export = &ct.instance.unqualified.exports[0]; + use etypes::ExternDesc; + let ExternDesc::Component(ct) = &export.desc else { + panic!("malformed component type container: does not contain 
component type"); + }; + log::debug!("hcm: considering component type {:?}", ct); + cb(export.kebab_name.to_string(), ct) +} + +/// Deal with `$HYPERLIGHT_COMPONENT_MACRO_DEBUG`: if it is present, +/// save the given token stream (representing the result of +/// macroexpansion) to the debug file and include that file instead of +/// directly returning the given token stream. +pub fn emit_decls(decls: proc_macro2::TokenStream) -> proc_macro2::TokenStream { + if let Ok(dbg_out) = std::env::var("HYPERLIGHT_COMPONENT_MACRO_DEBUG") { + if let Ok(file) = syn::parse2(decls.clone()) { + std::fs::write(&dbg_out, prettyplease::unparse(&file)).unwrap(); + } else { + let decls = format!("{}", &decls); + std::fs::write(&dbg_out, &decls).unwrap(); + } + quote::quote! { include!(#dbg_out); } + } else { + decls + } +} diff --git a/src/hyperlight_component_util/src/wf.rs b/src/hyperlight_component_util/src/wf.rs new file mode 100644 index 000000000..db15a7613 --- /dev/null +++ b/src/hyperlight_component_util/src/wf.rs @@ -0,0 +1,398 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +//! Component type well-formedness +//! +//! This is a pretty direct port of the relevant sections of the OCaml +//! reference interpreter. 
+use itertools::Itertools; + +use crate::etypes::{ + BoundedTyvar, Component, Ctx, Defined, ExternDecl, ExternDesc, Func, Handleable, Instance, + Name, Param, QualifiedInstance, RecordField, TypeBound, Value, VariantCase, +}; +use crate::substitute::{Substitution, Unvoidable}; +use crate::subtype; + +/// The various position metadata that affect what value types are +/// well-formed +#[derive(Clone, Copy)] +struct ValueTypePosition { + /// Is this well-formedness check for a type that is part of the + /// parameter type of a function? (Borrows should be allowed) + is_param: bool, + dtp: DefinedTypePosition, +} + +impl From for ValueTypePosition { + fn from(p: DefinedTypePosition) -> ValueTypePosition { + ValueTypePosition { + is_param: false, + dtp: p, + } + } +} +impl ValueTypePosition { + fn not_anon_export(self) -> Self { + ValueTypePosition { + dtp: self.dtp.not_anon_export(), + ..self + } + } + fn anon_export(self) -> Self { + ValueTypePosition { + dtp: self.dtp.anon_export(), + ..self + } + } +} + +/// The various position metadata that affect what defined types are +/// well-formed +#[derive(Clone, Copy)] +pub struct DefinedTypePosition { + /// Is this well-formedness check for a type that should be + /// exportable (e.g.
+ is_anon_export: bool, +} +impl DefinedTypePosition { + pub fn internal() -> Self { + DefinedTypePosition { + is_export: false, + is_anon_export: false, + } + } + pub fn export() -> Self { + DefinedTypePosition { + is_export: true, + is_anon_export: false, + } + } + fn not_anon_export(self) -> Self { + DefinedTypePosition { + is_anon_export: false, + ..self + } + } + fn anon_export(self) -> Self { + DefinedTypePosition { + is_anon_export: true, + ..self + } + } +} + +/// There are several ways in which a type may be ill-formed: +#[derive(Debug)] +#[allow(dead_code)] +pub enum Error<'a> { + /// A component/instance exported a bare resource type not behind + /// a tyvar (and therefore not named) + BareResourceExport, + /// A component/instance exported certain complex value types not + /// behind a tyvar (and therefore not named) + BareComplexValTypeExport(Value<'a>), + /// A record has multiple fields with the same name + DuplicateRecordField(Name<'a>), + /// A variant has multiple cases with the same name + DuplicateVariantField(Name<'a>), + /// A variant case is marked as refining another case, but that + /// case does not exist + NonexistentVariantRefinement(u32), + /// A variant case is marked as refining another case, but its + /// associated value is not a subtype of the value of the refined + /// case + IncompatibleVariantRefinement(subtype::Error<'a>), + /// A flags has multiple flags with the same name + DuplicateFlagsName(Name<'a>), + /// An enum has multiple cases with the same name + DuplicateEnumName(Name<'a>), + /// An import/export has the same name as another; the boolean is + /// true if it is an import + DuplicateExternName(&'a str, bool), + /// A value type owns or borrows a type that is not a resource type + NotAResource(subtype::Error<'a>), + /// A borrow type exists somewhere other than a function parameter + BorrowOutsideParam, +} + +fn error_if_duplicates_by( + i: impl Iterator, + f: impl FnMut(&T) -> U, + e: impl Fn(T) -> E, +) -> 
Result<(), E> { + let mut duplicates = i.duplicates_by(f); + if let Some(x) = duplicates.next() { + Err(e(x)) + } else { + Ok(()) + } +} + +/// # Well-formedness +/// +/// Most of this is a very direct translation of the specification +/// (Well-formedness subsections of section 3.4 Type Elaboration). +impl<'p, 'a> Ctx<'p, 'a> { + fn wf_record_fields<'r>( + &'r self, + p: ValueTypePosition, + rfs: &'r [RecordField<'a>], + ) -> Result<(), Error<'a>> { + rfs.iter() + .try_for_each(|rf: &'r RecordField<'a>| self.wf_value(p, &rf.ty))?; + error_if_duplicates_by( + rfs.iter(), + |&rf| rf.name.name, + |rf| Error::DuplicateRecordField(rf.name), + )?; + Ok(()) + } + fn wf_variant_cases<'r>( + &'r self, + p: ValueTypePosition, + vcs: &'r [VariantCase<'a>], + ) -> Result<(), Error<'a>> { + vcs.iter() + .try_for_each(|vc: &'r VariantCase<'a>| self.wf_value_option(p, &vc.ty))?; + error_if_duplicates_by( + vcs.iter(), + |&vc| vc.name.name, + |vc| Error::DuplicateVariantField(vc.name), + )?; + for vc in vcs { + if let Some(ri) = vc.refines { + let rvc = vcs + .get(ri as usize) + .ok_or(Error::NonexistentVariantRefinement(ri))?; + self.subtype_value_option(&vc.ty, &rvc.ty) + .map_err(Error::IncompatibleVariantRefinement)?; + } + } + Ok(()) + } + fn wf_value<'r>(&'r self, p: ValueTypePosition, vt: &'r Value<'a>) -> Result<(), Error<'a>> { + let anon_err: Result<(), Error<'a>> = if p.dtp.is_export && p.dtp.is_anon_export { + Err(Error::BareComplexValTypeExport(vt.clone())) + } else { + Ok(()) + }; + let p_ = p.anon_export(); + let resource_err = |h| { + self.wf_handleable(p.dtp, h).and( + self.subtype_handleable_is_resource(h) + .map_err(Error::NotAResource), + ) + }; + match vt { + Value::Bool => Ok(()), + Value::S(_) => Ok(()), + Value::U(_) => Ok(()), + Value::F(_) => Ok(()), + Value::Char => Ok(()), + Value::String => Ok(()), + Value::List(vt) => self.wf_value(p_, vt), + Value::Record(rfs) => anon_err.and(self.wf_record_fields(p_, rfs)), + Value::Variant(vcs) => 
anon_err.and(self.wf_variant_cases(p_, vcs)), + Value::Flags(ns) => anon_err.and(error_if_duplicates_by( + ns.iter(), + |&n| n.name, + |n| Error::DuplicateFlagsName(*n), + )), + Value::Enum(ns) => anon_err.and(error_if_duplicates_by( + ns.iter(), + |&n| n.name, + |n| Error::DuplicateEnumName(*n), + )), + Value::Option(vt) => self.wf_value(p_, vt), + Value::Tuple(vs) => vs + .iter() + .try_for_each(|vt: &'r Value<'a>| self.wf_value(p_, vt)), + Value::Result(vt1, vt2) => self + .wf_value_option(p_, vt1) + .and(self.wf_value_option(p_, vt2)), + Value::Own(h) => resource_err(h), + Value::Borrow(h) => { + if p.is_param { + resource_err(h) + } else { + Err(Error::BorrowOutsideParam) + } + } + Value::Var(tv, vt) => tv + .as_ref() + .map(|tv| self.wf_type_bound(p.dtp, self.var_bound(tv))) + .unwrap_or(Ok(())) + .and(self.wf_value(p.not_anon_export(), vt)), + } + } + fn wf_value_option<'r>( + &'r self, + p: ValueTypePosition, + vt: &'r Option>, + ) -> Result<(), Error<'a>> { + vt.as_ref().map_or(Ok(()), |ty| self.wf_value(p, ty)) + } + fn wf_func<'r>(&'r self, p: DefinedTypePosition, ft: &'r Func<'a>) -> Result<(), Error<'a>> { + let p_ = p.anon_export(); + let param_pos = ValueTypePosition { + is_param: true, + dtp: p_, + }; + let result_pos = ValueTypePosition { + is_param: false, + dtp: p_, + }; + ft.params + .iter() + .try_for_each(|fp: &'r Param<'a>| self.wf_value(param_pos, &fp.ty))?; + match &ft.result { + crate::etypes::Result::Unnamed(vt) => self.wf_value(result_pos, vt), + crate::etypes::Result::Named(ps) => ps + .iter() + .try_for_each(|fp: &'r Param<'a>| self.wf_value(result_pos, &fp.ty)), + } + } + fn wf_type_bound<'r>( + &'r self, + p: DefinedTypePosition, + tb: &'r TypeBound<'a>, + ) -> Result<(), Error<'a>> { + match tb { + TypeBound::SubResource => Ok(()), + TypeBound::Eq(dt) => self.wf_defined(p.not_anon_export(), dt), + } + } + fn wf_bounded_tyvar<'r>( + &'r self, + p: DefinedTypePosition, + btv: &'r BoundedTyvar<'a>, + ) -> Result<(), Error<'a>> { + 
match &btv.bound { + TypeBound::SubResource => Ok(()), + TypeBound::Eq(dt) => self.wf_defined(p, dt), + } + } + + fn wf_handleable<'r>( + &'r self, + p: DefinedTypePosition, + ht: &'r Handleable, + ) -> Result<(), Error<'a>> { + match ht { + Handleable::Var(tv) => self.wf_type_bound(p, self.var_bound(tv)), + Handleable::Resource(rid) => { + if p.is_export { + Err(Error::BareResourceExport) + } else { + // Internal invariant: rtidx should always exist + assert!((rid.id as usize) < self.rtypes.len()); + Ok(()) + } + } + } + } + pub fn wf_defined<'r>( + &'r self, + p: DefinedTypePosition, + dt: &'r Defined<'a>, + ) -> Result<(), Error<'a>> { + match dt { + Defined::Handleable(ht) => self.wf_handleable(p, ht), + Defined::Value(vt) => self.wf_value(p.into(), vt), + Defined::Func(ft) => self.wf_func(p, ft), + Defined::Instance(it) => self.wf_qualified_instance(p, it), + Defined::Component(ct) => self.wf_component(p, ct), + } + } + fn wf_extern_desc<'r>( + &self, + p: DefinedTypePosition, + ed: &'r ExternDesc<'a>, + ) -> Result<(), Error<'a>> { + match ed { + ExternDesc::CoreModule(_) => Ok(()), + ExternDesc::Func(ft) => self.wf_func(p, ft), + ExternDesc::Type(dt) => self.wf_defined(p, dt), + ExternDesc::Instance(it) => self.wf_instance(p, it), + ExternDesc::Component(ct) => self.wf_component(p, ct), + } + } + fn wf_extern_decl<'r>( + &self, + p: DefinedTypePosition, + ed: &'r ExternDecl<'a>, + ) -> Result<(), Error<'a>> { + self.wf_extern_desc(p, &ed.desc) + } + fn wf_instance<'r>( + &self, + p: DefinedTypePosition, + it: &'r Instance<'a>, + ) -> Result<(), Error<'a>> { + error_if_duplicates_by( + it.exports.iter(), + |&ex| ex.kebab_name, + |ex| Error::DuplicateExternName(ex.kebab_name, false), + )?; + it.exports + .iter() + .try_for_each(|ed| self.wf_extern_decl(p, ed)) + } + pub fn wf_qualified_instance<'r>( + &self, + p: DefinedTypePosition, + qit: &'r QualifiedInstance<'a>, + ) -> Result<(), Error<'a>> { + let mut ctx_ = self.clone(); + let subst = 
ctx_.bound_to_evars(None, &qit.evars); + ctx_.evars + .iter() + .try_for_each(|(btv, _)| ctx_.wf_bounded_tyvar(p, btv))?; + let it = subst.instance(&qit.unqualified).not_void(); + ctx_.wf_instance(p, &it) + } + pub fn wf_component<'r>( + &self, + p: DefinedTypePosition, + ct: &'r Component<'a>, + ) -> Result<(), Error<'a>> { + let mut ctx_ = self.clone(); + let subst = ctx_.bound_to_uvars(None, &ct.uvars, false); + ctx_.uvars + .iter() + .try_for_each(|(btv, _)| ctx_.wf_bounded_tyvar(p, btv))?; + error_if_duplicates_by( + ct.imports.iter(), + |&im| im.kebab_name, + |im| Error::DuplicateExternName(im.kebab_name, true), + )?; + ct.imports + .iter() + .map(|ed| subst.extern_decl(ed).not_void()) + .try_for_each(|ed| ctx_.wf_extern_decl(p, &ed))?; + let it = subst.qualified_instance(&ct.instance).not_void(); + ctx_.wf_qualified_instance(p, &it) + } +} diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index b1dddbae0..0717b1e7e 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -33,7 +33,7 @@ log = "0.4.27" tracing = { version = "0.1.41", features = ["log"] } tracing-log = "0.2.0" tracing-core = "0.1.33" -hyperlight-common = { workspace = true, default-features = true } +hyperlight-common = { workspace = true, default-features = true, features = [ "std" ] } vmm-sys-util = "0.14.0" crossbeam = "0.8.0" crossbeam-channel = "0.5.15" @@ -102,6 +102,7 @@ metrics-util = "0.19.1" metrics-exporter-prometheus = "0.17.0" tracing-tracy = "0.11.4" serde_json = "1.0" +hyperlight-component-macro = { workspace = true } [target.'cfg(windows)'.dev-dependencies] windows = { version = "0.61", features = [ diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index 317b29956..ff4cc6ccc 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -16,7 +16,9 @@ limitations under the License. 
use criterion::{Criterion, criterion_group, criterion_main}; use hyperlight_host::GuestBinary; -use hyperlight_host::sandbox::{MultiUseSandbox, SandboxConfiguration, UninitializedSandbox}; +use hyperlight_host::sandbox::{ + Callable, MultiUseSandbox, SandboxConfiguration, UninitializedSandbox, +}; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; use hyperlight_testing::simple_guest_as_string; diff --git a/src/hyperlight_host/examples/func_ctx/main.rs b/src/hyperlight_host/examples/func_ctx/main.rs index 9a6862cb8..ce427a651 100644 --- a/src/hyperlight_host/examples/func_ctx/main.rs +++ b/src/hyperlight_host/examples/func_ctx/main.rs @@ -15,7 +15,7 @@ limitations under the License. */ use hyperlight_host::func::call_ctx::MultiUseGuestCallContext; -use hyperlight_host::sandbox::{MultiUseSandbox, UninitializedSandbox}; +use hyperlight_host::sandbox::{Callable, MultiUseSandbox, UninitializedSandbox}; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; use hyperlight_host::{GuestBinary, Result}; diff --git a/src/hyperlight_host/examples/logging/main.rs b/src/hyperlight_host/examples/logging/main.rs index 01b0fdba1..ea6e18b31 100644 --- a/src/hyperlight_host/examples/logging/main.rs +++ b/src/hyperlight_host/examples/logging/main.rs @@ -18,6 +18,7 @@ extern crate hyperlight_host; use std::sync::{Arc, Barrier}; +use hyperlight_host::sandbox::Callable; use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; diff --git a/src/hyperlight_host/examples/metrics/main.rs b/src/hyperlight_host/examples/metrics/main.rs index f713b25cd..5b7cc103f 100644 --- a/src/hyperlight_host/examples/metrics/main.rs +++ b/src/hyperlight_host/examples/metrics/main.rs @@ -18,6 +18,7 @@ extern crate hyperlight_host; use std::sync::{Arc, 
Barrier}; use std::thread::{JoinHandle, spawn}; +use hyperlight_host::sandbox::Callable; use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; diff --git a/src/hyperlight_host/examples/tracing-otlp/main.rs b/src/hyperlight_host/examples/tracing-otlp/main.rs index 190577a82..ab8196972 100644 --- a/src/hyperlight_host/examples/tracing-otlp/main.rs +++ b/src/hyperlight_host/examples/tracing-otlp/main.rs @@ -26,6 +26,7 @@ use std::io::stdin; use std::sync::{Arc, Barrier, Mutex}; use std::thread::{JoinHandle, spawn}; +use hyperlight_host::sandbox::Callable; use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; diff --git a/src/hyperlight_host/examples/tracing/main.rs b/src/hyperlight_host/examples/tracing/main.rs index ec75a7d87..dfc680576 100644 --- a/src/hyperlight_host/examples/tracing/main.rs +++ b/src/hyperlight_host/examples/tracing/main.rs @@ -19,6 +19,7 @@ extern crate hyperlight_host; use std::sync::{Arc, Barrier}; use std::thread::{JoinHandle, spawn}; +use hyperlight_host::sandbox::Callable; use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; diff --git a/src/hyperlight_host/src/func/call_ctx.rs b/src/hyperlight_host/src/func/call_ctx.rs index c80512fa9..180731910 100644 --- a/src/hyperlight_host/src/func/call_ctx.rs +++ b/src/hyperlight_host/src/func/call_ctx.rs @@ -17,6 +17,7 @@ limitations under the License. use tracing::{Span, instrument}; use super::{ParameterTuple, SupportedReturnType}; +use crate::sandbox::Callable; use crate::{MultiUseSandbox, Result}; /// A context for calling guest functions. 
/// @@ -47,6 +48,31 @@ impl MultiUseGuestCallContext { Self { sbox } } + /// Close out the context and get back the internally-stored + /// `MultiUseSandbox`. Future contexts opened by the returned sandbox + /// will have guest state restored. + #[instrument(err(Debug), skip(self), parent = Span::current())] + pub fn finish(mut self) -> Result { + self.sbox.restore_state()?; + Ok(self.sbox) + } + /// Close out the context and get back the internally-stored + /// `MultiUseSandbox`. + /// + /// Note that this method is pub(crate) and does not reset the state of the + /// sandbox. + /// + /// It is intended to be used when evolving a MultiUseSandbox to a new state + /// and is not intended to be called publicly. It allows the state of the guest to be altered + /// during the evolution of one sandbox state to another, enabling the new state created + /// to be captured and stored in the Sandboxes state stack. + /// + pub(crate) fn finish_no_reset(self) -> MultiUseSandbox { + self.sbox + } +} + +impl Callable for MultiUseGuestCallContext { /// Call the guest function called `func_name` with the given arguments /// `args`, and expect the return value have the same type as /// `func_ret_type`. @@ -58,7 +84,7 @@ impl MultiUseGuestCallContext { /// If you want to reset state, call `finish()` on this `MultiUseGuestCallContext` /// and get a new one from the resulting `MultiUseSandbox` #[instrument(err(Debug),skip(self, args),parent = Span::current())] - pub fn call( + fn call( &mut self, func_name: &str, args: impl ParameterTuple, @@ -75,29 +101,6 @@ impl MultiUseGuestCallContext { ); Output::from_value(ret?) } - - /// Close out the context and get back the internally-stored - /// `MultiUseSandbox`. Future contexts opened by the returned sandbox - /// will have guest state restored. 
- #[instrument(err(Debug), skip(self), parent = Span::current())] - pub fn finish(mut self) -> Result { - self.sbox.restore_state()?; - Ok(self.sbox) - } - /// Close out the context and get back the internally-stored - /// `MultiUseSandbox`. - /// - /// Note that this method is pub(crate) and does not reset the state of the - /// sandbox. - /// - /// It is intended to be used when evolving a MultiUseSandbox to a new state - /// and is not intended to be called publicly. It allows the state of the guest to be altered - /// during the evolution of one sandbox state to another, enabling the new state created - /// to be captured and stored in the Sandboxes state stack. - /// - pub(crate) fn finish_no_reset(self) -> MultiUseSandbox { - self.sbox - } } #[cfg(test)] @@ -108,6 +111,7 @@ mod tests { use hyperlight_testing::simple_guest_as_string; use super::MultiUseGuestCallContext; + use crate::sandbox::Callable; use crate::sandbox_state::sandbox::EvolvableSandbox; use crate::sandbox_state::transition::Noop; use crate::{GuestBinary, HyperlightError, MultiUseSandbox, Result, UninitializedSandbox}; diff --git a/src/hyperlight_host/src/func/host_functions.rs b/src/hyperlight_host/src/func/host_functions.rs index c37120ee7..ca055c0b8 100644 --- a/src/hyperlight_host/src/func/host_functions.rs +++ b/src/hyperlight_host/src/func/host_functions.rs @@ -23,6 +23,52 @@ use super::{ParameterTuple, ResultType, SupportedReturnType}; use crate::sandbox::{ExtraAllowedSyscall, UninitializedSandbox}; use crate::{Result, log_then_return, new_error}; +/// A sandbox on which (primitive) host functions can be registered +/// +pub trait Registerable { + /// Register a primitive host function + fn register_host_function( + &mut self, + name: &str, + hf: impl Into>, + ) -> Result<()>; + /// Register a primitive host function whose worker thread has + /// extra permissive seccomp filters installed + #[cfg(all(feature = "seccomp", target_os = "linux"))] + fn register_host_function_with_syscalls( + 
&mut self, + name: &str, + hf: impl Into>, + eas: Vec, + ) -> Result<()>; +} +impl Registerable for UninitializedSandbox { + fn register_host_function( + &mut self, + name: &str, + hf: impl Into>, + ) -> Result<()> { + let mut hfs = self + .host_funcs + .try_lock() + .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?; + (*hfs).register_host_function(name.to_string(), hf.into().into()) + } + #[cfg(all(feature = "seccomp", target_os = "linux"))] + fn register_host_function_with_syscalls( + &mut self, + name: &str, + hf: impl Into>, + eas: Vec, + ) -> Result<()> { + let mut hfs = self + .host_funcs + .try_lock() + .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?; + (*hfs).register_host_function_with_syscalls(name.to_string(), hf.into().into(), eas) + } +} + /// A representation of a host function. /// This is a thin wrapper around a `Fn(Args) -> Result`. #[derive(Clone)] diff --git a/src/hyperlight_host/src/func/mod.rs b/src/hyperlight_host/src/func/mod.rs index a70c85fae..a0efe113e 100644 --- a/src/hyperlight_host/src/func/mod.rs +++ b/src/hyperlight_host/src/func/mod.rs @@ -38,7 +38,7 @@ pub(crate) mod param_type; pub(crate) mod ret_type; /// Re-export for `HostFunction` trait -pub use host_functions::HostFunction; +pub use host_functions::{HostFunction, Registerable}; /// Re-export for `ParameterValue` enum pub use hyperlight_common::flatbuffer_wrappers::function_types::ParameterValue; /// Re-export for `ReturnType` enum diff --git a/src/hyperlight_host/src/sandbox/callable.rs b/src/hyperlight_host/src/sandbox/callable.rs new file mode 100644 index 000000000..a800ddab8 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/callable.rs @@ -0,0 +1,28 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +use crate::Result; +use crate::func::{ParameterTuple, SupportedReturnType}; + +/// Trait used by the macros to paper over the differences between hyperlight and hyperlight-wasm +pub trait Callable { + /// Call a guest function dynamically + fn call( + &mut self, + func_name: &str, + args: impl ParameterTuple, + ) -> Result; +} diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index d7e8a42f1..8d1e27918 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -321,7 +321,7 @@ mod tests { use hyperlight_testing::simple_guest_as_string; use crate::func::call_ctx::MultiUseGuestCallContext; - use crate::sandbox::SandboxConfiguration; + use crate::sandbox::{Callable, SandboxConfiguration}; use crate::sandbox_state::sandbox::{DevolvableSandbox, EvolvableSandbox}; use crate::sandbox_state::transition::{MultiUseContextCallback, Noop}; use crate::{GuestBinary, HyperlightError, MultiUseSandbox, Result, UninitializedSandbox}; diff --git a/src/hyperlight_host/src/sandbox/mod.rs b/src/hyperlight_host/src/sandbox/mod.rs index 605c65a4c..baf98c72f 100644 --- a/src/hyperlight_host/src/sandbox/mod.rs +++ b/src/hyperlight_host/src/sandbox/mod.rs @@ -37,6 +37,11 @@ pub mod uninitialized; /// initialized `Sandbox`es. 
pub(crate) mod uninitialized_evolve; +/// Trait used by the macros to paper over the differences between hyperlight and hyperlight-wasm +mod callable; + +/// Trait used by the macros to paper over the differences between hyperlight and hyperlight-wasm +pub use callable::Callable; /// Re-export for `SandboxConfiguration` type pub use config::SandboxConfiguration; /// Re-export for the `MultiUseSandbox` type diff --git a/src/hyperlight_host/tests/sandbox_host_tests.rs b/src/hyperlight_host/tests/sandbox_host_tests.rs index 7f285ceae..1c8bf92b6 100644 --- a/src/hyperlight_host/tests/sandbox_host_tests.rs +++ b/src/hyperlight_host/tests/sandbox_host_tests.rs @@ -19,7 +19,7 @@ use std::sync::mpsc::channel; use std::sync::{Arc, Mutex}; use common::new_uninit; -use hyperlight_host::sandbox::SandboxConfiguration; +use hyperlight_host::sandbox::{Callable, SandboxConfiguration}; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; use hyperlight_host::{ diff --git a/src/hyperlight_host/tests/wit_test.rs b/src/hyperlight_host/tests/wit_test.rs new file mode 100644 index 000000000..d07e32bfa --- /dev/null +++ b/src/hyperlight_host/tests/wit_test.rs @@ -0,0 +1,304 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ */ +#![allow(clippy::disallowed_macros)] + +use std::sync::{Arc, Mutex}; + +use hyperlight_common::resource::BorrowedResourceGuard; +use hyperlight_host::{GuestBinary, MultiUseGuestCallContext, UninitializedSandbox}; +use hyperlight_testing::wit_guest_as_string; + +extern crate alloc; +mod bindings { + hyperlight_component_macro::host_bindgen!("../tests/rust_guests/witguest/interface.wasm"); +} + +use bindings::*; + +struct Host {} + +impl test::wit::Roundtrip for Host { + fn roundtrip_bool(&mut self, x: bool) -> bool { + x + } + fn roundtrip_s8(&mut self, x: i8) -> i8 { + x + } + fn roundtrip_s16(&mut self, x: i16) -> i16 { + x + } + fn roundtrip_s32(&mut self, x: i32) -> i32 { + x + } + fn roundtrip_s64(&mut self, x: i64) -> i64 { + x + } + fn roundtrip_u8(&mut self, x: u8) -> u8 { + x + } + fn roundtrip_u16(&mut self, x: u16) -> u16 { + x + } + fn roundtrip_u32(&mut self, x: u32) -> u32 { + x + } + fn roundtrip_u64(&mut self, x: u64) -> u64 { + x + } + fn roundtrip_f32(&mut self, x: f32) -> f32 { + x + } + fn roundtrip_f64(&mut self, x: f64) -> f64 { + x + } + fn roundtrip_char(&mut self, x: char) -> char { + x + } + fn roundtrip_string(&mut self, x: alloc::string::String) -> alloc::string::String { + x + } + fn roundtrip_list(&mut self, x: alloc::vec::Vec) -> alloc::vec::Vec { + x + } + fn roundtrip_tuple(&mut self, x: (alloc::string::String, u8)) -> (alloc::string::String, u8) { + x + } + fn roundtrip_option( + &mut self, + x: ::core::option::Option, + ) -> ::core::option::Option { + x + } + fn roundtrip_result( + &mut self, + x: ::core::result::Result, + ) -> ::core::result::Result { + x + } + fn roundtrip_record( + &mut self, + x: test::wit::roundtrip::Testrecord, + ) -> test::wit::roundtrip::Testrecord { + x + } + fn roundtrip_flags_small( + &mut self, + x: test::wit::roundtrip::Smallflags, + ) -> test::wit::roundtrip::Smallflags { + x + } + fn roundtrip_flags_large( + &mut self, + x: test::wit::roundtrip::Largeflags, + ) -> 
test::wit::roundtrip::Largeflags { + x + } + fn roundtrip_variant( + &mut self, + x: test::wit::roundtrip::Testvariant, + ) -> test::wit::roundtrip::Testvariant { + x + } + fn roundtrip_enum( + &mut self, + x: test::wit::roundtrip::Testenum, + ) -> test::wit::roundtrip::Testenum { + x + } +} + +struct TestResource { + n_calls: u32, + x: String, + last: char, +} + +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering::Relaxed; +// We only have 1 test that uses this, and it isn't a proptest or +// anything, so it should only run once. If multiple tests using this +// could run in parallel, there would be problems. +static HAS_BEEN_DROPPED: AtomicBool = AtomicBool::new(false); + +impl Drop for TestResource { + fn drop(&mut self) { + assert_eq!(self.x, "strabc"); + assert_eq!(self.last, 'c'); + assert!(!HAS_BEEN_DROPPED.swap(true, Relaxed)); + } +} + +impl test::wit::host_resource::Testresource for Host { + type T = Arc>; + fn new(&mut self, x: String, last: char) -> Self::T { + Arc::new(Mutex::new(TestResource { + n_calls: 0, + x, + last, + })) + } + fn append_char(&mut self, self_: BorrowedResourceGuard<'_, Self::T>, c: char) { + let mut self_ = self_.lock().unwrap(); + match self_.n_calls { + // These line up to the initial values and calls made by + // witguest.rs. Mostly, this just checks that (even after + // round-tripping an owned reference through the host), we + // do always seem to get the correct structure. 
+ 0 => { + assert_eq!(self_.x, "str"); + assert_eq!(self_.last, 'z'); + } + 1 => { + assert_eq!(self_.x, "stra"); + assert_eq!(self_.last, 'a'); + } + 2 => { + assert_eq!(self_.x, "strab"); + assert_eq!(self_.last, 'b'); + } + _ => panic!(), + }; + self_.n_calls += 1; + self_.x.push(c); + self_.last = c; + } +} + +impl test::wit::HostResource for Host { + fn roundtrip_own(&mut self, owned: Arc>) -> Arc> { + owned + } + + fn return_own(&mut self, _: Arc>) { + // Not much to do here other than let it be dropped + } +} + +#[allow(refining_impl_trait)] +impl test::wit::TestImports for Host { + type Roundtrip = Self; + fn roundtrip(&mut self) -> &mut Self { + self + } + type HostResource = Self; + fn host_resource(&mut self) -> &mut Self { + self + } +} + +fn sb() -> TestSandbox { + let path = wit_guest_as_string().unwrap(); + let guest_path = GuestBinary::FilePath(path); + let uninit = UninitializedSandbox::new(guest_path, None).unwrap(); + test::wit::Test::instantiate(uninit, Host {}) +} + +mod wit_test { + + use proptest::prelude::*; + + use crate::bindings::test::wit::{Roundtrip, TestExports, TestHostResource, roundtrip}; + use crate::sb; + + prop_compose! { + fn arb_testrecord()(contents in ".*", length in any::()) -> roundtrip::Testrecord { + roundtrip::Testrecord { contents, length } + } + } + + prop_compose! { + fn arb_smallflags()(flag_a: bool, flag_b: bool, flag_c: bool) -> roundtrip::Smallflags { + roundtrip::Smallflags { flag_a, flag_b, flag_c } + } + } + + prop_compose! 
{ + fn arb_largeflags()( + flag00: bool, flag01: bool, flag02: bool, flag03: bool, flag04: bool, flag05: bool, flag06: bool, flag07: bool, + flag08: bool, flag09: bool, flag0a: bool, flag0b: bool, flag0c: bool, flag0d: bool, flag0e: bool, flag0f: bool, + + flag10: bool, flag11: bool, flag12: bool, flag13: bool, flag14: bool, flag15: bool, flag16: bool, flag17: bool, + flag18: bool, flag19: bool, flag1a: bool, flag1b: bool, flag1c: bool, flag1d: bool, flag1e: bool, flag1f: bool, + ) -> roundtrip::Largeflags { + roundtrip::Largeflags { + flag00, flag01, flag02, flag03, flag04, flag05, flag06, flag07, + flag08, flag09, flag0a, flag0b, flag0c, flag0d, flag0e, flag0f, + + flag10, flag11, flag12, flag13, flag14, flag15, flag16, flag17, + flag18, flag19, flag1a, flag1b, flag1c, flag1d, flag1e, flag1f, + } + } + } + + fn arb_testvariant() -> impl Strategy { + use roundtrip::Testvariant::*; + prop_oneof![ + Just(VariantA), + any::().prop_map(VariantB), + any::().prop_map(VariantC), + ] + } + + fn arb_testenum() -> impl Strategy { + use roundtrip::Testenum::*; + prop_oneof![Just(EnumA), Just(EnumB), Just(EnumC),] + } + + macro_rules! make_test { + ($fn:ident, $($ty:tt)*) => { + proptest! { + #[test] + fn $fn(x $($ty)*) { + assert_eq!(x, sb().roundtrip().$fn(x.clone())) + } + } + } + } + + make_test! { roundtrip_bool, : bool } + make_test! { roundtrip_u8, : u8 } + make_test! { roundtrip_u16, : u16 } + make_test! { roundtrip_u32, : u32 } + make_test! { roundtrip_u64, : u64 } + make_test! { roundtrip_s8, : i8 } + make_test! { roundtrip_s16, : i16 } + make_test! { roundtrip_s32, : i32 } + make_test! { roundtrip_s64, : i64 } + make_test! { roundtrip_f32, : f32 } + make_test! { roundtrip_f64, : f64 } + make_test! { roundtrip_char, : char } + make_test! { roundtrip_string, : String } + + make_test! { roundtrip_list, : Vec } + make_test! { roundtrip_tuple, : (String, u8) } + make_test! { roundtrip_option, : Option } + make_test! { roundtrip_result, : Result } + + make_test! 
{ roundtrip_record, in arb_testrecord() } + make_test! { roundtrip_flags_small, in arb_smallflags() } + make_test! { roundtrip_flags_large, in arb_largeflags() } + make_test! { roundtrip_variant, in arb_testvariant() } + make_test! { roundtrip_enum, in arb_testenum() } + + #[test] + fn test_host_resource() { + { + sb().test_host_resource().test(); + } + use std::sync::atomic::Ordering::Relaxed; + assert!(crate::HAS_BEEN_DROPPED.load(Relaxed)); + } +} diff --git a/src/hyperlight_testing/src/lib.rs b/src/hyperlight_testing/src/lib.rs index c27442361..eb48e203a 100644 --- a/src/hyperlight_testing/src/lib.rs +++ b/src/hyperlight_testing/src/lib.rs @@ -80,6 +80,14 @@ pub fn callback_guest_as_string() -> Result { .ok_or_else(|| anyhow!("couldn't convert callback guest PathBuf to string")) } +/// Get a fully-qualified OS-specific path to the witguest elf binary +pub fn wit_guest_as_string() -> Result { + let buf = rust_guest_as_pathbuf("witguest"); + buf.to_str() + .map(|s| s.to_string()) + .ok_or_else(|| anyhow!("couldn't convert callback guest PathBuf to string")) +} + /// Get a fully qualified OS-specific path to the dummyguest elf binary pub fn dummy_guest_as_string() -> Result { let buf = rust_guest_as_pathbuf("dummyguest"); diff --git a/src/tests/rust_guests/callbackguest/Cargo.lock b/src/tests/rust_guests/callbackguest/Cargo.lock index b75ceb519..33d88b217 100644 --- a/src/tests/rust_guests/callbackguest/Cargo.lock +++ b/src/tests/rust_guests/callbackguest/Cargo.lock @@ -76,6 +76,7 @@ dependencies = [ "anyhow", "flatbuffers", "log", + "spin 0.10.0", ] [[package]] diff --git a/src/tests/rust_guests/simpleguest/Cargo.lock b/src/tests/rust_guests/simpleguest/Cargo.lock index 2b53c3de3..08b52f282 100644 --- a/src/tests/rust_guests/simpleguest/Cargo.lock +++ b/src/tests/rust_guests/simpleguest/Cargo.lock @@ -67,6 +67,7 @@ dependencies = [ "anyhow", "flatbuffers", "log", + "spin 0.10.0", ] [[package]] diff --git a/src/tests/rust_guests/witguest/.cargo/config.toml 
b/src/tests/rust_guests/witguest/.cargo/config.toml new file mode 100644 index 000000000..f977e97ab --- /dev/null +++ b/src/tests/rust_guests/witguest/.cargo/config.toml @@ -0,0 +1,19 @@ +[build] +target = "x86_64-unknown-none" + +[target.x86_64-unknown-none] +rustflags = [ + "-C", + "code-model=small", + "-C", + "link-args=-e entrypoint", +] +linker = "rust-lld" + +[profile.release] +opt-level = 0 +panic = "abort" + +[profile.dev] +opt-level = 0 +panic = "abort" \ No newline at end of file diff --git a/src/tests/rust_guests/witguest/Cargo.lock b/src/tests/rust_guests/witguest/Cargo.lock new file mode 100644 index 000000000..c46a89d23 --- /dev/null +++ b/src/tests/rust_guests/witguest/Cargo.lock @@ -0,0 +1,595 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bitflags" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" + +[[package]] +name = "buddy_system_allocator" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0108968a3a2dab95b089c0fc3f1afa7759aa5ebe6f1d86d206d6f7ba726eb" +dependencies = [ + "spin 0.9.8", +] + +[[package]] +name = "cc" +version = "1.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "glob" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" + +[[package]] +name = "hashbrown" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +dependencies = [ + "foldhash", + "serde", +] + +[[package]] +name = "hyperlight-common" +version = "0.5.1" +dependencies = [ + "anyhow", + "flatbuffers", + "log", + "spin 0.10.0", +] + +[[package]] +name = "hyperlight-component-macro" +version = "0.5.1" +dependencies = [ + "env_logger", + "hyperlight-component-util", + "itertools", + "prettyplease", + 
"proc-macro2", + "quote", + "syn", + "wasmparser", +] + +[[package]] +name = "hyperlight-component-util" +version = "0.5.1" +dependencies = [ + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wasmparser", +] + +[[package]] +name = "hyperlight-guest" +version = "0.5.1" +dependencies = [ + "anyhow", + "hyperlight-common", + "serde_json", +] + +[[package]] +name = "hyperlight-guest-bin" +version = "0.5.1" +dependencies = [ + "buddy_system_allocator", + "cc", + "cfg-if", + "glob", + "hyperlight-common", + "hyperlight-guest", + "log", + "spin 0.10.0", +] + +[[package]] +name = "indexmap" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +dependencies = [ + "equivalent", + "hashbrown", + "serde", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a194df1107f33c79f4f93d02c80798520551949d59dfad22b6157048a88cca93" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c6e1db7ed32c6c71b759497fae34bf7933636f75a251b9e736555da426f6442" 
+dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "prettyplease" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dee91521343f4c5c6a63edd65e54f31f5c92fe8978c40a4282f8372194c6a7d" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" +dependencies = [ + "lock_api", +] + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasmparser" +version = "0.224.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04f17a5917c2ddd3819e84c661fae0d6ba29d7b9c1f0e96c708c65a9c4188e11" +dependencies = [ + 
"bitflags", + "hashbrown", + "indexmap", + "semver", + "serde", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "witguest" +version = "0.1.0" +dependencies = [ + "hyperlight-common", + "hyperlight-component-macro", + "hyperlight-guest", + "hyperlight-guest-bin", +] diff --git a/src/tests/rust_guests/witguest/Cargo.toml b/src/tests/rust_guests/witguest/Cargo.toml new file mode 100644 index 000000000..63b38fc13 --- /dev/null +++ b/src/tests/rust_guests/witguest/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "witguest" +version = "0.1.0" +edition = "2021" + +[dependencies] +hyperlight-guest = { path = "../../../hyperlight_guest" } +hyperlight-guest-bin = { path = "../../../hyperlight_guest_bin" } +hyperlight-common = { path = "../../../hyperlight_common", default-features = false } +hyperlight-component-macro = { path = "../../../hyperlight_component_macro" } \ No newline at end of file diff --git a/src/tests/rust_guests/witguest/guest.wit b/src/tests/rust_guests/witguest/guest.wit new file mode 100644 index 000000000..5242fa1de --- /dev/null +++ b/src/tests/rust_guests/witguest/guest.wit @@ -0,0 +1,69 @@ +package test:wit; + +world test { + import roundtrip; + import host-resource; + export roundtrip; + export test-host-resource; +} + +interface roundtrip { + roundtrip-bool: func(x: bool) -> bool; + roundtrip-s8: func(x: s8) -> s8; + roundtrip-s16: func(x: s16) -> s16; + roundtrip-s32: func(x: s32) -> s32; + roundtrip-s64: func(x: s64) -> s64; + roundtrip-u8: func(x: u8) -> u8; + roundtrip-u16: func(x: u16) -> u16; + roundtrip-u32: func(x: u32) -> u32; + roundtrip-u64: 
func(x: u64) -> u64; + roundtrip-f32: func(x: f32) -> f32; + roundtrip-f64: func(x: f64) -> f64; + roundtrip-char: func(x: char) -> char; + roundtrip-string: func(x: string) -> string; + roundtrip-list: func(x: list) -> list; + roundtrip-tuple: func(x: tuple) -> tuple; + roundtrip-option: func(x: option) -> option; + roundtrip-result: func(x: result) -> result; + + record testrecord { + contents: string, + length: u64, + } + roundtrip-record: func(x: testrecord) -> testrecord; + flags smallflags { + flag-a, + flag-b, + flag-c, + } + roundtrip-flags-small: func(x: smallflags) -> smallflags; + flags largeflags { + flag00, flag01, flag02, flag03, flag04, flag05, flag06, flag07, + flag08, flag09, flag0a, flag0b, flag0c, flag0d, flag0e, flag0f, + + flag10, flag11, flag12, flag13, flag14, flag15, flag16, flag17, + flag18, flag19, flag1a, flag1b, flag1c, flag1d, flag1e, flag1f, + } + roundtrip-flags-large: func(x: largeflags) -> largeflags; + variant testvariant { + variant-a, variant-b(string), variant-c(char), + } + roundtrip-variant: func(x: testvariant) -> testvariant; + enum testenum { + enum-a, enum-b, enum-c + } + roundtrip-enum: func(x: testenum) -> testenum; +} + +interface host-resource { + resource testresource { + constructor(init1: string, init2: char); + append-char: func(x: char); + } + roundtrip-own: func(x: own) -> own; + return-own: func(x: own); +} + +interface test-host-resource { + test: func() -> bool; +} \ No newline at end of file diff --git a/src/tests/rust_guests/witguest/src/bindings.rs b/src/tests/rust_guests/witguest/src/bindings.rs new file mode 100644 index 000000000..2316def29 --- /dev/null +++ b/src/tests/rust_guests/witguest/src/bindings.rs @@ -0,0 +1,19 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +extern crate alloc; + +hyperlight_component_macro::guest_bindgen!("interface.wasm"); diff --git a/src/tests/rust_guests/witguest/src/main.rs b/src/tests/rust_guests/witguest/src/main.rs new file mode 100644 index 000000000..9fcabe087 --- /dev/null +++ b/src/tests/rust_guests/witguest/src/main.rs @@ -0,0 +1,168 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#![no_std] +#![no_main] + +extern crate alloc; +extern crate hyperlight_guest; + +mod bindings; +use bindings::*; + +struct Guest {} + +impl test::wit::Roundtrip for Guest { + fn roundtrip_bool(&mut self, x: bool) -> bool { + (Host {}).roundtrip_bool(x) + } + fn roundtrip_s8(&mut self, x: i8) -> i8 { + (Host {}).roundtrip_s8(x) + } + fn roundtrip_s16(&mut self, x: i16) -> i16 { + (Host {}).roundtrip_s16(x) + } + fn roundtrip_s32(&mut self, x: i32) -> i32 { + (Host {}).roundtrip_s32(x) + } + fn roundtrip_s64(&mut self, x: i64) -> i64 { + (Host {}).roundtrip_s64(x) + } + fn roundtrip_u8(&mut self, x: u8) -> u8 { + (Host {}).roundtrip_u8(x) + } + fn roundtrip_u16(&mut self, x: u16) -> u16 { + (Host {}).roundtrip_u16(x) + } + fn roundtrip_u32(&mut self, x: u32) -> u32 { + (Host {}).roundtrip_u32(x) + } + fn roundtrip_u64(&mut self, x: u64) -> u64 { + (Host {}).roundtrip_u64(x) + } + fn roundtrip_f32(&mut self, x: f32) -> f32 { + (Host {}).roundtrip_f32(x) + } + fn roundtrip_f64(&mut self, x: f64) -> f64 { + (Host {}).roundtrip_f64(x) + } + fn roundtrip_char(&mut self, x: char) -> char { + (Host {}).roundtrip_char(x) + } + fn roundtrip_string(&mut self, x: alloc::string::String) -> alloc::string::String { + (Host {}).roundtrip_string(x) + } + fn roundtrip_list(&mut self, x: alloc::vec::Vec) -> alloc::vec::Vec { + (Host {}).roundtrip_list(x) + } + fn roundtrip_tuple(&mut self, x: (alloc::string::String, u8)) -> (alloc::string::String, u8) { + (Host {}).roundtrip_tuple(x) + } + fn roundtrip_option( + &mut self, + x: ::core::option::Option, + ) -> ::core::option::Option { + (Host {}).roundtrip_option(x) + } + fn roundtrip_result( + &mut self, + x: ::core::result::Result, + ) -> ::core::result::Result { + (Host {}).roundtrip_result(x) + } + fn roundtrip_record( + &mut self, + x: test::wit::roundtrip::Testrecord, + ) -> test::wit::roundtrip::Testrecord { + (Host {}).roundtrip_record(x) + } + fn roundtrip_flags_small( + &mut self, + x: test::wit::roundtrip::Smallflags, 
+ ) -> test::wit::roundtrip::Smallflags { + (Host {}).roundtrip_flags_small(x) + } + fn roundtrip_flags_large( + &mut self, + x: test::wit::roundtrip::Largeflags, + ) -> test::wit::roundtrip::Largeflags { + (Host {}).roundtrip_flags_large(x) + } + fn roundtrip_variant( + &mut self, + x: test::wit::roundtrip::Testvariant, + ) -> test::wit::roundtrip::Testvariant { + (Host {}).roundtrip_variant(x) + } + fn roundtrip_enum( + &mut self, + x: test::wit::roundtrip::Testenum, + ) -> test::wit::roundtrip::Testenum { + (Host {}).roundtrip_enum(x) + } +} + +impl test::wit::TestHostResource for Guest { + fn test(&mut self) -> bool { + use test::wit::host_resource::Testresource; + let mut host = Host {}; + use alloc::string::ToString; + let r = ::new(&mut host, "str".to_string(), 'z'); + ::append_char(&mut host, &r, 'a'); + ::append_char(&mut host, &r, 'b'); + let r = ::roundtrip_own(&mut host, r); + let r = ::roundtrip_own(&mut host, r); + ::append_char(&mut host, &r, 'c'); + ::return_own(&mut host, r); + true + } +} + +#[allow(refining_impl_trait)] +impl test::wit::TestExports for Guest { + type Roundtrip = Self; + fn roundtrip(&mut self) -> &mut Self { + self + } + type TestHostResource = Self; + fn test_host_resource(&mut self) -> &mut Self { + self + } +} + +impl bindings::Guest for Guest { + fn with_guest_state R>(f: F) -> R { + let mut g = Guest {}; + f(&mut g) + } +} + +#[no_mangle] +pub extern "C" fn hyperlight_main() { + bindings::hyperlight_guest_init::(); +} + +use ::alloc::vec::Vec; +use ::hyperlight_common::flatbuffer_wrappers::function_call::FunctionCall; +use ::hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; +use ::hyperlight_guest::error::{HyperlightGuestError, Result}; +#[no_mangle] +pub fn guest_dispatch_function(function_call: FunctionCall) -> Result> { + Err(HyperlightGuestError::new( + ErrorCode::GuestFunctionNotFound, + function_call.function_name.clone(), + )) +} diff --git a/typos.toml b/typos.toml index 0477c7778..bdf1e81f2 100644 --- 
a/typos.toml +++ b/typos.toml @@ -1,5 +1,5 @@ [default] -extend-ignore-identifiers-re = ["Fo"] +extend-ignore-identifiers-re = ["Fo", "edn", "ue"] [files] extend-exclude = ["**/*.patch", "src/hyperlight_guest_bin/third_party/**/*", "NOTICE.txt"]