From e49e61df0b56c05cdd9d7c3c13d24f11b9bbe0cb Mon Sep 17 00:00:00 2001 From: Pratik Chowdhury <28534575+pratikpc@users.noreply.github.com> Date: Mon, 20 May 2024 18:33:13 +0000 Subject: [PATCH 1/4] feat(idna-&-lib/&str->ada_owned_string): change return type to ada_owned_string ada_owned_string is supposed to be cleared manually. This was not being done currently. Instead an internal reference to the memory was being returned by these functions. This led to us being unable to delete the memory out of fear of invalidating the &str present in user space. By returning ada_owned_string, this issue is resolved and the memory leaks no longer happen in library space. Old &str can still be accessed using as_ref. BREAKING CHANGE: Functions using ada_owned_string now return ada_owned_string instead of &str. Memory leaks no longer happen in library space. Old &str can still be accessed using as_ref. --- src/idna.rs | 30 ++++++++++++++---------------- src/lib.rs | 12 ++++-------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/idna.rs b/src/idna.rs index 9f035a4..078ea6a 100644 --- a/src/idna.rs +++ b/src/idna.rs @@ -13,15 +13,11 @@ impl Idna { /// /// ``` /// use ada_url::Idna; - /// assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca"); + /// assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca").as_ref(), "meßagefactory.ca"); /// ``` #[must_use] - pub fn unicode(input: &str) -> &str { - unsafe { - let out = ffi::ada_idna_to_unicode(input.as_ptr().cast(), input.len()); - let slice = core::slice::from_raw_parts(out.data.cast(), out.length); - core::str::from_utf8_unchecked(slice) - } + pub fn unicode(input: &str) -> ffi::ada_owned_string { + unsafe { ffi::ada_idna_to_unicode(input.as_ptr().cast(), input.len()) } } /// Process international domains according to the UTS #46 standard. @@ -31,15 +27,11 @@ impl Idna { /// /// ``` /// use ada_url::Idna; - /// assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca"); + /// assert_eq!(Idna::ascii("meßagefactory.ca").as_ref(), "xn--meagefactory-m9a.ca"); /// ``` #[must_use] - pub fn ascii(input: &str) -> &str { - unsafe { - let out = ffi::ada_idna_to_ascii(input.as_ptr().cast(), input.len()); - let slice = core::slice::from_raw_parts(out.data.cast(), out.length); - core::str::from_utf8_unchecked(slice) - } + pub fn ascii(input: &str) -> ffi::ada_owned_string { + unsafe { ffi::ada_idna_to_ascii(input.as_ptr().cast(), input.len()) } } } @@ -49,11 +41,17 @@ mod tests { #[test] fn unicode_should_work() { - assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca"); + assert_eq!( + Idna::unicode("xn--meagefactory-m9a.ca").as_ref(), + "meßagefactory.ca" + ); } #[test] fn ascii_should_work() { - assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca"); + assert_eq!( + Idna::ascii("meßagefactory.ca").as_ref(), + "xn--meagefactory-m9a.ca" + ); } } diff --git a/src/lib.rs b/src/lib.rs index 1bdd2ef..a8a94bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -267,15 +267,11 @@ impl Url { /// use ada_url::Url; /// /// let url = Url::parse("blob:https://example.com/foo", None).expect("Invalid URL"); - /// assert_eq!(url.origin(), "https://example.com"); + /// assert_eq!(url.origin().as_ref(), "https://example.com"); /// ``` #[must_use] - pub fn origin(&self) -> &str { - unsafe { - let out = ffi::ada_get_origin(self.0); - let slice = core::slice::from_raw_parts(out.data.cast(), out.length); - core::str::from_utf8_unchecked(slice) - } + pub fn origin(&self) -> ffi::ada_owned_string { + unsafe { ffi::ada_get_origin(self.0) } } /// Return the parsed version of the URL with all components. @@ -949,7 +945,7 @@ mod test { None, ) .expect("Should have parsed a simple url"); - assert_eq!(out.origin(), "https://google.com:9090"); + assert_eq!(out.origin().as_ref(), "https://google.com:9090"); assert_eq!( out.href(), "https://username:password@google.com:9090/search?query#hash" From f9979bdb0506ed04a47c4f644c77c6e2e8ea8da8 Mon Sep 17 00:00:00 2001 From: Pratik Chowdhury <28534575+pratikpc@users.noreply.github.com> Date: Mon, 20 May 2024 18:34:18 +0000 Subject: [PATCH 2/4] feat(ada_owned_string/Drop): add Clears the memory after the ada_owned_string goes out of scope. This ensures that ada_owned_string is cleared in user space. BREAKING CHANGE: ada_owned_string was previously not freed upon going out of scope. Now it is. --- src/ffi.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/ffi.rs b/src/ffi.rs index 749a450..99cf3e1 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -38,6 +38,19 @@ impl AsRef for ada_owned_string { } } +impl Drop for ada_owned_string { + fn drop(&mut self) { + // @note This is needed because ada_free_owned_string accepts by value + let copy = ada_owned_string { + data: self.data, + length: self.length, + }; + unsafe { + ada_free_owned_string(copy); + }; + } +} + #[repr(C)] pub struct ada_url_components { pub protocol_end: u32, From 21ac2ee61f49e8fc71b10ea579fc9d93328c3591 Mon Sep 17 00:00:00 2001 From: Pratik Chowdhury <28534575+pratikpc@users.noreply.github.com> Date: Mon, 20 May 2024 18:56:47 +0000 Subject: [PATCH 3/4] feat(ada_owned_string/ToString): add Makes it possible to convert ada_owned_string to ToString --- src/ffi.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/ffi.rs b/src/ffi.rs index 99cf3e1..94fc93f 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -1,6 +1,9 @@ #![allow(non_camel_case_types)] use core::ffi::{c_char, c_uint}; +#[cfg(feature = "std")] +extern crate std; + #[repr(C)] pub struct ada_url { _unused: [u8; 0], @@ -38,6 +41,13 @@ impl AsRef for ada_owned_string { } } +#[cfg(feature = "std")] +impl ToString for ada_owned_string { + fn to_string(&self) -> std::string::String { + self.as_ref().to_owned() + } +} + impl Drop for ada_owned_string { fn drop(&mut self) { // @note This is needed because ada_free_owned_string accepts by value From d9b3e745ccb37f4f2dcfead3caca48362c57ce0e Mon Sep 17 00:00:00 2001 From: Pratik Chowdhury <28534575+pratikpc@users.noreply.github.com> Date: Mon, 20 May 2024 18:59:52 +0000 Subject: [PATCH 4/4] refactor(ada_owned_string->String): change return type from ada_owned_string to String Ensures that ada_owned_string does not leak to user space and allows us to make internal breaking changes to its implementation. Fixes all leak issue. Causes small performance downgrades due to copies. Causes functions to be usable only if std is enabled. BREAKING CHANGE: Return type changed from ada_owned_string->String. Functions now work only with std feature enabled. --- src/idna.rs | 34 ++++++++++++++++++++-------------- src/lib.rs | 18 ++++++++++++++---- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/idna.rs b/src/idna.rs index 078ea6a..d6d6832 100644 --- a/src/idna.rs +++ b/src/idna.rs @@ -1,5 +1,12 @@ +#[cfg(feature = "std")] +extern crate std; + +#[cfg_attr(not(feature = "std"), allow(unused_imports))] use crate::ffi; +#[cfg(feature = "std")] +use std::string::String; + /// IDNA struct implements the `to_ascii` and `to_unicode` functions from the Unicode Technical /// Standard supporting a wide range of systems. It is suitable for URL parsing. /// For more information, [read the specification](https://www.unicode.org/reports/tr46/#ToUnicode) @@ -13,11 +20,12 @@ impl Idna { /// /// ``` /// use ada_url::Idna; - /// assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca").as_ref(), "meßagefactory.ca"); + /// assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca"); /// ``` #[must_use] - pub fn unicode(input: &str) -> ffi::ada_owned_string { - unsafe { ffi::ada_idna_to_unicode(input.as_ptr().cast(), input.len()) } + #[cfg(feature = "std")] + pub fn unicode(input: &str) -> String { + unsafe { ffi::ada_idna_to_unicode(input.as_ptr().cast(), input.len()) }.to_string() } /// Process international domains according to the UTS #46 standard. @@ -27,31 +35,29 @@ impl Idna { /// /// ``` /// use ada_url::Idna; - /// assert_eq!(Idna::ascii("meßagefactory.ca").as_ref(), "xn--meagefactory-m9a.ca"); + /// assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca"); /// ``` #[must_use] - pub fn ascii(input: &str) -> ffi::ada_owned_string { - unsafe { ffi::ada_idna_to_ascii(input.as_ptr().cast(), input.len()) } + #[cfg(feature = "std")] + pub fn ascii(input: &str) -> String { + unsafe { ffi::ada_idna_to_ascii(input.as_ptr().cast(), input.len()) }.to_string() } } #[cfg(test)] mod tests { + #[cfg_attr(not(feature = "std"), allow(unused_imports))] use crate::idna::*; #[test] fn unicode_should_work() { - assert_eq!( - Idna::unicode("xn--meagefactory-m9a.ca").as_ref(), - "meßagefactory.ca" - ); + #[cfg(feature = "std")] + assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca"); } #[test] fn ascii_should_work() { - assert_eq!( - Idna::ascii("meßagefactory.ca").as_ref(), - "xn--meagefactory-m9a.ca" - ); + #[cfg(feature = "std")] + assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca"); } } diff --git a/src/lib.rs b/src/lib.rs index a8a94bf..8199db8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,6 +46,12 @@ pub mod ffi; mod idna; pub use idna::Idna; +#[cfg(feature = "std")] +extern crate std; + +#[cfg(feature = "std")] +use std::string::String; + use core::{borrow, ffi::c_uint, fmt, hash, ops}; use derive_more::Display; @@ -267,11 +273,12 @@ impl Url { /// use ada_url::Url; /// /// let url = Url::parse("blob:https://example.com/foo", None).expect("Invalid URL"); - /// assert_eq!(url.origin().as_ref(), "https://example.com"); + /// assert_eq!(url.origin(), "https://example.com"); /// ``` #[must_use] - pub fn origin(&self) -> ffi::ada_owned_string { - unsafe { ffi::ada_get_origin(self.0) } + #[cfg(feature = "std")] + pub fn origin(&self) -> String { + unsafe { ffi::ada_get_origin(self.0) }.to_string() } /// Return the parsed version of the URL with all components. @@ -945,7 +952,10 @@ mod test { None, ) .expect("Should have parsed a simple url"); - assert_eq!(out.origin().as_ref(), "https://google.com:9090"); + + #[cfg(feature = "std")] + assert_eq!(out.origin(), "https://google.com:9090"); + assert_eq!( out.href(), "https://username:password@google.com:9090/search?query#hash"