diff --git a/Cargo.toml b/Cargo.toml
index 97936957..b7506683 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,9 +22,12 @@ std = []
 # expose SIMD implementations in basic::imp::* and compat::imp::*
 public_imp = []
 
-# aarch64 NEON SIMD implementation - requires nightly
+# aarch64 Neon SIMD implementation - requires nightly
 aarch64_neon = []
 
+# arm Neon SIMD implementation - requires nightly
+arm_neon = []
+
 # deprecated - does not do anything
 hints = []
 
diff --git a/src/implementation/arm/mod.rs b/src/implementation/arm/mod.rs
new file mode 100644
index 00000000..90f95f15
--- /dev/null
+++ b/src/implementation/arm/mod.rs
@@ -0,0 +1,40 @@
+#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
+pub(crate) mod neon;
+
+#[inline]
+#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
+pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> {
+    if input.len() < super::helpers::SIMD_CHUNK_SIZE {
+        return super::validate_utf8_basic_fallback(input);
+    }
+
+    validate_utf8_basic_neon(input)
+}
+
+#[inline(never)]
+#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
+unsafe fn validate_utf8_basic_neon(input: &[u8]) -> Result<(), crate::basic::Utf8Error> {
+    neon::validate_utf8_basic(input)
+}
+
+#[cfg(not(all(feature = "arm_neon", target_feature = "neon")))]
+pub(crate) use super::validate_utf8_basic_fallback as validate_utf8_basic;
+
+#[inline]
+#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
+pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
+    if input.len() < super::helpers::SIMD_CHUNK_SIZE {
+        return super::validate_utf8_compat_fallback(input);
+    }
+
+    validate_utf8_compat_neon(input)
+}
+
+#[inline(never)]
+#[cfg(all(feature = "arm_neon", target_feature = "neon"))]
+unsafe fn validate_utf8_compat_neon(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
+    neon::validate_utf8_compat(input)
+}
+
+#[cfg(not(all(feature = "arm_neon", target_feature = "neon")))]
+pub(crate) use super::validate_utf8_compat_fallback as validate_utf8_compat;
diff --git a/src/implementation/arm/neon.rs b/src/implementation/arm/neon.rs
new file mode 100644
index 00000000..472ad014
--- /dev/null
+++ b/src/implementation/arm/neon.rs
@@ -0,0 +1,235 @@
+//! Contains the arm UTF-8 validation implementation.
+
+use core::arch::arm::{
+    uint8x16_t, vandq_u8, vcgtq_u8, vdupq_n_u8, veorq_u8, vextq_u8, vget_high_u32, vget_lane_u32,
+    vget_low_u32, vld1q_u8, vmovq_n_u8, vorr_u32, vorrq_u8, vpmax_u32, vqsubq_u8,
+    vreinterpretq_u32_u8, vshrq_n_u8,
+};
+
+use crate::implementation::helpers::Utf8CheckAlgorithm;
+
+// arm Neon SIMD primitives
+
+type SimdU8Value = crate::implementation::helpers::SimdU8Value<uint8x16_t>;
+
+impl SimdU8Value {
+    #[inline]
+    #[allow(clippy::too_many_arguments)]
+    #[allow(clippy::cast_possible_wrap)]
+    unsafe fn from_32_cut_off_leading(
+        _v0: u8,
+        _v1: u8,
+        _v2: u8,
+        _v3: u8,
+        _v4: u8,
+        _v5: u8,
+        _v6: u8,
+        _v7: u8,
+        _v8: u8,
+        _v9: u8,
+        _v10: u8,
+        _v11: u8,
+        _v12: u8,
+        _v13: u8,
+        _v14: u8,
+        _v15: u8,
+        v16: u8,
+        v17: u8,
+        v18: u8,
+        v19: u8,
+        v20: u8,
+        v21: u8,
+        v22: u8,
+        v23: u8,
+        v24: u8,
+        v25: u8,
+        v26: u8,
+        v27: u8,
+        v28: u8,
+        v29: u8,
+        v30: u8,
+        v31: u8,
+    ) -> Self {
+        let arr: [u8; 16] = [
+            v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
+        ];
+        Self::from(vld1q_u8(arr.as_ptr()))
+    }
+
+    #[inline]
+    #[allow(clippy::too_many_arguments)]
+    #[allow(clippy::cast_possible_wrap)]
+    unsafe fn repeat_16(
+        v0: u8,
+        v1: u8,
+        v2: u8,
+        v3: u8,
+        v4: u8,
+        v5: u8,
+        v6: u8,
+        v7: u8,
+        v8: u8,
+        v9: u8,
+        v10: u8,
+        v11: u8,
+        v12: u8,
+        v13: u8,
+        v14: u8,
+        v15: u8,
+    ) -> Self {
+        let arr: [u8; 16] = [
+            v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
+        ];
+        Self::from(vld1q_u8(arr.as_ptr()))
+    }
+
+    #[inline]
+    #[allow(clippy::cast_ptr_alignment)]
+    unsafe fn load_from(ptr: *const u8) -> Self {
+        // WORKAROUND for https://github.com/rust-lang/stdarch/issues/1148
+        // The vld1q_u8 intrinsic is currently broken, it treats it as individual
+        // byte loads so the compiler sometimes decides it is a better to load
+        // individual bytes to "optimize" a subsequent SIMD shuffle
+        //
+        // This code forces a full 128-bit load.
+        let mut dst = core::mem::MaybeUninit::<uint8x16_t>::uninit();
+        core::ptr::copy_nonoverlapping(
+            ptr.cast::<u8>(),
+            dst.as_mut_ptr().cast::<u8>(),
+            core::mem::size_of::<uint8x16_t>(),
+        );
+        Self::from(dst.assume_init())
+    }
+
+    #[inline]
+    #[allow(clippy::too_many_arguments)]
+    unsafe fn lookup_16(
+        self,
+        v0: u8,
+        v1: u8,
+        v2: u8,
+        v3: u8,
+        v4: u8,
+        v5: u8,
+        v6: u8,
+        v7: u8,
+        v8: u8,
+        v9: u8,
+        v10: u8,
+        v11: u8,
+        v12: u8,
+        v13: u8,
+        v14: u8,
+        v15: u8,
+    ) -> Self {
+        unimplemented!();
+    }
+
+    #[inline]
+    #[allow(clippy::cast_possible_wrap)]
+    unsafe fn splat(val: u8) -> Self {
+        Self::from(vmovq_n_u8(val))
+    }
+
+    #[inline]
+    #[allow(clippy::cast_possible_wrap)]
+    unsafe fn splat0() -> Self {
+        Self::from(vdupq_n_u8(0))
+    }
+
+    #[inline]
+    unsafe fn or(self, b: Self) -> Self {
+        Self::from(vorrq_u8(self.0, b.0))
+    }
+
+    #[inline]
+    unsafe fn and(self, b: Self) -> Self {
+        Self::from(vandq_u8(self.0, b.0))
+    }
+
+    #[inline]
+    unsafe fn xor(self, b: Self) -> Self {
+        Self::from(veorq_u8(self.0, b.0))
+    }
+
+    #[inline]
+    unsafe fn saturating_sub(self, b: Self) -> Self {
+        Self::from(vqsubq_u8(self.0, b.0))
+    }
+
+    // ugly but shr<N> requires const generics
+
+    #[allow(clippy::cast_lossless)]
+    #[inline]
+    unsafe fn shr4(self) -> Self {
+        Self::from(vshrq_n_u8(self.0, 4))
+    }
+
+    // ugly but prev<N> requires const generics
+
+    #[allow(clippy::cast_lossless)]
+    #[inline]
+    unsafe fn prev1(self, prev: Self) -> Self {
+        Self::from(vextq_u8(prev.0, self.0, 16 - 1))
+    }
+
+    // ugly but prev<N> requires const generics
+
+    #[allow(clippy::cast_lossless)]
+    #[inline]
+    unsafe fn prev2(self, prev: Self) -> Self {
+        Self::from(vextq_u8(prev.0, self.0, 16 - 2))
+    }
+
+    // ugly but prev<N> requires const generics
+
+    #[allow(clippy::cast_lossless)]
+    #[inline]
+    unsafe fn prev3(self, prev: Self) -> Self {
+        Self::from(vextq_u8(prev.0, self.0, 16 - 3))
+    }
+
+    #[inline]
+    unsafe fn unsigned_gt(self, other: Self) -> Self {
+        Self::from(vcgtq_u8(self.0, other.0))
+    }
+
+    #[inline]
+    unsafe fn any_bit_set(self) -> bool {
+        let tmp = vreinterpretq_u32_u8(self.0);
+        let tmp = vorr_u32(vget_low_u32(tmp), vget_high_u32(tmp));
+        return vget_lane_u32(vpmax_u32(tmp, tmp), 0) != 0;
+    }
+
+    #[inline]
+    unsafe fn is_ascii(self) -> bool {
+        unimplemented!();
+    }
+}
+
+impl From<uint8x16_t> for SimdU8Value {
+    #[inline]
+    fn from(val: uint8x16_t) -> Self {
+        Self { 0: val }
+    }
+}
+
+impl Utf8CheckAlgorithm<SimdU8Value> {
+    #[inline]
+    unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
+        let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1));
+        let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1));
+
+        is_third_byte.or(is_fourth_byte)
+    }
+}
+
+#[inline]
+unsafe fn simd_prefetch(ptr: *const u8) {
+    // _pld intrinsic currently not available, potential benefit also unknown
+}
+
+const PREFETCH: bool = false;
+use crate::implementation::helpers::TempSimdChunkA16 as TempSimdChunk;
+simd_input_128_bit!("neon");
+algorithm_simd!("neon");
diff --git a/src/implementation/mod.rs b/src/implementation/mod.rs
index 3d3d8181..d0fa3a54 100644
--- a/src/implementation/mod.rs
+++ b/src/implementation/mod.rs
@@ -49,12 +49,33 @@ pub(super) use aarch64::validate_utf8_basic;
 #[cfg(target_arch = "aarch64")]
 pub(super) use aarch64::validate_utf8_compat;
 
+// arm implementation
+
+#[cfg(target_arch = "arm")]
+pub(crate) mod arm;
+
+#[cfg(target_arch = "arm")]
+pub(super) use arm::validate_utf8_basic;
+
+#[cfg(target_arch = "arm")]
+pub(super) use arm::validate_utf8_compat;
+
 // fallback for unsupported architectures
 
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
+#[cfg(not(any(
+    target_arch = "x86",
+    target_arch = "x86_64",
+    target_arch = "aarch64",
+    target_arch = "arm"
+)))]
 pub(super) use validate_utf8_basic_fallback as validate_utf8_basic;
 
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
+#[cfg(not(any(
+    target_arch = "x86",
+    target_arch = "x86_64",
+    target_arch = "aarch64",
+    target_arch = "arm"
+)))]
 pub(super) use validate_utf8_compat_fallback as validate_utf8_compat;
 
 // fallback method implementations
diff --git a/src/lib.rs b/src/lib.rs
index a0391c46..79ce5f6a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,9 +14,16 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![cfg_attr(docsrs, feature(doc_cfg))]
 #![cfg_attr(
-    all(feature = "aarch64_neon", target_arch = "aarch64"),
+    any(
+        all(feature = "aarch64_neon", target_arch = "aarch64"),
+        all(feature = "arm_neon", target_arch = "arm")
+    ),
     feature(stdsimd)
 )]
+#![cfg_attr(
+    all(feature = "arm_neon", target_arch = "arm"),
+    feature(arm_target_feature)
+)]
 
 //! Blazingly fast API-compatible UTF-8 validation for Rust using SIMD extensions, based on the implementation from
 //! [simdjson](https://github.com/simdjson/simdjson). Originally ported to Rust by the developers of [simd-json.rs](https://simd-json.rs), but now heavily improved.