feat: add checks for confidential compute (#76)

jorgeantonio21 · web-flow · commit 5fb051995b7a · 2025-03-22T11:32:51.000-07:00
Add support for cc attestation generation, as well as cert chain support.
Check for cc capabilities of the node, as well as whether the node is in a ready state for cc.
diff --git a/nvml-wrapper/src/device.rs b/nvml-wrapper/src/device.rs
@@ -850,6 +850,154 @@ impl<'nvml> Device<'nvml> {
         }
     }
 
+    /**
+    Reports whether confidential compute is fully usable right now: the feature must be enabled,
+    the environment must be production, and the GPUs must be accepting client requests.
+    # Errors
+    * `Uninitialized`, if the library has not been successfully initialized
+    * `NotSupported`, if this query is not supported by the device
+    * `InvalidArg`, if confidential compute state is invalid
+    */
+    pub fn check_confidential_compute_status(&self) -> Result<bool, NvmlError> {
+        // Both symbols are resolved before any FFI call is made.
+        let lib = &self.nvml.lib;
+        let query_state = nvml_sym(lib.nvmlSystemGetConfComputeState.as_ref())?;
+        let query_ready = nvml_sym(lib.nvmlSystemGetConfComputeGpusReadyState.as_ref())?;
+
+        // Out-parameters that the two NVML calls fill in.
+        let mut system_state: nvmlConfComputeSystemState_t = unsafe { mem::zeroed() };
+        let mut ready_state: std::os::raw::c_uint = 0;
+
+        unsafe {
+            nvml_try(query_state(&mut system_state))?;
+            nvml_try(query_ready(&mut ready_state))?;
+        }
+
+        if system_state.ccFeature != NVML_CC_SYSTEM_FEATURE_ENABLED {
+            return Ok(false);
+        }
+        if system_state.environment != NVML_CC_SYSTEM_ENVIRONMENT_PROD {
+            return Ok(false);
+        }
+
+        Ok(ready_state == NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE)
+    }
+
+    /**
+    Gets the confidential compute capabilities NVML reports for the system's CPU and GPUs.
+    # Errors
+    * `Uninitialized`, if the library has not been successfully initialized
+    * `InvalidArg`, if device is invalid or memory is NULL
+    * `NotSupported`, if this query is not supported by the device
+    * `Unknown`, on any unexpected error, including an unrecognized capability value
+    */
+    pub fn get_confidential_compute_capabilities(
+        &self,
+    ) -> Result<ConfidentialComputeCapabilities, NvmlError> {
+        let query = nvml_sym(self.nvml.lib.nvmlSystemGetConfComputeCapabilities.as_ref())?;
+
+        // Zeroed out-parameter that the NVML call fills in.
+        let mut raw: nvmlConfComputeSystemCaps_t = unsafe { mem::zeroed() };
+        unsafe { nvml_try(query(&mut raw))? };
+
+        // A value outside the known constants is reported as `Unknown`.
+        let cpu_caps = match raw.cpuCaps {
+            NVML_CC_SYSTEM_CPU_CAPS_NONE => Ok(ConfidentialComputeCpuCapabilities::None),
+            NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV => Ok(ConfidentialComputeCpuCapabilities::AmdSev),
+            NVML_CC_SYSTEM_CPU_CAPS_INTEL_TDX => Ok(ConfidentialComputeCpuCapabilities::IntelTdx),
+            _ => Err(NvmlError::Unknown),
+        }?;
+
+        let gpus_caps = match raw.gpusCaps {
+            NVML_CC_SYSTEM_GPUS_CC_CAPABLE => Ok(ConfidentialComputeGpuCapabilities::Capable),
+            NVML_CC_SYSTEM_GPUS_CC_NOT_CAPABLE => {
+                Ok(ConfidentialComputeGpuCapabilities::NotCapable)
+            }
+            _ => Err(NvmlError::Unknown),
+        }?;
+
+        Ok(ConfidentialComputeCapabilities {
+            cpu_caps,
+            gpus_caps,
+        })
+    }
+
+    /**
+    Retrieves the confidential compute GPU certificate data for this `Device`.
+    Both chains are returned as whole buffers, with the NVML-reported sizes copied alongside.
+    # Errors
+    * `Uninitialized` if the library has not been successfully initialized
+    * `InvalidArg` if device is invalid or memory is NULL
+    * `NotSupported` if this query is not supported by the device
+    * `Unknown` on any unexpected error
+    */
+    pub fn confidential_compute_gpu_certificate(
+        &self,
+    ) -> Result<ConfidentialComputeGpuCertificate, NvmlError> {
+        let lib = &self.nvml.lib;
+        let fetch = nvml_sym(lib.nvmlDeviceGetConfComputeGpuCertificate.as_ref())?;
+
+        // Zeroed out-parameter that the NVML call fills in.
+        let mut raw: nvmlConfComputeGpuCertificate_t = unsafe { mem::zeroed() };
+        unsafe { nvml_try(fetch(self.device, &mut raw))? };
+
+        // `to_vec` copies each buffer in full, not only the reported number of bytes.
+        let cert_chain = raw.certChain.to_vec();
+        let attestation_cert_chain = raw.attestationCertChain.to_vec();
+
+        Ok(ConfidentialComputeGpuCertificate {
+            cert_chain_size: raw.certChainSize,
+            attestation_cert_chain_size: raw.attestationCertChainSize,
+            cert_chain,
+            attestation_cert_chain,
+        })
+    }
+
+    /**
+    Fetches the confidential compute attestation report for this [`Device`].
+    The caller-supplied `nonce` is copied into the request before NVML is queried. The result is a
+    [`ConfidentialComputeGpuAttestationReport`] struct (not a byte vector); its buffers are copied
+    whole and its size fields are passed through unchanged from NVML. It contains:
+    - The attestation report size
+    - The attestation report buffer (8192 bytes)
+    - A flag indicating if a CEC attestation report is present
+    - The CEC attestation report size
+    - The CEC attestation report buffer (4096 bytes)
+    # Errors
+    * `Uninitialized`, if the library has not been successfully initialized
+    * `InvalidArg`, if device is invalid or memory is NULL
+    * `NotSupported`, if this query is not supported by the device
+    * `Unknown`, on any unexpected error
+    */
+    #[doc(alias = "nvmlDeviceGetConfComputeGpuAttestationReport")]
+    pub fn confidential_compute_gpu_attestation_report(
+        &self,
+        nonce: [u8; NVML_CC_GPU_CEC_NONCE_SIZE as usize],
+    ) -> Result<ConfidentialComputeGpuAttestationReport, NvmlError> {
+        let sym = nvml_sym(
+            self.nvml
+                .lib
+                .nvmlDeviceGetConfComputeGpuAttestationReport
+                .as_ref(),
+        )?;
+
+        unsafe {
+            let mut report: nvmlConfComputeGpuAttestationReport_st = mem::zeroed();
+            report.nonce = nonce;
+
+            nvml_try(sym(self.device, &mut report))?;
+
+            let is_cec_attestation_report_present = report.isCecAttestationReportPresent == 1;
+            Ok(ConfidentialComputeGpuAttestationReport {
+                attestation_report_size: report.attestationReportSize,
+                attestation_report: report.attestationReport.to_vec(),
+                is_cec_attestation_report_present,
+                cec_attestation_report_size: report.cecAttestationReportSize,
+                cec_attestation_report: report.cecAttestationReport.to_vec(),
+            })
+        }
+    }
+
     /**
     Gets the current utilization and sampling size (sampling size in μs) for the Decoder.
 
diff --git a/nvml-wrapper/src/structs/device.rs b/nvml-wrapper/src/structs/device.rs
@@ -119,3 +119,69 @@ pub struct RetiredPage {
 #[derive(Debug, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct FieldId(pub u32);
+
+/// Returned from `Device.get_confidential_compute_capabilities()`; pairs the CPU and GPU results.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct ConfidentialComputeCapabilities {
+    /// Which confidential compute technology the CPU offers, if any.
+    pub cpu_caps: ConfidentialComputeCpuCapabilities,
+    /// Whether the GPUs are capable of confidential compute.
+    pub gpus_caps: ConfidentialComputeGpuCapabilities,
+}
+
+/// The possible CPU capabilities for confidential compute (either none, AMD SEV or Intel TDX).
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub enum ConfidentialComputeCpuCapabilities {
+    /// No CPU capabilities (mapped from `NVML_CC_SYSTEM_CPU_CAPS_NONE`).
+    None,
+    /// AMD SEV confidential compute capabilities (mapped from `NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV`).
+    AmdSev,
+    /// Intel TDX confidential compute capabilities (mapped from `NVML_CC_SYSTEM_CPU_CAPS_INTEL_TDX`).
+    IntelTdx,
+}
+
+/// The possible GPU capabilities for confidential compute (either capable or not capable).
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub enum ConfidentialComputeGpuCapabilities {
+    /// Capable (mapped from `NVML_CC_SYSTEM_GPUS_CC_CAPABLE`).
+    Capable,
+    /// Not capable (mapped from `NVML_CC_SYSTEM_GPUS_CC_NOT_CAPABLE`).
+    NotCapable,
+}
+
+/// Returned from `Device.confidential_compute_gpu_certificate()`.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct ConfidentialComputeGpuCertificate {
+    /// The size of the certificate chain, as reported by NVML.
+    pub cert_chain_size: u32,
+    /// The size of the attestation certificate chain, as reported by NVML.
+    pub attestation_cert_chain_size: u32,
+    /// The certificate chain buffer, copied whole; its length is
+    /// `ffi::bindings::NVML_GPU_CERT_CHAIN_SIZE == 4096`.
+    pub cert_chain: Vec<u8>,
+    /// The attestation certificate chain buffer, copied whole; its length is
+    /// `ffi::bindings::NVML_GPU_ATTESTATION_CERT_CHAIN_SIZE == 5120`.
+    pub attestation_cert_chain: Vec<u8>,
+}
+
+/// Returned from `Device.confidential_compute_gpu_attestation_report()`
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct ConfidentialComputeGpuAttestationReport {
+    /// The size of the attestation report, as reported by NVML.
+    pub attestation_report_size: u32,
+    /// The attestation report buffer, copied whole; its length is
+    /// `ffi::bindings::NVML_CC_GPU_ATTESTATION_REPORT_SIZE == 8192`.
+    pub attestation_report: Vec<u8>,
+    /// Whether the CEC attestation report is present (NVML reported the flag as `1`).
+    pub is_cec_attestation_report_present: bool,
+    /// The size of the CEC attestation report, as reported by NVML.
+    pub cec_attestation_report_size: u32,
+    /// The CEC attestation report buffer, copied whole; its length is
+    /// `ffi::bindings::NVML_CC_GPU_CEC_ATTESTATION_REPORT_SIZE == 4096`.
+    pub cec_attestation_report: Vec<u8>,
+}