diff --git a/examples/sdk/general-inference/create_private_inference_auth_bearer_token.py b/examples/sdk/general-inference/create_private_inference_auth_bearer_token.py
new file mode 100644
index 0000000..9d0ec66
--- /dev/null
+++ b/examples/sdk/general-inference/create_private_inference_auth_bearer_token.py
@@ -0,0 +1,36 @@
+import centml
+from centml.sdk.api import get_centml_client
+from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType
+
+
+def main():
+    with get_centml_client() as cclient:
+        token = cclient.get_user_vault(UserVaultType.BEARER_TOKENS)
+        request = CreateInferenceDeploymentRequest(
+            name="vllm",
+            cluster_id=1000,
+            hardware_instance_id=1000,
+            image_url="vllm",
+            port=8080,
+            min_scale=1,
+            max_scale=1,
+            endpoint_bearer_token=token["general-inference"],  # token must exist in vault
+        )
+        response = cclient.create_inference(request)
+        print("Create deployment response: ", response)
+
+        ### Get deployment details
+        deployment = cclient.get_inference(response.id)
+        print("Deployment details: ", deployment)
+
+        '''
+        ### Pause the deployment
+        cclient.pause(deployment.id)
+
+        ### Delete the deployment
+        cclient.delete(deployment.id)
+        '''
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/create_inference.py b/examples/sdk/general-inference/create_private_inference_auth_cert.py
similarity index 88%
rename from examples/sdk/create_inference.py
rename to examples/sdk/general-inference/create_private_inference_auth_cert.py
index 8af4d20..b7173e4 100644
--- a/examples/sdk/create_inference.py
+++ b/examples/sdk/general-inference/create_private_inference_auth_cert.py
@@ -8,14 +8,14 @@ def main():
         certs = cclient.get_user_vault(UserVaultType.CERTIFICATES)
 
         request = CreateInferenceDeploymentRequest(
-            name="nginx",
+            name="nginx-cert-private",
             cluster_id=1000,
             hardware_instance_id=1000,
             image_url="nginxinc/nginx-unprivileged",
             port=8080,
             min_scale=1,
             max_scale=1,
-            endpoint_certificate_authority=certs["my_cert"],
+            endpoint_certificate_authority=certs["my_cert"],  # cert must exist in vault
         )
         response = cclient.create_inference(request)
         print("Create deployment response: ", response)
diff --git a/examples/sdk/general-inference/create_public_inference.py b/examples/sdk/general-inference/create_public_inference.py
new file mode 100644
index 0000000..05dceb4
--- /dev/null
+++ b/examples/sdk/general-inference/create_public_inference.py
@@ -0,0 +1,34 @@
+import centml
+from centml.sdk.api import get_centml_client
+from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType
+
+
+def main():
+    with get_centml_client() as cclient:
+
+        request = CreateInferenceDeploymentRequest(
+            name="nginx",
+            cluster_id=1000,
+            hardware_instance_id=1000,
+            image_url="nginxinc/nginx-unprivileged",
+            port=8080,
+            min_scale=1,
+            max_scale=1,
+        )
+        response = cclient.create_inference(request)
+        print("Create deployment response: ", response)
+
+        ### Get deployment details
+        deployment = cclient.get_inference(response.id)
+        print("Deployment details: ", deployment)
+
+        '''
+        ### Pause the deployment
+        cclient.pause(deployment.id)
+
+        ### Delete the deployment
+        cclient.delete(deployment.id)
+        '''
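+
+        # A rough sketch of calling the finished deployment (hypothetical URL --
+        # substitute the endpoint address shown in the deployment details above):
+        #
+        #   import requests
+        #   resp = requests.get("https://<your-endpoint-url>")
+        #   print(resp.status_code)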
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/llm-endpoints/basic-examples/endpoint/ENDPOINT_README.MD b/examples/sdk/llm-endpoints/basic-examples/endpoint/ENDPOINT_README.MD
new file mode 100644
index 0000000..e84e579
--- /dev/null
+++ b/examples/sdk/llm-endpoints/basic-examples/endpoint/ENDPOINT_README.MD
@@ -0,0 +1,58 @@
+# 🚀 CentML Endpoint Creator
+
+This script creates a CentML CServe V2 deployment for a given model, using either the fastest available hardware or a default hardware configuration. It is meant as an example to help users work with the CentML SDK.
+
+---
+
+## 📄 Script: `create_endpoint.py`
+
+### 🔧 What it does
+
+- Fetches the fastest available CServe recipe for the specified model
+- Builds a deployment request with the appropriate cluster and hardware info
+- Optionally modifies recipe properties (e.g. `max_num_seqs`)
+- Submits the deployment via the CentML SDK
+- Prints the deployment response and metadata
+
+---
+
+## 🧰 Requirements
+
+- Python 3.8+
+- [CentML Python SDK](https://pypi.org/project/centml/)
+
+Install:
+
+```bash
+pip install centml
+```
+
+## Default behavior
+
+- Uses the fastest recipe from `get_cserve_recipe(...)`
+- Falls back to the hardcoded cluster ID `1001` in `get_default_cserve_config(...)` if needed
+
+You can adjust the model and deployment name here:
+
+```python
+qwen_config = get_fastest_cserve_config(
+    cclient,
+    name="qwen-fastest",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+Or use the default config instead:
+
+```python
+qwen_config = get_default_cserve_config(
+    cclient,
+    name="qwen-default",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+## 🧪 Running the Script
+
+`python3 create_endpoint.py`
+
+## 📬 Questions?
+
+Reach out to the CentML team or maintainers if you encounter unexpected recipe/hardware mismatches.
diff --git a/examples/sdk/create_cserve.py b/examples/sdk/llm-endpoints/basic-examples/endpoint/create_endpoint.py
similarity index 100%
rename from examples/sdk/create_cserve.py
rename to examples/sdk/llm-endpoints/basic-examples/endpoint/create_endpoint.py
diff --git a/examples/sdk/llm-endpoints/centml-tools/Makefile b/examples/sdk/llm-endpoints/centml-tools/Makefile
new file mode 100644
index 0000000..07f04ce
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/Makefile
@@ -0,0 +1,23 @@
+CONFIG ?= test_config.json
+NAME ?=
+
+PYTHON := /opt/homebrew/bin/python3.11
+
+.PHONY: help deploy delete inspect
+
+help:
+	@echo "Available commands:"
+	@echo "  make deploy CONFIG=<config file>"
+	@echo "  make delete CONFIG=<config file>"
+	@echo "  make inspect NAME=<model name> or CONFIG=<config file>"
+
+deploy:
+	@echo "Using Python: $(PYTHON)"
+	$(PYTHON) scripts/deploy_model.py $(CONFIG)
+
+delete:
+	$(PYTHON) scripts/delete_deployment.py $(CONFIG)
+
+inspect:
+	$(PYTHON) scripts/inspect_model.py $(or $(NAME),$(CONFIG))
diff --git a/examples/sdk/llm-endpoints/centml-tools/README.MD b/examples/sdk/llm-endpoints/centml-tools/README.MD
new file mode 100644
index 0000000..ff6a8b9
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/README.MD
@@ -0,0 +1,22 @@
+# 🧠 CentML Deployment Tools
+
+This repository provides a simple Makefile interface to deploy, inspect, and delete model deployments using the CentML SDK.
+
+## 📦 Prerequisites
+
+- Python 3.11 (or update the `PYTHON` path in the Makefile)
+- The CentML Python SDK installed:
+  ```bash
+  pip install centml
+  ```
+
+## 🛠️ Usage
+
+Run any of the following commands from the project root:
+
+### 🚀 Deploy a Model
+
+`make deploy CONFIG=test_config.json`
+
+Deploys a model based on the configuration in `test_config.json`.
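+
+A minimal `test_config.json` might look like the following (a sketch; see `scripts/README.md` for the full schema and field descriptions):
+
+```json
+{
+  "model": "meta-llama/Llama-3.2-3B-Instruct",
+  "deployment_name": "sample",
+  "hardware_instance_id": 1086,
+  "cluster_id": 1001,
+  "min_scale": 1,
+  "max_scale": 1,
+  "recipe": {
+    "model": "meta-llama/Llama-3.2-3B-Instruct",
+    "is_embedding_model": false,
+    "additional_properties": {
+      "tokenizer": "meta-llama/Llama-3.2-3B-Instruct",
+      "dtype": "auto",
+      "tensor_parallel_size": 1
+    }
+  }
+}
+```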
+### ๐Ÿ” Inspect a Model +`make inspect CONFIG="model_name" +Displays model about the deployment with name model_name. +Or, if you have the config file: +`make inspect CONFIG=test_config.json` +### Register a Deployment +WIP diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/README.md b/examples/sdk/llm-endpoints/centml-tools/scripts/README.md new file mode 100644 index 0000000..4f1dab3 --- /dev/null +++ b/examples/sdk/llm-endpoints/centml-tools/scripts/README.md @@ -0,0 +1,148 @@ +# ๐Ÿง  CentML Deployment Toolkit + +This directory contains Python utilities to manage CentML model deployments, query available hardware, and clean up resources. + +--- + +## ๐Ÿ“ฆ Tools Overview + +| Script | Description | +|-------------------------|-----------------------------------------------------------------------------| +| `deploy_model.py` | Creates or updates a model deployment from a config JSON | +| `delete_deployment.py` | Deletes a deployment by name using a config JSON | +| `inspect_model.py` | Lists all available hardware and deployment recipes for a given model | +| `create_endpoint.py` | Fetches the fastest CServe recipe for the specified model and deploys it | + + +## ๐Ÿš€ Deployment Script + +### ๐Ÿ“„ File: `deploy_model.py` + +This script deploys a model to CentML using the CServe V2 API. It will: + +1. Load a JSON configuration file. +2. Validate cluster and hardware instance availability. +3. Check if the deployment already exists: + - If it does, it updates it. + - If it doesn't, it creates a new deployment. + +### โœ… JSON Config Example: + +```json +{ + "model": "meta-llama/Llama-3.2-3B-Instruct", + "deployment_name": "sample", + "hardware_instance_id": 1086, + "cluster_id": 1001, + "min_scale": 1, + "max_scale": 1, + "recipe": { + "model": "meta-llama/Llama-3.2-3B-Instruct", + "is_embedding_model": false, + "additional_properties": { + "tokenizer": "meta-llama/Llama-3.2-3B-Instruct", + "dtype": "auto", + "tensor_parallel_size": 1 + } + } +} + + +### Usage + +`python3 deploy_model.py ` + +## Deletion Script + +### ๐Ÿ“„ File: `delete_deployment.py` +This script deletes an existing deployment if one with the provided deployment_name exists. + +It does not raise an error if the deployment isn't found โ€” it exits cleanly. Uses the same JSON config as the deploy_model. + + +### Usage +`python3 delete_deployment.py ` + + +## ๐Ÿ” Inspect Model Script +### ๐Ÿ“„ File: `inspect_model.py` +Inspects available deployment recipes and hardware for a specific model. + +### Usage +`python3 inspect_model.py meta-llama/Llama-3.2-3B-Instruct` +The script will: + + List all recipe variants (e.g., fastest, cheapest) + + Print detailed hardware specs for each variant + + Display all available hardware options + + +## ๐Ÿ“„ File: `create_endpoint.py` + +### ๐Ÿ”ง What it does + +- Fetches the fastest available CServe recipe for the specified model +- Builds a deployment request with appropriate cluster and hardware info +- Optionally modifies recipe properties (e.g. `max_num_seqs`) +- Submits the deployment via the CentML SDK +- Prints the deployment response and metadata + +--- + +## ๐Ÿงฐ Requirements + +- Python 3.8+ +- [CentML Python SDK](https://pypi.org/project/centml/) + +Install: + +```bash +pip install centml +``` +## Default behavior: + + Uses the fastest recipe from get_cserve_recipe(...) + + Falls back to hardcoded cluster ID 1001 in get_default_cserve_config(...) 
+
+## 📄 File: `create_endpoint.py`
+
+### 🔧 What it does
+
+- Fetches the fastest available CServe recipe for the specified model
+- Builds a deployment request with the appropriate cluster and hardware info
+- Optionally modifies recipe properties (e.g. `max_num_seqs`)
+- Submits the deployment via the CentML SDK
+- Prints the deployment response and metadata
+
+### Default behavior
+
+- Uses the fastest recipe from `get_cserve_recipe(...)`
+- Falls back to the hardcoded cluster ID `1001` in `get_default_cserve_config(...)` if needed
+
+You can adjust the model and deployment name here:
+
+```python
+qwen_config = get_fastest_cserve_config(
+    cclient,
+    name="qwen-fastest",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+Or use the default config instead:
+
+```python
+qwen_config = get_default_cserve_config(
+    cclient,
+    name="qwen-default",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+### 🧪 Running the Script
+
+`python3 create_endpoint.py`
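+
+### Example workflow
+
+A typical end-to-end session with these scripts might look like this (assuming `test_config.json` exists alongside the Makefile):
+
+```bash
+python3 deploy_model.py test_config.json            # create or update the deployment
+python3 inspect_model.py CONFIG=test_config.json    # review recipes and hardware
+python3 delete_deployment.py test_config.json       # clean up when finished
+```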
+
+## 🧰 Prerequisites
+
+* Python 3.8+
+* [CentML Python SDK](https://pypi.org/project/centml/) (`pip install centml`)
+* Valid CentML credentials (e.g., via environment or local config)
+
+## 📬 Questions?
+
+Reach out to the CentML team or maintainers if you encounter unexpected recipe/hardware mismatches.
diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/delete_deployment.py b/examples/sdk/llm-endpoints/centml-tools/scripts/delete_deployment.py
new file mode 100644
index 0000000..e674b1a
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/scripts/delete_deployment.py
@@ -0,0 +1,56 @@
+import json
+import sys
+from centml.sdk.api import get_centml_client
+
+def load_deployment_name(config_path):
+    try:
+        with open(config_path, "r") as f:
+            config = json.load(f)
+            name = config.get("deployment_name", "").strip()
+            if not name:
+                raise ValueError("Missing 'deployment_name' in config.")
+            return name
+    except Exception as e:
+        print(f"❌ Failed to read config file: {e}")
+        sys.exit(1)
+
+def delete_if_exists(cclient, deployment_name):
+    print(f"\n📋 Searching for deployment named: '{deployment_name}' (case-insensitive)")
+
+    try:
+        deployments = cclient._api.get_deployments_deployments_get().results
+    except Exception as e:
+        print(f"❌ Failed to list deployments: {e}")
+        sys.exit(1)
+
+    matched = next(
+        (d for d in deployments if getattr(d, "name", "").strip().lower() == deployment_name.lower()),
+        None
+    )
+
+    if not matched:
+        print("ℹ️ No matching deployment found.")
+        return
+
+    print(f"🗑️ Deleting deployment '{matched.name}' (id={matched.id})...")
+    try:
+        cclient.delete(matched.id)
+        print("✅ Deployment deleted successfully.")
+    except Exception as e:
+        print(f"❌ Failed to delete deployment: {e}")
+        raise
+
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: python3 delete_deployment.py <config_file.json>")
+        sys.exit(1)
+
+    config_path = sys.argv[1]
+
+    with get_centml_client() as cclient:
+        deployment_name = load_deployment_name(config_path)
+        delete_if_exists(cclient, deployment_name)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/deploy_model.py b/examples/sdk/llm-endpoints/centml-tools/scripts/deploy_model.py
new file mode 100644
index 0000000..4880cce
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/scripts/deploy_model.py
@@ -0,0 +1,208 @@
+import json
+import os
+import sys
+from centml.sdk.api import get_centml_client
+from centml.sdk import CreateCServeV2DeploymentRequest, CServeV2Recipe
+from platform_api_python_client.exceptions import BadRequestException
+
+def list_prebuilt_recipes(model_name, cclient):
+    print(f"\n🔍 Looking up prebuilt recipes for model: {model_name}")
+    recipes = cclient.get_cserve_recipe(model=model_name)
+    labeled = []
+
+    for variant in recipes:
+        for label in dir(variant):
+            if not label.startswith("_"):
+                config = getattr(variant, label, None)
+                if config and hasattr(config, "recipe"):
+                    print(f"{len(labeled)}. {label} - hardware ID {config.hardware_instance_id}")
+                    labeled.append(config)
+    return labeled
+
+def write_template_file(filename, config_data):
+    with open(filename, "w") as f:
+        json.dump(config_data, f, indent=2)
+    print(f"📝 Template written to {filename}")
+
+def validate_hardware_and_cluster(config, cclient):
+    hardware_instances = cclient.get_hardware_instances()
+    for h in hardware_instances:
+        if h.id == config["hardware_instance_id"] and h.cluster_id == config["cluster_id"]:
+            return True
+    return False
+
+def load_or_create_config(filename, cclient):
+    if not os.path.exists(filename):
+        print(f"❌ Config file {filename} not found.")
+        template = {
+            "model": "",
+            "deployment_name": "",
+            "hardware_instance_id": 0,
+            "cluster_id": 0,
+            "min_scale": 1,
+            "max_scale": 1,
+            "recipe": {
+                "model": "",
+                "is_embedding_model": False,
+                "additional_properties": {
+                    "revision": None,
+                    "seed": 0,
+                    "dtype": "auto",
+                    "tokenizer": "",
+                    "block_size": 32,
+                    "swap_space": 0,
+                    "download_dir": None,
+                    "gpu_mem_util": 0.9,
+                    "max_num_seqs": 1024,
+                    "quantization": None,
+                    "max_model_len": None,
+                    "tokenizer_mode": "auto",
+                    "use_flashinfer": True,
+                    "eager_execution": False,
+                    "engine_managers": ["localhost:6061"],
+                    "dist_init_method": None,
+                    "num_scheduler_steps": 1,
+                    "tensor_parallel_size": 1,
+                    "environment_variables": {
+                        "apply": {
+                            "NCCL_SHM_DISABLE": 1
+                        }
+                    },
+                    "max_seq_len_to_capture": None,
+                    "pipeline_parallel_size": 1,
+                    "distributed_executor_backend": "uni"
+                }
+            }
+        }
+        write_template_file(filename, template)
+        use_prebuilt = input("Would you like to use a prebuilt recipe instead? (y/n): ").strip().lower()
+        if use_prebuilt == "y":
+            model = input("Enter the model name: ").strip()
+            options = list_prebuilt_recipes(model, cclient)
+            if not options:
+                print("⚠️ No prebuilt recipes found. Please edit the template and try again.")
+                sys.exit(1)
+            index = int(input("Select a recipe by number: ").strip())
+            selected = options[index]
+            config_data = {
+                "model": selected.recipe.model,
+                "deployment_name": input("Enter a deployment name (max 20 chars): ").strip(),
+                "hardware_instance_id": selected.hardware_instance_id,
+                "cluster_id": cclient.get_cluster_id(selected.hardware_instance_id),
+                "min_scale": 0,
+                "max_scale": 1,
+                "recipe": json.loads(selected.recipe.model_dump_json())
+            }
+            write_template_file(filename, config_data)
+            return config_data
+        else:
+            print("📝 Please edit the generated template with the correct values and re-run.")
+            sys.exit(1)
+
+    with open(filename, "r") as f:
+        try:
+            config = json.load(f)
+        except json.JSONDecodeError:
+            print("❌ Invalid JSON. Please fix the file and try again.")
+            sys.exit(1)
+
+    required_fields = ("model", "deployment_name", "cluster_id", "hardware_instance_id", "min_scale", "max_scale")
+    if not all(k in config and config[k] is not None for k in required_fields):
+        print("⚠️ Missing required fields in config. Please edit the file and try again.")
+        sys.exit(1)
+
+    if not isinstance(config["min_scale"], int) or not isinstance(config["max_scale"], int):
+        print("❌ min_scale and max_scale must be integers.")
+        sys.exit(1)
+
+    if not validate_hardware_and_cluster(config, cclient):
+        print("❌ Invalid hardware_instance_id and cluster_id combination.")
+        print("\n📋 Available Hardware Instances:")
+        for h in cclient.get_hardware_instances():
+            print(
+                f"id={h.id} name='{h.name}' gpu_type='{h.gpu_type}' num_gpu={h.num_gpu} "
+                f"cpu={h.cpu} memory={h.memory} cost_per_hr={h.cost_per_hr} "
+                f"cluster_id={h.cluster_id} provider='{h.provider}' "
+                f"num_accelerators={h.num_accelerators} accelerator_memory={h.accelerator_memory}"
+            )
+
+        print("\n🤖 Suggested valid options from prebuilt recipes:")
+        try:
+            prebuilt_options = cclient.get_cserve_recipe(model=config["model"])
+            for variant in prebuilt_options:
+                for label in dir(variant):
+                    if not label.startswith("_"):
+                        option = getattr(variant, label, None)
+                        if option and hasattr(option, "hardware_instance_id"):
+                            cluster_id = cclient.get_cluster_id(option.hardware_instance_id)
+                            print(f" - hardware_instance_id: {option.hardware_instance_id}, cluster_id: {cluster_id} ({label})")
+        except Exception as e:
+            print("⚠️ Could not retrieve prebuilt recipes for suggestions.")
+
+        sys.exit(1)
+
+    return config
+
+def deploy_model(config, cclient):
+    print(f"\n🚀 Deploying model: {config['model']} as '{config['deployment_name']}'")
+
+    request = CreateCServeV2DeploymentRequest(
+        name=config["deployment_name"],
+        cluster_id=config["cluster_id"],
+        hardware_instance_id=config["hardware_instance_id"],
+        recipe=CServeV2Recipe(**config["recipe"]),
+        min_scale=config.get("min_scale", 1),
+        max_scale=config.get("max_scale", 1),
+        env_vars={}
+    )
+
+    print("\n📋 Checking all existing deployments (all types)...")
+    try:
+        deployments = cclient._api.get_deployments_deployments_get().results
+    except Exception as e:
+        print(f"❌ Failed to fetch deployments: {e}")
+        sys.exit(1)
+
+    print("\n📋 Raw deployments returned:")
+    for d in deployments:
+        print(f" - name: '{getattr(d, 'name', '')}' (id={getattr(d, 'id', '')}, type={getattr(d, 'type', '')})")
+
+    target = config["deployment_name"].strip().lower()
+    matched = next(
+        (d for d in deployments if getattr(d, "name", "").strip().lower() == target),
+        None
+    )
+
+    if matched:
+        print(f"\n🔄 Deployment '{config['deployment_name']}' exists (ID: {matched.id}). Proceeding with update...")
+        try:
+            response = cclient.update_cserve(matched.id, request)
+            print("✅ Deployment updated successfully.")
+        except Exception as e:
+            print(f"❌ Failed to update deployment: {e}")
+            raise
+    else:
+        print(f"\n🆕 Deployment '{config['deployment_name']}' not found. Proceeding with creation...")
+        try:
+            response = cclient.create_cserve(request)
+            print("✅ Deployment created successfully.")
+        except Exception as e:
+            print(f"❌ Failed to create deployment: {e}")
+            raise
+
+    print("📦 Deployment ID:", response.id)
+    print("🔧 You can now monitor or manage your deployment using the CentML dashboard or CLI.")
+
+def main():
+    if len(sys.argv) < 2:
+        print("📝 No config file provided. Defaulting to 'config.json'...")
+        config_file = "config.json"
+    else:
+        config_file = sys.argv[1]
+
+    with get_centml_client() as cclient:
+        config = load_or_create_config(config_file, cclient)
+        deploy_model(config, cclient)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/inspect_model.py b/examples/sdk/llm-endpoints/centml-tools/scripts/inspect_model.py
new file mode 100644
index 0000000..09e60f6
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/scripts/inspect_model.py
@@ -0,0 +1,90 @@
+import sys
+import json
+import os
+from centml.sdk.api import get_centml_client
+from centml.sdk import CServeV2Recipe
+
+
+def print_hardware_details(cclient, instance_id):
+    hardware = next((h for h in cclient.get_hardware_instances() if h.id == instance_id), None)
+    if hardware:
+        print(f" 🧠 Hardware ID {hardware.id}:")
+        print(
+            f" name='{hardware.name}' gpu_type='{hardware.gpu_type}' num_gpu={hardware.num_gpu} "
+            f"cpu={hardware.cpu} memory={hardware.memory} cost_per_hr={hardware.cost_per_hr} "
+            f"cluster_id={hardware.cluster_id} provider='{hardware.provider}' "
+            f"num_accelerators={hardware.num_accelerators} accelerator_memory={hardware.accelerator_memory}"
+        )
+    else:
+        print(f" ⚠️ Hardware ID {instance_id} not found.")
+
+
+def print_all_config_variants(recipe_variant, cclient):
+    for label in dir(recipe_variant):
+        if label.startswith("_"):
+            continue
+        config = getattr(recipe_variant, label, None)
+        if config and hasattr(config, "recipe"):
+            print(f"\n🔧 Prebuilt Configuration: {label}")
+            print(f" Model: {config.recipe.model}")
+            print(f" Hardware Instance ID: {config.hardware_instance_id}")
+            print_hardware_details(cclient, config.hardware_instance_id)
+            print(" Recipe:")
+            try:
+                print(config.recipe.model_dump())
+            except AttributeError:
+                print(config.recipe.dict())
+
+
+def print_all_hardware(cclient):
+    print("\n📋 All Available Hardware Instances:")
+    for h in cclient.get_hardware_instances():
+        print(
+            f"id={h.id} name='{h.name}' gpu_type='{h.gpu_type}' num_gpu={h.num_gpu} "
+            f"cpu={h.cpu} memory={h.memory} cost_per_hr={h.cost_per_hr} "
+            f"cluster_id={h.cluster_id} provider='{h.provider}' "
+            f"num_accelerators={h.num_accelerators} accelerator_memory={h.accelerator_memory}"
+        )
+
+
+def main():
+    model_name = None
+    config_path = None
+
+    for arg in sys.argv[1:]:
+        if arg.startswith("CONFIG="):
+            config_path = arg.split("=", 1)[1]
+        elif arg.endswith(".json") and os.path.isfile(arg):
+            config_path = arg
+        elif os.path.isfile(arg):
+            config_path = arg
+        else:
+            model_name = arg  # fallback to direct model name
+
+    if config_path:
+        try:
+            with open(config_path, "r") as f:
+                config = json.load(f)
+                model_name = config.get("model")
+        except Exception as e:
+            print(f"❌ Failed to read config: {e}")
+            sys.exit(1)
+
+    if not model_name:
+        print("❌ Usage: python3 inspect_model.py <model_name> or CONFIG=<config.json>")
+        sys.exit(1)
+
+    with get_centml_client() as cclient:
+        print(f"\n🔍 Inspecting model: {model_name}")
+        recipes = cclient.get_cserve_recipe(model=model_name)
+
+        for variant in recipes:
+            print_all_config_variants(variant, cclient)
+
+        print_all_hardware(cclient)
+
+
+if __name__ == "__main__":
+    main()
base_url="https://api.centml.com/openai/v1", +) + +# Define your question +user_question = "How does CentML improve your AIOps?" + +completion = client.chat.completions.create( + model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": user_question} # Add the user's question + ], + max_tokens=2000, + temperature=0.7, + top_p=1, + n=1, + stream=False, + frequency_penalty=0, + presence_penalty=0.5, + stop=[] +) + +print(completion.choices[0].message) \ No newline at end of file