diff --git a/examples/sdk/general-inference/create_private_inference_auth_bearer_token.py b/examples/sdk/general-inference/create_private_inference_auth_bearer_token.py
new file mode 100644
index 0000000..9d0ec66
--- /dev/null
+++ b/examples/sdk/general-inference/create_private_inference_auth_bearer_token.py
@@ -0,0 +1,36 @@
+import centml
+from centml.sdk.api import get_centml_client
+from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType
+
+
+def main():
+    with get_centml_client() as cclient:
+        token = cclient.get_user_vault(UserVaultType.BEARER_TOKENS)
+        request = CreateInferenceDeploymentRequest(
+            name="vllm",
+            cluster_id=1000,
+            hardware_instance_id=1000,
+            image_url="vllm",
+            port=8080,
+            min_scale=1,
+            max_scale=1,
+            endpoint_bearer_token=token["general-inference"],  # token must exist in vault
+        )
+        response = cclient.create_inference(request)
+        print("Create deployment response: ", response)
+
+        ### Get deployment details
+        deployment = cclient.get_inference(response.id)
+        print("Deployment details: ", deployment)
+
+        '''
+        ### Pause the deployment
+        cclient.pause(deployment.id)
+
+        ### Delete the deployment
+        cclient.delete(deployment.id)
+        '''
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/create_inference.py b/examples/sdk/general-inference/create_private_inference_auth_cert.py
similarity index 88%
rename from examples/sdk/create_inference.py
rename to examples/sdk/general-inference/create_private_inference_auth_cert.py
index 8af4d20..b7173e4 100644
--- a/examples/sdk/create_inference.py
+++ b/examples/sdk/general-inference/create_private_inference_auth_cert.py
@@ -8,14 +8,14 @@ def main():
         certs = cclient.get_user_vault(UserVaultType.CERTIFICATES)
 
         request = CreateInferenceDeploymentRequest(
-            name="nginx",
+            name="nginx-cert-private",
             cluster_id=1000,
             hardware_instance_id=1000,
             image_url="nginxinc/nginx-unprivileged",
             port=8080,
             min_scale=1,
             max_scale=1,
-            endpoint_certificate_authority=certs["my_cert"],
+            endpoint_certificate_authority=certs["my_cert"],  # cert must exist in vault
         )
         response = cclient.create_inference(request)
         print("Create deployment response: ", response)
diff --git a/examples/sdk/general-inference/create_public_inference.py b/examples/sdk/general-inference/create_public_inference.py
new file mode 100644
index 0000000..05dceb4
--- /dev/null
+++ b/examples/sdk/general-inference/create_public_inference.py
@@ -0,0 +1,34 @@
+import centml
+from centml.sdk.api import get_centml_client
+from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType
+
+
+def main():
+    with get_centml_client() as cclient:
+
+        request = CreateInferenceDeploymentRequest(
+            name="nginx",
+            cluster_id=1000,
+            hardware_instance_id=1000,
+            image_url="nginxinc/nginx-unprivileged",
+            port=8080,
+            min_scale=1,
+            max_scale=1,
+        )
+        response = cclient.create_inference(request)
+        print("Create deployment response: ", response)
+
+        ### Get deployment details
+        deployment = cclient.get_inference(response.id)
+        print("Deployment details: ", deployment)
+
+        '''
+        ### Pause the deployment
+        cclient.pause(deployment.id)
+
+        ### Delete the deployment
+        cclient.delete(deployment.id)
+        '''
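+
+        # A rough sketch of calling the finished deployment (hypothetical URL --
+        # substitute the endpoint address shown in the deployment details above):
+        #
+        #   import requests
+        #   resp = requests.get("https://<your-endpoint-url>")
+        #   print(resp.status_code)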
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/llm-endpoints/basic-examples/endpoint/ENDPOINT_README.MD b/examples/sdk/llm-endpoints/basic-examples/endpoint/ENDPOINT_README.MD
new file mode 100644
index 0000000..e84e579
--- /dev/null
+++ b/examples/sdk/llm-endpoints/basic-examples/endpoint/ENDPOINT_README.MD
@@ -0,0 +1,58 @@
+# 🚀 CentML Endpoint Creator
+
+This script creates a CentML CServe V2 deployment for a given model, using either the fastest available hardware or a default hardware configuration. It is meant as an example to help users work with the CentML SDK.
+
+---
+
+## 📄 Script: `create_endpoint.py`
+
+### 🔧 What it does
+
+- Fetches the fastest available CServe recipe for the specified model
+- Builds a deployment request with the appropriate cluster and hardware info
+- Optionally modifies recipe properties (e.g. `max_num_seqs`)
+- Submits the deployment via the CentML SDK
+- Prints the deployment response and metadata
+
+---
+
+## 🧰 Requirements
+
+- Python 3.8+
+- [CentML Python SDK](https://pypi.org/project/centml/)
+
+Install:
+
+```bash
+pip install centml
+```
+
+## Default behavior
+
+- Uses the fastest recipe from `get_cserve_recipe(...)`
+- Falls back to the hardcoded cluster ID `1001` in `get_default_cserve_config(...)` if needed
+
+You can adjust the model and deployment name here:
+
+```python
+qwen_config = get_fastest_cserve_config(
+    cclient,
+    name="qwen-fastest",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+Or use the default config instead:
+
+```python
+qwen_config = get_default_cserve_config(
+    cclient,
+    name="qwen-default",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+## 🧪 Running the Script
+
+`python3 create_endpoint.py`
+
+## 📬 Questions?
+
+Reach out to the CentML team or maintainers if you encounter unexpected recipe/hardware mismatches.
diff --git a/examples/sdk/create_cserve.py b/examples/sdk/llm-endpoints/basic-examples/endpoint/create_endpoint.py
similarity index 100%
rename from examples/sdk/create_cserve.py
rename to examples/sdk/llm-endpoints/basic-examples/endpoint/create_endpoint.py
diff --git a/examples/sdk/llm-endpoints/centml-tools/Makefile b/examples/sdk/llm-endpoints/centml-tools/Makefile
new file mode 100644
index 0000000..07f04ce
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/Makefile
@@ -0,0 +1,23 @@
+CONFIG ?= test_config.json
+NAME ?=
+
+PYTHON := /opt/homebrew/bin/python3.11
+
+.PHONY: help deploy delete inspect
+
+help:
+	@echo "Available commands:"
+	@echo "  make deploy CONFIG=<config file>"
+	@echo "  make delete CONFIG=<config file>"
+	@echo "  make inspect NAME=<model name> or CONFIG=<config file>"
+
+deploy:
+	@echo "Using Python: $(PYTHON)"
+	$(PYTHON) scripts/deploy_model.py $(CONFIG)
+
+delete:
+	$(PYTHON) scripts/delete_deployment.py $(CONFIG)
+
+inspect:
+	$(PYTHON) scripts/inspect_model.py $(or $(NAME),$(CONFIG))
diff --git a/examples/sdk/llm-endpoints/centml-tools/README.MD b/examples/sdk/llm-endpoints/centml-tools/README.MD
new file mode 100644
index 0000000..ff6a8b9
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/README.MD
@@ -0,0 +1,22 @@
+# 🧠 CentML Deployment Tools
+
+This repository provides a simple Makefile interface to deploy, inspect, and delete model deployments using the CentML SDK.
+
+## 📦 Prerequisites
+
+- Python 3.11 (or update the `PYTHON` path in the Makefile)
+- The CentML Python SDK installed:
+  ```bash
+  pip install centml
+  ```
+
+## 🛠️ Usage
+
+Run any of the following commands from the project root:
+
+### 🚀 Deploy a Model
+
+`make deploy CONFIG=test_config.json`
+
+Deploys a model based on the configuration in `test_config.json`.
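+
+A minimal `test_config.json` might look like the following (a sketch; see `scripts/README.md` for the full schema and field descriptions):
+
+```json
+{
+  "model": "meta-llama/Llama-3.2-3B-Instruct",
+  "deployment_name": "sample",
+  "hardware_instance_id": 1086,
+  "cluster_id": 1001,
+  "min_scale": 1,
+  "max_scale": 1,
+  "recipe": {
+    "model": "meta-llama/Llama-3.2-3B-Instruct",
+    "is_embedding_model": false,
+    "additional_properties": {
+      "tokenizer": "meta-llama/Llama-3.2-3B-Instruct",
+      "dtype": "auto",
+      "tensor_parallel_size": 1
+    }
+  }
+}
+```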
+### ๐Ÿ” Inspect a Model +`make inspect CONFIG="model_name" +Displays model about the deployment with name model_name. +Or, if you have the config file: +`make inspect CONFIG=test_config.json` +### Register a Deployment +WIP diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/README.md b/examples/sdk/llm-endpoints/centml-tools/scripts/README.md new file mode 100644 index 0000000..4f1dab3 --- /dev/null +++ b/examples/sdk/llm-endpoints/centml-tools/scripts/README.md @@ -0,0 +1,148 @@ +# ๐Ÿง  CentML Deployment Toolkit + +This directory contains Python utilities to manage CentML model deployments, query available hardware, and clean up resources. + +--- + +## ๐Ÿ“ฆ Tools Overview + +| Script | Description | +|-------------------------|-----------------------------------------------------------------------------| +| `deploy_model.py` | Creates or updates a model deployment from a config JSON | +| `delete_deployment.py` | Deletes a deployment by name using a config JSON | +| `inspect_model.py` | Lists all available hardware and deployment recipes for a given model | +| `create_endpoint.py` | Fetches the fastest CServe recipe for the specified model and deploys it | + + +## ๐Ÿš€ Deployment Script + +### ๐Ÿ“„ File: `deploy_model.py` + +This script deploys a model to CentML using the CServe V2 API. It will: + +1. Load a JSON configuration file. +2. Validate cluster and hardware instance availability. +3. Check if the deployment already exists: + - If it does, it updates it. + - If it doesn't, it creates a new deployment. + +### โœ… JSON Config Example: + +```json +{ + "model": "meta-llama/Llama-3.2-3B-Instruct", + "deployment_name": "sample", + "hardware_instance_id": 1086, + "cluster_id": 1001, + "min_scale": 1, + "max_scale": 1, + "recipe": { + "model": "meta-llama/Llama-3.2-3B-Instruct", + "is_embedding_model": false, + "additional_properties": { + "tokenizer": "meta-llama/Llama-3.2-3B-Instruct", + "dtype": "auto", + "tensor_parallel_size": 1 + } + } +} + + +### Usage + +`python3 deploy_model.py ` + +## Deletion Script + +### ๐Ÿ“„ File: `delete_deployment.py` +This script deletes an existing deployment if one with the provided deployment_name exists. + +It does not raise an error if the deployment isn't found โ€” it exits cleanly. Uses the same JSON config as the deploy_model. + + +### Usage +`python3 delete_deployment.py ` + + +## ๐Ÿ” Inspect Model Script +### ๐Ÿ“„ File: `inspect_model.py` +Inspects available deployment recipes and hardware for a specific model. + +### Usage +`python3 inspect_model.py meta-llama/Llama-3.2-3B-Instruct` +The script will: + + List all recipe variants (e.g., fastest, cheapest) + + Print detailed hardware specs for each variant + + Display all available hardware options + + +## ๐Ÿ“„ File: `create_endpoint.py` + +### ๐Ÿ”ง What it does + +- Fetches the fastest available CServe recipe for the specified model +- Builds a deployment request with appropriate cluster and hardware info +- Optionally modifies recipe properties (e.g. `max_num_seqs`) +- Submits the deployment via the CentML SDK +- Prints the deployment response and metadata + +--- + +## ๐Ÿงฐ Requirements + +- Python 3.8+ +- [CentML Python SDK](https://pypi.org/project/centml/) + +Install: + +```bash +pip install centml +``` +## Default behavior: + + Uses the fastest recipe from get_cserve_recipe(...) + + Falls back to hardcoded cluster ID 1001 in get_default_cserve_config(...) 
+
+## 📄 File: `create_endpoint.py`
+
+### 🔧 What it does
+
+- Fetches the fastest available CServe recipe for the specified model
+- Builds a deployment request with the appropriate cluster and hardware info
+- Optionally modifies recipe properties (e.g. `max_num_seqs`)
+- Submits the deployment via the CentML SDK
+- Prints the deployment response and metadata
+
+### Default behavior
+
+- Uses the fastest recipe from `get_cserve_recipe(...)`
+- Falls back to the hardcoded cluster ID `1001` in `get_default_cserve_config(...)` if needed
+
+You can adjust the model and deployment name here:
+
+```python
+qwen_config = get_fastest_cserve_config(
+    cclient,
+    name="qwen-fastest",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+Or use the default config instead:
+
+```python
+qwen_config = get_default_cserve_config(
+    cclient,
+    name="qwen-default",
+    model="Qwen/Qwen2-VL-7B-Instruct"
+)
+```
+
+### 🧪 Running the Script
+
+`python3 create_endpoint.py`
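+
+### Example workflow
+
+A typical end-to-end session with these scripts might look like this (assuming `test_config.json` exists alongside the Makefile):
+
+```bash
+python3 deploy_model.py test_config.json            # create or update the deployment
+python3 inspect_model.py CONFIG=test_config.json    # review recipes and hardware
+python3 delete_deployment.py test_config.json       # clean up when finished
+```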
+
+## 🧰 Prerequisites
+
+* Python 3.8+
+* [CentML Python SDK](https://pypi.org/project/centml/) (`pip install centml`)
+* Valid CentML credentials (e.g., via environment or local config)
+
+## 📬 Questions?
+
+Reach out to the CentML team or maintainers if you encounter unexpected recipe/hardware mismatches.
diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/delete_deployment.py b/examples/sdk/llm-endpoints/centml-tools/scripts/delete_deployment.py
new file mode 100644
index 0000000..e674b1a
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/scripts/delete_deployment.py
@@ -0,0 +1,56 @@
+import json
+import sys
+from centml.sdk.api import get_centml_client
+
+def load_deployment_name(config_path):
+    try:
+        with open(config_path, "r") as f:
+            config = json.load(f)
+            name = config.get("deployment_name", "").strip()
+            if not name:
+                raise ValueError("Missing 'deployment_name' in config.")
+            return name
+    except Exception as e:
+        print(f"❌ Failed to read config file: {e}")
+        sys.exit(1)
+
+def delete_if_exists(cclient, deployment_name):
+    print(f"\n📋 Searching for deployment named: '{deployment_name}' (case-insensitive)")
+
+    try:
+        deployments = cclient._api.get_deployments_deployments_get().results
+    except Exception as e:
+        print(f"❌ Failed to list deployments: {e}")
+        sys.exit(1)
+
+    matched = next(
+        (d for d in deployments if getattr(d, "name", "").strip().lower() == deployment_name.lower()),
+        None
+    )
+
+    if not matched:
+        print("ℹ️ No matching deployment found.")
+        return
+
+    print(f"🗑️ Deleting deployment '{matched.name}' (id={matched.id})...")
+    try:
+        cclient.delete(matched.id)
+        print("✅ Deployment deleted successfully.")
+    except Exception as e:
+        print(f"❌ Failed to delete deployment: {e}")
+        raise
+
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: python3 delete_deployment.py <config_file.json>")
+        sys.exit(1)
+
+    config_path = sys.argv[1]
+
+    with get_centml_client() as cclient:
+        deployment_name = load_deployment_name(config_path)
+        delete_if_exists(cclient, deployment_name)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/deploy_model.py b/examples/sdk/llm-endpoints/centml-tools/scripts/deploy_model.py
new file mode 100644
index 0000000..4880cce
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/scripts/deploy_model.py
@@ -0,0 +1,208 @@
+import json
+import os
+import sys
+from centml.sdk.api import get_centml_client
+from centml.sdk import CreateCServeV2DeploymentRequest, CServeV2Recipe
+from platform_api_python_client.exceptions import BadRequestException
+
+def list_prebuilt_recipes(model_name, cclient):
+    print(f"\n🔍 Looking up prebuilt recipes for model: {model_name}")
+    recipes = cclient.get_cserve_recipe(model=model_name)
+    labeled = []
+
+    for variant in recipes:
+        for label in dir(variant):
+            if not label.startswith("_"):
+                config = getattr(variant, label, None)
+                if config and hasattr(config, "recipe"):
+                    print(f"{len(labeled)}. {label} - hardware ID {config.hardware_instance_id}")
+                    labeled.append(config)
+    return labeled
+
+def write_template_file(filename, config_data):
+    with open(filename, "w") as f:
+        json.dump(config_data, f, indent=2)
+    print(f"📝 Template written to {filename}")
+
+def validate_hardware_and_cluster(config, cclient):
+    hardware_instances = cclient.get_hardware_instances()
+    for h in hardware_instances:
+        if h.id == config["hardware_instance_id"] and h.cluster_id == config["cluster_id"]:
+            return True
+    return False
+
+def load_or_create_config(filename, cclient):
+    if not os.path.exists(filename):
+        print(f"❌ Config file {filename} not found.")
+        template = {
+            "model": "",
+            "deployment_name": "",
+            "hardware_instance_id": 0,
+            "cluster_id": 0,
+            "min_scale": 1,
+            "max_scale": 1,
+            "recipe": {
+                "model": "",
+                "is_embedding_model": False,
+                "additional_properties": {
+                    "revision": None,
+                    "seed": 0,
+                    "dtype": "auto",
+                    "tokenizer": "",
+                    "block_size": 32,
+                    "swap_space": 0,
+                    "download_dir": None,
+                    "gpu_mem_util": 0.9,
+                    "max_num_seqs": 1024,
+                    "quantization": None,
+                    "max_model_len": None,
+                    "tokenizer_mode": "auto",
+                    "use_flashinfer": True,
+                    "eager_execution": False,
+                    "engine_managers": ["localhost:6061"],
+                    "dist_init_method": None,
+                    "num_scheduler_steps": 1,
+                    "tensor_parallel_size": 1,
+                    "environment_variables": {
+                        "apply": {
+                            "NCCL_SHM_DISABLE": 1
+                        }
+                    },
+                    "max_seq_len_to_capture": None,
+                    "pipeline_parallel_size": 1,
+                    "distributed_executor_backend": "uni"
+                }
+            }
+        }
+        write_template_file(filename, template)
+        use_prebuilt = input("Would you like to use a prebuilt recipe instead? (y/n): ").strip().lower()
+        if use_prebuilt == "y":
+            model = input("Enter the model name: ").strip()
+            options = list_prebuilt_recipes(model, cclient)
+            if not options:
+                print("⚠️ No prebuilt recipes found. Please edit the template and try again.")
+                sys.exit(1)
+            index = int(input("Select a recipe by number: ").strip())
+            selected = options[index]
+            config_data = {
+                "model": selected.recipe.model,
+                "deployment_name": input("Enter a deployment name (max 20 chars): ").strip(),
+                "hardware_instance_id": selected.hardware_instance_id,
+                "cluster_id": cclient.get_cluster_id(selected.hardware_instance_id),
+                "min_scale": 0,
+                "max_scale": 1,
+                "recipe": json.loads(selected.recipe.model_dump_json())
+            }
+            write_template_file(filename, config_data)
+            return config_data
+        else:
+            print("📝 Please edit the generated template with the correct values and re-run.")
+            sys.exit(1)
+
+    with open(filename, "r") as f:
+        try:
+            config = json.load(f)
+        except json.JSONDecodeError:
+            print("❌ Invalid JSON. Please fix the file and try again.")
+            sys.exit(1)
+
+    required_fields = ("model", "deployment_name", "cluster_id", "hardware_instance_id", "min_scale", "max_scale")
+    if not all(k in config and config[k] is not None for k in required_fields):
+        print("⚠️ Missing required fields in config. Please edit the file and try again.")
+        sys.exit(1)
+
+    if not isinstance(config["min_scale"], int) or not isinstance(config["max_scale"], int):
+        print("❌ min_scale and max_scale must be integers.")
+        sys.exit(1)
+
+    if not validate_hardware_and_cluster(config, cclient):
+        print("❌ Invalid hardware_instance_id and cluster_id combination.")
+        print("\n📋 Available Hardware Instances:")
+        for h in cclient.get_hardware_instances():
+            print(
+                f"id={h.id} name='{h.name}' gpu_type='{h.gpu_type}' num_gpu={h.num_gpu} "
+                f"cpu={h.cpu} memory={h.memory} cost_per_hr={h.cost_per_hr} "
+                f"cluster_id={h.cluster_id} provider='{h.provider}' "
+                f"num_accelerators={h.num_accelerators} accelerator_memory={h.accelerator_memory}"
+            )
+
+        print("\n🤖 Suggested valid options from prebuilt recipes:")
+        try:
+            prebuilt_options = cclient.get_cserve_recipe(model=config["model"])
+            for variant in prebuilt_options:
+                for label in dir(variant):
+                    if not label.startswith("_"):
+                        option = getattr(variant, label, None)
+                        if option and hasattr(option, "hardware_instance_id"):
+                            cluster_id = cclient.get_cluster_id(option.hardware_instance_id)
+                            print(f" - hardware_instance_id: {option.hardware_instance_id}, cluster_id: {cluster_id} ({label})")
+        except Exception as e:
+            print("⚠️ Could not retrieve prebuilt recipes for suggestions.")
+
+        sys.exit(1)
+
+    return config
+
+def deploy_model(config, cclient):
+    print(f"\n🚀 Deploying model: {config['model']} as '{config['deployment_name']}'")
+
+    request = CreateCServeV2DeploymentRequest(
+        name=config["deployment_name"],
+        cluster_id=config["cluster_id"],
+        hardware_instance_id=config["hardware_instance_id"],
+        recipe=CServeV2Recipe(**config["recipe"]),
+        min_scale=config.get("min_scale", 1),
+        max_scale=config.get("max_scale", 1),
+        env_vars={}
+    )
+
+    print("\n📋 Checking all existing deployments (all types)...")
+    try:
+        deployments = cclient._api.get_deployments_deployments_get().results
+    except Exception as e:
+        print(f"❌ Failed to fetch deployments: {e}")
+        sys.exit(1)
+
+    print("\n📋 Raw deployments returned:")
+    for d in deployments:
+        print(f" - name: '{getattr(d, 'name', '')}' (id={getattr(d, 'id', '')}, type={getattr(d, 'type', '')})")
+
+    target = config["deployment_name"].strip().lower()
+    matched = next(
+        (d for d in deployments if getattr(d, "name", "").strip().lower() == target),
+        None
+    )
+
+    if matched:
+        print(f"\n🔄 Deployment '{config['deployment_name']}' exists (ID: {matched.id}). Proceeding with update...")
+        try:
+            response = cclient.update_cserve(matched.id, request)
+            print("✅ Deployment updated successfully.")
+        except Exception as e:
+            print(f"❌ Failed to update deployment: {e}")
+            raise
+    else:
+        print(f"\n🆕 Deployment '{config['deployment_name']}' not found. Proceeding with creation...")
+        try:
+            response = cclient.create_cserve(request)
+            print("✅ Deployment created successfully.")
+        except Exception as e:
+            print(f"❌ Failed to create deployment: {e}")
+            raise
+
+    print("📦 Deployment ID:", response.id)
+    print("🔧 You can now monitor or manage your deployment using the CentML dashboard or CLI.")
+
+def main():
+    if len(sys.argv) < 2:
+        print("📝 No config file provided. Defaulting to 'config.json'...")
+        config_file = "config.json"
+    else:
+        config_file = sys.argv[1]
+
+    with get_centml_client() as cclient:
+        config = load_or_create_config(config_file, cclient)
+        deploy_model(config, cclient)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/sdk/llm-endpoints/centml-tools/scripts/inspect_model.py b/examples/sdk/llm-endpoints/centml-tools/scripts/inspect_model.py
new file mode 100644
index 0000000..09e60f6
--- /dev/null
+++ b/examples/sdk/llm-endpoints/centml-tools/scripts/inspect_model.py
@@ -0,0 +1,90 @@
+import sys
+import json
+import os
+from centml.sdk.api import get_centml_client
+from centml.sdk import CServeV2Recipe
+
+
+def print_hardware_details(cclient, instance_id):
+    hardware = next((h for h in cclient.get_hardware_instances() if h.id == instance_id), None)
+    if hardware:
+        print(f" 🧠 Hardware ID {hardware.id}:")
+        print(
+            f" name='{hardware.name}' gpu_type='{hardware.gpu_type}' num_gpu={hardware.num_gpu} "
+            f"cpu={hardware.cpu} memory={hardware.memory} cost_per_hr={hardware.cost_per_hr} "
+            f"cluster_id={hardware.cluster_id} provider='{hardware.provider}' "
+            f"num_accelerators={hardware.num_accelerators} accelerator_memory={hardware.accelerator_memory}"
+        )
+    else:
+        print(f" ⚠️ Hardware ID {instance_id} not found.")
+
+
+def print_all_config_variants(recipe_variant, cclient):
+    for label in dir(recipe_variant):
+        if label.startswith("_"):
+            continue
+        config = getattr(recipe_variant, label, None)
+        if config and hasattr(config, "recipe"):
+            print(f"\n🔧 Prebuilt Configuration: {label}")
+            print(f" Model: {config.recipe.model}")
+            print(f" Hardware Instance ID: {config.hardware_instance_id}")
+            print_hardware_details(cclient, config.hardware_instance_id)
+            print(" Recipe:")
+            try:
+                print(config.recipe.model_dump())
+            except AttributeError:
+                print(config.recipe.dict())
+
+
+def print_all_hardware(cclient):
+    print("\n📋 All Available Hardware Instances:")
+    for h in cclient.get_hardware_instances():
+        print(
+            f"id={h.id} name='{h.name}' gpu_type='{h.gpu_type}' num_gpu={h.num_gpu} "
+            f"cpu={h.cpu} memory={h.memory} cost_per_hr={h.cost_per_hr} "
+            f"cluster_id={h.cluster_id} provider='{h.provider}' "
+            f"num_accelerators={h.num_accelerators} accelerator_memory={h.accelerator_memory}"
+        )
+
+
+def main():
+    model_name = None
+    config_path = None
+
+    for arg in sys.argv[1:]:
+        if arg.startswith("CONFIG="):
+            config_path = arg.split("=", 1)[1]
+        elif arg.endswith(".json") and os.path.isfile(arg):
+            config_path = arg
+        elif os.path.isfile(arg):
+            config_path = arg
+        else:
+            model_name = arg  # fallback to direct model name
+
+    if config_path:
+        try:
+            with open(config_path, "r") as f:
+                config = json.load(f)
+                model_name = config.get("model")
+        except Exception as e:
+            print(f"❌ Failed to read config: {e}")
+            sys.exit(1)
+
+    if not model_name:
+        print("❌ Usage: python3 inspect_model.py <model_name> or CONFIG=<config.json>")
+        sys.exit(1)
+
+    with get_centml_client() as cclient:
+        print(f"\n🔍 Inspecting model: {model_name}")
+        recipes = cclient.get_cserve_recipe(model=model_name)
+
+        for variant in recipes:
+            print_all_config_variants(variant, cclient)
+
+        print_all_hardware(cclient)
+
+
+if __name__ == "__main__":
+    main()
base_url="https://api.centml.com/openai/v1", +) + +# Define your question +user_question = "How does CentML improve your AIOps?" + +completion = client.chat.completions.create( + model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": user_question} # Add the user's question + ], + max_tokens=2000, + temperature=0.7, + top_p=1, + n=1, + stream=False, + frequency_penalty=0, + presence_penalty=0.5, + stop=[] +) + +print(completion.choices[0].message) \ No newline at end of file