diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc
index 37920e516..07f91adb4 100644
--- a/engine/cli/utils/download_progress.cc
+++ b/engine/cli/utils/download_progress.cc
@@ -17,10 +17,6 @@ namespace {
 std::string Repo2Engine(const std::string& r) {
   if (r == kLlamaRepo) {
     return kLlamaEngine;
-  } else if (r == kOnnxRepo) {
-    return kOnnxEngine;
-  } else if (r == kTrtLlmRepo) {
-    return kTrtLlmEngine;
   }
   return r;
 };
diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc
index 257aaaf2b..1934df3aa 100644
--- a/engine/controllers/engines.cc
+++ b/engine/controllers/engines.cc
@@ -13,10 +13,6 @@ namespace {
 std::string NormalizeEngine(const std::string& engine) {
   if (engine == kLlamaEngine) {
     return kLlamaRepo;
-  } else if (engine == kOnnxEngine) {
-    return kOnnxRepo;
-  } else if (engine == kTrtLlmEngine) {
-    return kTrtLlmRepo;
   }
   return engine;
 };
diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index fb9cc7e22..ac1f55d8f 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -297,9 +297,7 @@ void Models::GetModel(const HttpRequestPtr& req,
             fs::path(model_entry.value().path_to_model_yaml))
             .string());
     auto model_config = yaml_handler.GetModelConfig();
-    if (model_config.engine == kOnnxEngine ||
-        model_config.engine == kLlamaEngine ||
-        model_config.engine == kTrtLlmEngine) {
+    if (model_config.engine == kLlamaEngine) {
       auto ret = model_config.ToJsonString();
       auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret);
       resp->setStatusCode(drogon::k200OK);
@@ -379,9 +377,7 @@ void Models::UpdateModel(const HttpRequestPtr& req,
     yaml_handler.ModelConfigFromFile(yaml_fp.string());
     config::ModelConfig model_config = yaml_handler.GetModelConfig();
     std::string message;
-    if (model_config.engine == kOnnxEngine ||
-        model_config.engine == kLlamaEngine ||
-        model_config.engine == kTrtLlmEngine) {
+    if (model_config.engine == kLlamaEngine) {
       model_config.FromJson(json_body);
       yaml_handler.UpdateModelConfig(model_config);
       yaml_handler.WriteYamlFile(yaml_fp.string());
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc
index 6938a3005..93cd8605c 100644
--- a/engine/services/engine_service.cc
+++ b/engine/services/engine_service.cc
@@ -27,19 +27,16 @@ namespace {
 std::string GetSuitableCudaVersion(const std::string& engine,
                                    const std::string& cuda_driver_version) {
   auto suitable_toolkit_version = "";
-  if (engine == kTrtLlmRepo || engine == kTrtLlmEngine) {
-    // for tensorrt-llm, we need to download cuda toolkit v12.4
-    suitable_toolkit_version = "12.4";
-  } else {
-    // llamacpp
-    auto cuda_driver_semver =
-        semantic_version_utils::SplitVersion(cuda_driver_version);
-    if (cuda_driver_semver.major == 11) {
-      suitable_toolkit_version = "11.7";
-    } else if (cuda_driver_semver.major == 12) {
-      suitable_toolkit_version = "12.0";
-    }
+
+  // llamacpp
+  auto cuda_driver_semver =
+      semantic_version_utils::SplitVersion(cuda_driver_version);
+  if (cuda_driver_semver.major == 11) {
+    suitable_toolkit_version = "11.7";
+  } else if (cuda_driver_semver.major == 12) {
+    suitable_toolkit_version = "12.0";
   }
+
   return suitable_toolkit_version;
 }
 
@@ -47,10 +44,6 @@ std::string GetSuitableCudaVersion(const std::string& engine,
 std::string NormalizeEngine(const std::string& engine) {
   if (engine == kLlamaEngine) {
     return kLlamaRepo;
-  } else if (engine == kOnnxEngine) {
-    return kOnnxRepo;
-  } else if (engine == kTrtLlmEngine) {
-    return kTrtLlmRepo;
   }
   return engine;
 };
@@ -58,10 +51,6 @@ std::string NormalizeEngine(const std::string& engine) {
 std::string Repo2Engine(const std::string& r) {
   if (r == kLlamaRepo) {
     return kLlamaEngine;
-  } else if (r == kOnnxRepo) {
-    return kOnnxEngine;
-  } else if (r == kTrtLlmRepo) {
-    return kTrtLlmEngine;
   }
   return r;
 };
@@ -69,10 +58,6 @@ std::string Repo2Engine(const std::string& r) {
 std::string GetEnginePath(std::string_view e) {
   if (e == kLlamaRepo) {
     return kLlamaLibPath;
-  } else if (e == kOnnxRepo) {
-    return kOnnxLibPath;
-  } else if (e == kTrtLlmRepo) {
-    return kTensorrtLlmPath;
   }
   return kLlamaLibPath;
 };
@@ -85,13 +70,6 @@ cpp::result<bool, std::string> EngineService::InstallEngineAsync(
   CTL_INF("InstallEngineAsync: " << ne << ", " << version << ", "
                                  << variant_name.value_or(""));
   auto os = hw_inf_.sys_inf->os;
-  if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) {
-    return cpp::fail("Engine " + ne + " is not supported on macOS");
-  }
-
-  if (os == kLinuxOs && ne == kOnnxRepo) {
-    return cpp::fail("Engine " + ne + " is not supported on Linux");
-  }
 
   auto result = DownloadEngine(ne, version, variant_name);
   if (result.has_error()) {
@@ -386,9 +364,8 @@ cpp::result<bool, std::string> EngineService::DownloadEngine(
 
 cpp::result<bool, std::string> EngineService::DownloadCuda(
     const std::string& engine, bool async) {
-  if (hw_inf_.sys_inf->os == "mac" || engine == kOnnxRepo ||
-      engine == kOnnxEngine) {
-    // mac and onnx engine does not require cuda toolkit
+  if (hw_inf_.sys_inf->os == "mac") {
+    // mac does not require cuda toolkit
     return true;
   }
 
@@ -453,13 +430,7 @@ cpp::result<bool, std::string> EngineService::DownloadCuda(
 std::string EngineService::GetMatchedVariant(
     const std::string& engine, const std::vector<std::string>& variants) {
   std::string matched_variant;
-  if (engine == kTrtLlmRepo || engine == kTrtLlmEngine) {
-    matched_variant = engine_matcher_utils::ValidateTensorrtLlm(
-        variants, hw_inf_.sys_inf->os, hw_inf_.cuda_driver_version);
-  } else if (engine == kOnnxRepo || engine == kOnnxEngine) {
-    matched_variant = engine_matcher_utils::ValidateOnnx(
-        variants, hw_inf_.sys_inf->os, hw_inf_.sys_inf->arch);
-  } else if (engine == kLlamaRepo || engine == kLlamaEngine) {
+  if (engine == kLlamaRepo || engine == kLlamaEngine) {
     auto suitable_avx =
         engine_matcher_utils::GetSuitableAvxVariant(hw_inf_.cpu_inf);
     matched_variant = engine_matcher_utils::Validate(
@@ -638,13 +609,6 @@ cpp::result<std::vector<EngineVariantResponse>, std::string>
 EngineService::GetInstalledEngineVariants(const std::string& engine) const {
   auto ne = NormalizeEngine(engine);
   auto os = hw_inf_.sys_inf->os;
-  if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) {
-    return cpp::fail("Engine " + engine + " is not supported on macOS");
-  }
-
-  if (os == kLinuxOs && ne == kOnnxRepo) {
-    return cpp::fail("Engine " + engine + " is not supported on Linux");
-  }
 
   auto engines_variants_dir =
       file_manager_utils::GetEnginesContainerPath() / ne;
@@ -954,13 +918,7 @@ cpp::result<bool, std::string> EngineService::IsEngineReady(
   }
 
   auto os = hw_inf_.sys_inf->os;
 
-  if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) {
-    return cpp::fail("Engine " + engine + " is not supported on macOS");
-  }
-  if (os == kLinuxOs && ne == kOnnxRepo) {
-    return cpp::fail("Engine " + engine + " is not supported on Linux");
-  }
   auto installed_variants = GetInstalledEngineVariants(engine);
   if (installed_variants.has_error()) {
     return cpp::fail(installed_variants.error());
diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h
index 9392ede35..35368c519 100644
--- a/engine/utils/engine_constants.h
+++ b/engine/utils/engine_constants.h
@@ -1,9 +1,6 @@
 #pragma once
 
const auto kOnnxEngine = "onnxruntime"; constexpr const auto kLlamaEngine = "llama-cpp"; -constexpr const auto kTrtLlmEngine = "tensorrt-llm"; - constexpr const auto kPythonEngine = "python-engine"; constexpr const auto kOpenAiEngine = "openai"; @@ -14,15 +11,11 @@ constexpr const auto kRemote = "remote"; constexpr const auto kLocal = "local"; -constexpr const auto kOnnxRepo = "cortex.onnx"; constexpr const auto kLlamaRepo = "cortex.llamacpp"; -constexpr const auto kTrtLlmRepo = "cortex.tensorrt-llm"; constexpr const auto kPythonRuntimeRepo = "cortex.python"; constexpr const auto kLlamaLibPath = "./engines/cortex.llamacpp"; constexpr const auto kPythonRuntimeLibPath = "/engines/cortex.python"; -constexpr const auto kOnnxLibPath = "/engines/cortex.onnx"; -constexpr const auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm"; // other constants constexpr auto static kHuggingFaceHost = "huggingface.co";