diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 32ddf80999..0a08c0e2ed 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -145,7 +145,9 @@ set( MIOpen_Source
     rnn_api.cpp
     rnn/rnn_util.cpp
     rnn/Solutions/rnn_transformer.cpp
+    softmax.cpp
     softmax_api.cpp
+    softmax/problem_description.cpp
     solution.cpp
     solver.cpp
     solver/activ/bwd_0.cpp
@@ -253,6 +255,7 @@ set( MIOpen_Source
     solver/pooling/backwardNd.cpp
     solver/reduce/forward_argmax.cpp
     solver/reduce/forward_sum.cpp
+    solver/softmax/softmax.cpp
     subbuffers.cpp
     sum_api.cpp
     target_properties.cpp
@@ -565,7 +568,6 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         ocl/mloNorm.cpp
         ocl/pooling_ocl.cpp
         ocl/tensorocl.cpp
-        ocl/softmaxocl.cpp
         ocl/rnnocl.cpp
         ocl/utilocl.cpp
         ocl/ctcocl.cpp
diff --git a/src/include/miopen/ctc.hpp b/src/include/miopen/ctc.hpp
index 0757814e47..546c22de6f 100644
--- a/src/include/miopen/ctc.hpp
+++ b/src/include/miopen/ctc.hpp
@@ -56,7 +56,7 @@ struct CTCLossDescriptor : miopenCTCLossDescriptor
                                    const int* inputLengths,
                                    miopenCTCLossAlgo_t algo) const;
 
-    void CTCLoss(const Handle& handle,
+    void CTCLoss(Handle& handle,
                  const TensorDescriptor& probsDesc,
                  ConstData_t probs,
                  const int* labels,
diff --git a/src/include/miopen/softmax.hpp b/src/include/miopen/softmax.hpp
index 62dcb7d579..b4f8909908 100644
--- a/src/include/miopen/softmax.hpp
+++ b/src/include/miopen/softmax.hpp
@@ -34,7 +34,7 @@ namespace miopen {
 struct Handle;
 struct TensorDescriptor;
 
-miopenStatus_t SoftmaxForward(const Handle& handle,
+miopenStatus_t SoftmaxForward(Handle& handle,
                               const void* alpha,
                               const void* beta,
                               const TensorDescriptor& xDesc,
@@ -46,7 +46,7 @@ miopenStatus_t SoftmaxForward(const Handle& handle,
                               int x_offset = 0,
                               int y_offset = 0);
 
-miopenStatus_t SoftmaxBackward(const Handle& handle,
+miopenStatus_t SoftmaxBackward(Handle& handle,
                                const void* alpha,
                                const TensorDescriptor& yDesc,
                                ConstData_t y,
diff --git a/src/include/miopen/softmax/invoke_params.hpp b/src/include/miopen/softmax/invoke_params.hpp
new file mode 100644
index 0000000000..c2792929ab
--- /dev/null
+++ b/src/include/miopen/softmax/invoke_params.hpp
@@ -0,0 +1,147 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/invoke_params.hpp>
+#include <miopen/tensor.hpp>
+
+namespace miopen {
+namespace softmax {
+
+struct InvokeParams : public miopen::InvokeParams
+{
+    InvokeParams(const void* alpha_,
+                 const void* beta_,
+                 const TensorDescriptor& xDesc_,
+                 ConstData_t x_,
+                 const TensorDescriptor& yDesc_,
+                 Data_t y_,
+                 miopenSoftmaxAlgorithm_t algorithm_,
+                 miopenSoftmaxMode_t mode_,
+                 int x_offset_ = 0,
+                 int y_offset_ = 0)
+        : algorithm(algorithm_),
+          mode(mode_),
+
+          xdxDesc(xDesc_),
+          x(x_),
+          dx(nullptr),
+
+          yDesc(yDesc_),
+          forward_y(y_),
+          backward_y(nullptr),
+
+          dy(nullptr),
+
+          xdx_offset(x_offset_),
+          y_offset(y_offset_),
+          dy_offset(0)
+    {
+        InitializeAlphaBeta(alpha_, beta_);
+    }
+
+    InvokeParams(const void* alpha_,
+                 const void* beta_,
+                 const TensorDescriptor& yDesc_,
+                 ConstData_t y_,
+                 const TensorDescriptor& dyDesc_,
+                 ConstData_t dy_,
+                 const TensorDescriptor& dxDesc_,
+                 Data_t dx_,
+                 miopenSoftmaxAlgorithm_t algorithm_,
+                 miopenSoftmaxMode_t mode_,
+                 int y_offset_,
+                 int dy_offset_,
+                 int dx_offset_)
+        : algorithm(algorithm_),
+          mode(mode_),
+
+          xdxDesc(dxDesc_),
+          x(nullptr),
+          dx(dx_),
+
+          yDesc(yDesc_),
+          forward_y(nullptr),
+          backward_y(y_),
+
+          dyDesc(dyDesc_),
+          dy(dy_),
+
+          xdx_offset(dx_offset_),
+          y_offset(y_offset_),
+          dy_offset(dy_offset_)
+    {
+        InitializeAlphaBeta(alpha_, beta_);
+    }
+
+    std::size_t GetWorkspaceSize() const { return 0; }
+    Data_t GetWorkspace() const { return nullptr; }
+
+public:
+    float alpha;
+    float beta;
+    miopenSoftmaxAlgorithm_t algorithm;
+    miopenSoftmaxMode_t mode;
+
+    // xdxDesc is used for both forward and backward
+    TensorDescriptor xdxDesc;
+    ConstData_t x;
+    Data_t dx;
+
+    TensorDescriptor yDesc;
+    Data_t forward_y;
+    ConstData_t backward_y;
+
+    // backward specific part
+    TensorDescriptor dyDesc;
+    ConstData_t dy;
+
+    // xdx_offset is used for both forward and backward
+    int xdx_offset;
+    int y_offset;
+    int dy_offset;
+
+private:
+    void InitializeAlphaBeta(const void* alpha_, const void* beta_)
+    {
+        alpha = 0.0f;
+        beta  = 0.0f;
+
+        if(alpha_ != nullptr)
+        {
+            alpha = *(static_cast<const float*>(alpha_));
+        }
+
+        if(beta_ != nullptr)
+        {
+            beta = *(static_cast<const float*>(beta_));
+        }
+    }
+};
+
+} // namespace softmax
+} // namespace miopen
diff --git a/src/include/miopen/softmax/problem_description.hpp b/src/include/miopen/softmax/problem_description.hpp
new file mode 100644
index 0000000000..9a5e7150b4
--- /dev/null
+++ b/src/include/miopen/softmax/problem_description.hpp
@@ -0,0 +1,147 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/problem_description_base.hpp>
+#include <miopen/tensor.hpp>
+
+namespace miopen {
+
+struct NetworkConfig;
+
+namespace softmax {
+
+struct ProblemDescription : ProblemDescriptionBase
+{
+    // softmax forward constructor
+    ProblemDescription(const void* alpha_,
+                       const void* beta_,
+                       const TensorDescriptor& xDesc_,
+                       const TensorDescriptor& yDesc_,
+                       miopenSoftmaxAlgorithm_t algorithm_,
+                       miopenSoftmaxMode_t mode_)
+        : isForward(true),
+          xdxDesc(xDesc_),
+          yDesc(yDesc_),
+
+          algorithm(algorithm_),
+          mode(mode_)
+    {
+        CheckAndAssignAlphaBeta(alpha_, beta_);
+
+        if(xdxDesc.GetType() != yDesc.GetType())
+        {
+            MIOPEN_THROW(miopenStatusBadParm, "Tensor types do not match.");
+        }
+
+        if(xdxDesc.GetLengths() != yDesc.GetLengths())
+        {
+            MIOPEN_THROW(miopenStatusBadParm, "Tensor dimension lengths do not match.");
+        }
+    }
+
+    ProblemDescription(const void* alpha_,
+                       const void* beta_,
+                       const TensorDescriptor& yDesc_,
+                       const TensorDescriptor& dyDesc_,
+                       const TensorDescriptor& dxDesc_,
+                       miopenSoftmaxAlgorithm_t algorithm_,
+                       miopenSoftmaxMode_t mode_)
+        : isForward(false),
+          xdxDesc(dxDesc_),
+          yDesc(yDesc_),
+          dyDesc(dyDesc_),
+          algorithm(algorithm_),
+          mode(mode_)
+    {
+        CheckAndAssignAlphaBeta(alpha_, beta_);
+
+        if(yDesc != dyDesc)
+        {
+            MIOPEN_THROW(miopenStatusBadParm);
+        }
+
+        if(xdxDesc.GetType() != dyDesc.GetType())
+        {
+            MIOPEN_THROW(miopenStatusBadParm, "Tensor types do not match.");
+        }
+
+        if(xdxDesc.GetLengths() != dyDesc.GetLengths())
+        {
+            MIOPEN_THROW(miopenStatusBadParm, "Tensor dimension lengths do not match.");
+        }
+    }
+
+    bool IsForward() const { return isForward; }
+    miopenSoftmaxAlgorithm_t GetAlgorithm() const { return algorithm; }
+    miopenSoftmaxMode_t GetMode() const { return mode; }
+    float GetAlpha() const { return alpha; }
+    float GetBeta() const { return beta; }
+
+    // for forward
+    const TensorDescriptor& GetXDesc() const { return xdxDesc; }
+    const TensorDescriptor& GetYDesc() const { return yDesc; }
+
+    // for backward
+    const TensorDescriptor& GetdYDesc() const { return dyDesc; }
+    const TensorDescriptor& GetdXDesc() const { return xdxDesc; }
+
+    NetworkConfig MakeNetworkConfig() const override;
+
+private:
+    void CheckAndAssignAlphaBeta(const void* alpha_, const void* beta_)
+    {
+        if(alpha_ == nullptr)
+        {
+            MIOPEN_THROW(miopenStatusBadParm, "Alpha value is nullptr");
+        }
+
+        if(beta_ == nullptr)
+        {
+            MIOPEN_THROW(miopenStatusBadParm, "Beta value is nullptr");
+        }
+
+        alpha = *(static_cast<const float*>(alpha_));
+        beta  = *(static_cast<const float*>(beta_));
+    }
+
+    const bool isForward;
+
+    float alpha;
+    float beta;
+
+    // for forward xDesc is stored in xdxDesc, for backward dxDesc is stored in xdxDesc
+    TensorDescriptor xdxDesc;
+    TensorDescriptor yDesc;
+    TensorDescriptor dyDesc;
+
+    const miopenSoftmaxAlgorithm_t algorithm;
+    const miopenSoftmaxMode_t mode;
+};
+
+} // namespace softmax
+} // namespace miopen
diff --git a/src/include/miopen/softmax/solvers.hpp b/src/include/miopen/softmax/solvers.hpp
new file mode 100644
index 0000000000..acbcca4d72
--- /dev/null
+++ b/src/include/miopen/softmax/solvers.hpp
@@ -0,0 +1,62 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/solver.hpp>
+#include <miopen/softmax/problem_description.hpp>
+
+#include <utility>
+
+namespace miopen {
+
+namespace solver {
+
+namespace softmax {
+
+using SoftmaxSolver = NonTunableSolverBase<ExecutionContext, miopen::softmax::ProblemDescription>;
+
+struct Softmax final : SoftmaxSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Softmax>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::softmax::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::softmax::ProblemDescription& problem) const override;
+
+    std::size_t GetWorkspaceSize(const ExecutionContext& context,
+                                 const miopen::softmax::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+} // namespace softmax
+
+} // namespace solver
+
+} // namespace miopen
diff --git a/src/ocl/ctcocl.cpp b/src/ocl/ctcocl.cpp
index d594544403..f38c4c65d7 100644
--- a/src/ocl/ctcocl.cpp
+++ b/src/ocl/ctcocl.cpp
@@ -44,7 +44,7 @@
 
 namespace miopen {
 
-void CTCLossDescriptor::CTCLoss(const Handle& handle,
+void CTCLossDescriptor::CTCLoss(Handle& handle,
                                 const TensorDescriptor& probsDesc,
                                 ConstData_t probs,
                                 const int* labels,
diff --git a/src/ocl/softmaxocl.cpp b/src/ocl/softmaxocl.cpp
deleted file mode 100644
index 38e53413e7..0000000000
--- a/src/ocl/softmaxocl.cpp
+++ /dev/null
@@ -1,613 +0,0 @@
-/*******************************************************************************
- *
- * MIT License
- *
- * Copyright (c) 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- *******************************************************************************/
-#include <miopen/kernel_cache.hpp>
-#include <miopen/softmax.hpp>
-#include <miopen/float_equal.hpp>
-#include <miopen/check_numerics.hpp>
-#include <miopen/tensor.hpp>
-
-namespace miopen {
-
-int nextPow2(int v)
-{
-
-    if(v == 1)
-    {
-        return (v << 1);
-    }
-    else
-    {
-        v--;
-        v |= v >> 1;
-        v |= v >> 2;
-        v |= v >> 4;
-        v |= v >> 8;
-        v |= v >> 16;
-        v++;
-        return v;
-    }
-}
-
-miopenStatus_t SoftmaxForward(const Handle& handle,
-                              const void* alpha,
-                              const void* beta,
-                              const TensorDescriptor& xDesc,
-                              ConstData_t x,
-                              const TensorDescriptor& yDesc,
-                              Data_t y,
-                              miopenSoftmaxAlgorithm_t algorithm,
-                              miopenSoftmaxMode_t mode,
-                              int x_offset,
-                              int y_offset)
-{
-    if(x == nullptr || y == nullptr)
-    {
-        MIOPEN_THROW(miopenStatusBadParm, "Null pointer for tensor.");
-    }
-
-    if(xDesc.GetType() != yDesc.GetType())
-    {
-        MIOPEN_THROW(miopenStatusBadParm, "Tensor types do not match.");
-    }
-
-    if(xDesc.GetLengths() != yDesc.GetLengths())
-    {
-        MIOPEN_THROW(miopenStatusBadParm, "Tensor dimension lengths do not match.");
-    }
-
-    int n, c, h, w;
-    std::tie(n, c, h, w) = tien<4>(yDesc.GetLengths());
-
-    int in_nstr, in_cstr, in_hstr;
-    std::tie(in_nstr, in_cstr, in_hstr, std::ignore) = tien<4>(xDesc.GetStrides());
-
-    int out_nstr, out_cstr, out_hstr;
-    std::tie(out_nstr, out_cstr, out_hstr, std::ignore) = tien<4>(yDesc.GetStrides());
-
-    // using workgroup size of 256 by default
-    int grid_size   = mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? n : n * h * w;
-    int spatial_dim = mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? 1 : h * w;
-    int vector_size = mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? c * h * w : c;
-    // num_spatial_dims or pixels each workgroup can compute
-    int num_batch = vector_size < 256 ? nextPow2(256 / vector_size) : 1;
-
-    const std::vector<size_t> vld{256, 1, 1};
-
-    bool usefp16 = false;
-    bool usefp32 = true;
-    if(yDesc.GetType() == miopenHalf)
-    {
-        usefp16 = true;
-        usefp32 = false;
-    }
-
-    auto alpha_fp = *(static_cast<const float*>(alpha));
-    auto beta_fp  = *(static_cast<const float*>(beta));
-
-    // See Kernels/MIOpenSoftmax.cl for description
-    if(num_batch == 1)
-    { // CSR-Vector like approach
-
-        // Control the max. number of workgroups launched so that we do not
-        // start getting workgroup scheduling overheads
-        size_t workgroups = std::min(grid_size, 64 * 40 * 8);
-        const std::vector<size_t> vgd{workgroups * vld[0], 1, 1};
-
-        std::string algo_name = "SoftmaxForwardOneBatch";
-        std::string network_config =
-            "sfmfwd-n" + std::to_string(num_batch) + "half" +
-            std::to_string(static_cast<int>(usefp16)) + "float" +
-            std::to_string(static_cast<int>(usefp32)) + "g" + std::to_string(vgd[0]) + "l" +
-            std::to_string(vld[0]) + "dim" + std::to_string(spatial_dim) + "grid" +
-            std::to_string(grid_size) + "wg" + std::to_string(workgroups) + "v" +
-            std::to_string(vector_size) + "xpk" +
-            std::to_string(static_cast<int>(xDesc.IsPacked())) + "ypk" +
-            std::to_string(static_cast<int>(yDesc.IsPacked())) + "a" + std::to_string(alpha_fp) +
-            "b" + std::to_string(beta_fp) + "algo" + std::to_string(static_cast<int>(algorithm)) +
-            "mode" + std::to_string(static_cast<int>(mode));
-
-        auto&& kernels = handle.GetKernels(algo_name, network_config);
-
-        if(!kernels.empty())
-        {
-            kernels.front()(x,
-                            y,
-                            vector_size,
-                            grid_size,
-                            spatial_dim,
-                            h,
-                            w,
-                            in_nstr,
-                            in_cstr,
-                            in_hstr,
-                            out_nstr,
-                            out_cstr,
-                            out_hstr,
-                            x_offset,
-                            y_offset,
-                            alpha_fp,
-                            beta_fp);
-        }
-        else
-        {
-            std::string program_name = "MIOpenSoftmax.cl";
-            std::string kernel_name  = "SoftmaxForward";
-
-            // compile parameters
-            std::string parms = "-DNUM_BATCH=" + std::to_string(num_batch) +
-                                " -DMIOPEN_USE_FP16=" + std::to_string(static_cast<int>(usefp16)) +
-                                " -DMIOPEN_USE_FP32=" + std::to_string(static_cast<int>(usefp32));
-
-            if(algorithm == MIOPEN_SOFTMAX_LOG)
-                parms += " -DUSE_SOFTMAX_LOG=1";
-            else if(algorithm == MIOPEN_SOFTMAX_FAST)
-                parms += " -DUSE_SOFTMAX_FAST=1";
-            else
-                parms += " -DUSE_SOFTMAX_ACCURATE=1";
-
-            if(mode == MIOPEN_SOFTMAX_MODE_INSTANCE)
-                parms += " -DUSE_SOFTMAX_MODE_INSTANCE=1";
-            else
-                parms += " -DUSE_SOFTMAX_MODE_CHANNEL=1";
-
-            parms += " -DRUN_FORWARD=1";
-            parms += " -DIS_INPUT_PACKED=" + std::to_string(static_cast<int>(xDesc.IsPacked())) +
-                     " -DIS_OUTPUT_PACKED=" + std::to_string(static_cast<int>(yDesc.IsPacked()));
-
-            if(!float_equal(alpha_fp, 1.0))
-                parms += " -DUSE_ALPHA=1";
-
-            if(!float_equal(beta_fp, 0))
-                parms += " -DUSE_BETA=1";
-
-            handle.AddKernel(algo_name, network_config, program_name, kernel_name, vld, vgd, parms)(
-                x,
-                y,
-                vector_size,
-                grid_size,
-                spatial_dim,
-                h,
-                w,
-                in_nstr,
-                in_cstr,
-                in_hstr,
-                out_nstr,
-                out_cstr,
-                out_hstr,
-                x_offset,
-                y_offset,
-                alpha_fp,
-                beta_fp);
-        }
-    }
-    else
-    { // CSR-Stream like approach
-
-        // num_threads iterating over channels for one spatial_dim
-        int batch_size = 256 / num_batch;
-        // num_channels each threads iterates over to cover all the channels
-        int u_batch_size = (vector_size > batch_size) ? nextPow2(vector_size / batch_size) : 1;
-
-        size_t workgroups =
-            (grid_size % num_batch == 0) ? (grid_size / num_batch) : (grid_size / num_batch + 1);
-        const std::vector<size_t> vgd{workgroups * vld[0], 1, 1};
-
-        if((u_batch_size + 1) * 256 > 65536 && yDesc.GetType() == miopenHalf)
-            MIOPEN_THROW(miopenStatusBadParm, "Exceed local memory capacity");
-
-        std::string algo_name = "SoftmaxForwardMultiBatch";
-        std::string network_config =
-            "sfmfwd-n" + std::to_string(num_batch) + "half" +
-            std::to_string(static_cast<int>(usefp16)) + "float" +
-            std::to_string(static_cast<int>(usefp32)) + "g" + std::to_string(vgd[0]) + "l" +
-            std::to_string(vld[0]) + "dim" + std::to_string(spatial_dim) + "grid" +
-            std::to_string(grid_size) + "wg" + std::to_string(workgroups) + "v" +
-            std::to_string(vector_size) + "ubatch" + std::to_string(u_batch_size) + "batch" +
-            std::to_string(batch_size) + "xpk" +
-            std::to_string(static_cast<int>(xDesc.IsPacked())) + "ypk" +
-            std::to_string(static_cast<int>(yDesc.IsPacked())) + "a" + std::to_string(alpha_fp) +
-            "b" + std::to_string(beta_fp) + "algo" + std::to_string(static_cast<int>(algorithm)) +
-            "mode" + std::to_string(static_cast<int>(mode));
-
-        auto&& kernels = handle.GetKernels(algo_name, network_config);
-
-        if(!kernels.empty())
-        {
-            kernels.front()(x,
-                            y,
-                            vector_size,
-                            grid_size,
-                            spatial_dim,
-                            h,
-                            w,
-                            in_nstr,
-                            in_cstr,
-                            in_hstr,
-                            out_nstr,
-                            out_cstr,
-                            out_hstr,
-                            x_offset,
-                            y_offset,
-                            alpha_fp,
-                            beta_fp);
-        }
-        else
-        {
-            std::string program_name = "MIOpenSoftmax.cl";
-            std::string kernel_name  = "SoftmaxForward";
-            std::string parms        = "-DNUM_BATCH=" + std::to_string(num_batch) +
-                                " -DBATCH_SIZE=" + std::to_string(batch_size) +
-                                " -DU_BATCH_SIZE=" + std::to_string(u_batch_size) +
-                                " -DMIOPEN_USE_FP16=" + std::to_string(static_cast<int>(usefp16)) +
-                                " -DMIOPEN_USE_FP32=" + std::to_string(static_cast<int>(usefp32));
-
-            if(algorithm == MIOPEN_SOFTMAX_LOG)
-                parms += " -DUSE_SOFTMAX_LOG=1";
-            else if(algorithm == MIOPEN_SOFTMAX_FAST)
-                parms += " -DUSE_SOFTMAX_FAST=1";
-            else
-                parms += " -DUSE_SOFTMAX_ACCURATE=1";
-
-            if(mode == MIOPEN_SOFTMAX_MODE_INSTANCE)
-                parms += " -DUSE_SOFTMAX_MODE_INSTANCE=1";
-            else
-                parms += " -DUSE_SOFTMAX_MODE_CHANNEL=1";
-
-            parms += " -DRUN_FORWARD=1";
-            parms += " -DIS_INPUT_PACKED=" + std::to_string(static_cast<int>(xDesc.IsPacked())) +
-                     " -DIS_OUTPUT_PACKED=" + std::to_string(static_cast<int>(yDesc.IsPacked()));
-
-            if(!float_equal(alpha_fp, 1.0))
-                parms += " -DUSE_ALPHA=1";
-
-            if(!float_equal(beta_fp, 0))
-                parms += " -DUSE_BETA=1";
-
-            handle.AddKernel(algo_name, network_config, program_name, kernel_name, vld, vgd, parms)(
-                x,
-                y,
-                vector_size,
-                grid_size,
-                spatial_dim,
-                h,
-                w,
-                in_nstr,
-                in_cstr,
-                in_hstr,
-                out_nstr,
-                out_cstr,
-                out_hstr,
-                x_offset,
-                y_offset,
-                alpha_fp,
-                beta_fp);
-        }
-    }
-    if(miopen::CheckNumericsEnabled())
-    {
-        miopen::checkNumericsOutput(handle, yDesc, y);
-    }
-    return miopenStatusSuccess;
-}
-
-miopenStatus_t SoftmaxBackward(const Handle& handle,
-                               const void* alpha,
-                               const TensorDescriptor& yDesc,
-                               ConstData_t y,
-                               const TensorDescriptor& dyDesc,
-                               ConstData_t dy,
-                               const void* beta,
-                               const TensorDescriptor& dxDesc,
-                               Data_t dx,
-                               miopenSoftmaxAlgorithm_t algorithm,
-                               miopenSoftmaxMode_t mode,
-                               int y_offset,
-                               int dy_offset,
-                               int dx_offset)
-{
-    if(dx == nullptr || y == nullptr || dy == nullptr)
-    {
-        MIOPEN_THROW(miopenStatusBadParm, "Null pointer for tensor.");
-    }
-
-    if(yDesc != dyDesc)
-    {
-        MIOPEN_THROW(miopenStatusBadParm);
-    }
-
-    if(dxDesc.GetType() != dyDesc.GetType())
-    {
-        MIOPEN_THROW(miopenStatusBadParm, "Tensor types do not match.");
-    }
-
-    if(dxDesc.GetLengths() != dyDesc.GetLengths())
-    {
-        MIOPEN_THROW(miopenStatusBadParm, "Tensor dimension lengths do not match.");
-    }
-
-    if(miopen::CheckNumericsEnabled())
-    {
-        miopen::checkNumericsInput(handle, yDesc, y);
-    }
-
-    int n, c, h, w;
-    std::tie(n, c, h, w) = tien<4>(dxDesc.GetLengths());
-
-    int din_nstr, din_cstr, din_hstr;
-    std::tie(din_nstr, din_cstr, din_hstr, std::ignore) = tien<4>(dxDesc.GetStrides());
-
-    int dout_nstr, dout_cstr, dout_hstr;
-    std::tie(dout_nstr, dout_cstr, dout_hstr, std::ignore) = tien<4>(dyDesc.GetStrides());
-
-    int out_nstr, out_cstr, out_hstr;
-    std::tie(out_nstr, out_cstr, out_hstr, std::ignore) = tien<4>(yDesc.GetStrides());
-
-    // using workgroup size of 256 by default
-    int grid_size   = mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? n : n * h * w;
-    int spatial_dim = mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? 1 : h * w;
-    int vector_size = mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? c * h * w : c;
-    // num_spatial_dims or pixels each workgroup can compute
-    int num_batch = vector_size < 256 ? nextPow2(256 / vector_size) : 1;
-
-    const std::vector<size_t> vld{256, 1, 1};
-
-    bool usefp16 = false;
-    bool usefp32 = true;
-    if(yDesc.GetType() == miopenHalf)
-    {
-        usefp16 = true;
-        usefp32 = false;
-    }
-
-    auto alpha_fp = *(static_cast<const float*>(alpha));
-    auto beta_fp  = *(static_cast<const float*>(beta));
-
-    // See Kernels/MIOpenSoftmax.cl for description
-    if(num_batch == 1)
-    { // CSR-Vector like approach
-
-        // Control the max. number of workgroups launched so that we do not
-        // start getting workgroup scheduling overheads
-        size_t workgroups = std::min(grid_size, 64 * 40 * 8);
-        const std::vector<size_t> vgd{workgroups * vld[0], 1, 1};
-
-        std::string algo_name = "SoftmaxBackwardOneBatch";
-        std::string network_config =
-            "sfmbwd-n" + std::to_string(num_batch) + "half" +
-            std::to_string(static_cast<int>(usefp16)) + "float" +
-            std::to_string(static_cast<int>(usefp32)) + "g" + std::to_string(vgd[0]) + "l" +
-            std::to_string(vld[0]) + "dim" + std::to_string(spatial_dim) + "grid" +
-            std::to_string(grid_size) + "wg" + std::to_string(workgroups) + "v" +
-            std::to_string(vector_size) + "ypk" +
-            std::to_string(static_cast<int>(yDesc.IsPacked())) + "dypk" +
-            std::to_string(static_cast<int>(dyDesc.IsPacked())) + "dxpk" +
-            std::to_string(static_cast<int>(dxDesc.IsPacked())) + "a" + std::to_string(alpha_fp) +
-            "b" + std::to_string(beta_fp) + "algo" + std::to_string(static_cast<int>(algorithm)) +
-            "mode" + std::to_string(static_cast<int>(mode));
-
-        auto&& kernels = handle.GetKernels(algo_name, network_config);
-
-        if(!kernels.empty())
-        {
-            kernels.front()(y,
-                            dy,
-                            dx,
-                            vector_size,
-                            grid_size,
-                            spatial_dim,
-                            h,
-                            w,
-                            out_nstr,
-                            out_cstr,
-                            out_hstr,
-                            dout_nstr,
-                            dout_cstr,
-                            dout_hstr,
-                            din_nstr,
-                            din_cstr,
-                            din_hstr,
-                            y_offset,
-                            dy_offset,
-                            dx_offset,
-                            alpha_fp,
-                            beta_fp);
-        }
-        else
-        {
-            std::string program_name = "MIOpenSoftmax.cl";
-            std::string kernel_name  = "SoftmaxBackward";
-            std::string parms        = "-DNUM_BATCH=" + std::to_string(num_batch) +
-                                " -DMIOPEN_USE_FP16=" + std::to_string(static_cast<int>(usefp16)) +
-                                " -DMIOPEN_USE_FP32=" + std::to_string(static_cast<int>(usefp32));
-
-            if(algorithm == MIOPEN_SOFTMAX_LOG)
-                parms += " -DUSE_SOFTMAX_LOG=1";
-            else if(algorithm == MIOPEN_SOFTMAX_FAST)
-                parms += " -DUSE_SOFTMAX_FAST=1";
-            else
-                parms += " -DUSE_SOFTMAX_ACCURATE=1";
-
-            if(mode == MIOPEN_SOFTMAX_MODE_INSTANCE)
-                parms += " -DUSE_SOFTMAX_MODE_INSTANCE=1";
-            else
-                parms += " -DUSE_SOFTMAX_MODE_CHANNEL=1";
-
-            parms += " -DRUN_FORWARD=0";
-            parms += " -DIS_OUTPUT_PACKED=" + std::to_string(static_cast<int>(yDesc.IsPacked())) +
-                     " -DIS_DOUTPUT_PACKED=" + std::to_string(static_cast<int>(dyDesc.IsPacked())) +
-                     " -DIS_DINPUT_PACKED=" + std::to_string(static_cast<int>(dxDesc.IsPacked()));
-
-            if(!float_equal(alpha_fp, 1.0))
-                parms += " -DUSE_ALPHA=1";
-
-            if(!float_equal(beta_fp, 0))
-                parms += " -DUSE_BETA=1";
-
-            handle.AddKernel(algo_name, network_config, program_name, kernel_name, vld, vgd, parms)(
-                y,
-                dy,
-                dx,
-                vector_size,
-                grid_size,
-                spatial_dim,
-                h,
-                w,
-                out_nstr,
-                out_cstr,
-                out_hstr,
-                dout_nstr,
-                dout_cstr,
-                dout_hstr,
-                din_nstr,
-                din_cstr,
-                din_hstr,
-                y_offset,
-                dy_offset,
-                dx_offset,
-                alpha_fp,
-                beta_fp);
-        }
-    }
-    else
-    { // CSR-Stream like approach
-        int batch_size   = 256 / num_batch;
-        int u_batch_size = (vector_size > batch_size) ? nextPow2(vector_size / batch_size) : 1;
-        size_t workgroups =
-            (grid_size % num_batch == 0) ? (grid_size / num_batch) : (grid_size / num_batch + 1);
-        const std::vector<size_t> vgd{workgroups * vld[0], 1, 1};
-
-        if((2 * u_batch_size + 1) * 256 > 65536 && yDesc.GetType() == miopenHalf)
-            MIOPEN_THROW(miopenStatusBadParm, "Exceed local memory capacity");
-
-        std::string algo_name = "SoftmaxBackwardMultiBatch";
-        std::string network_config =
-            "sfmbwd-n" + std::to_string(num_batch) + "half" +
-            std::to_string(static_cast<int>(usefp16)) + "float" +
-            std::to_string(static_cast<int>(usefp32)) + "g" + std::to_string(vgd[0]) + "l" +
-            std::to_string(vld[0]) + "dim" + std::to_string(spatial_dim) + "grid" +
-            std::to_string(grid_size) + "wg" + std::to_string(workgroups) + "v" +
-            std::to_string(vector_size) + "ubatch" + std::to_string(u_batch_size) + "batch" +
-            std::to_string(batch_size) + "ypk" +
-            std::to_string(static_cast<int>(yDesc.IsPacked())) + "dypk" +
-            std::to_string(static_cast<int>(dyDesc.IsPacked())) + "dxpk" +
-            std::to_string(static_cast<int>(dxDesc.IsPacked())) + "a" + std::to_string(alpha_fp) +
-            "b" + std::to_string(beta_fp) + "algo" + std::to_string(static_cast<int>(algorithm)) +
-            "mode" + std::to_string(static_cast<int>(mode));
-
-        auto&& kernels = handle.GetKernels(algo_name, network_config);
-
-        if(!kernels.empty())
-        {
-            kernels.front()(y,
-                            dy,
-                            dx,
-                            vector_size,
-                            grid_size,
-                            spatial_dim,
-                            h,
-                            w,
-                            out_nstr,
-                            out_cstr,
-                            out_hstr,
-                            dout_nstr,
-                            dout_cstr,
-                            dout_hstr,
-                            din_nstr,
-                            din_cstr,
-                            din_hstr,
-                            y_offset,
-                            dy_offset,
-                            dx_offset,
-                            alpha_fp,
-                            beta_fp);
-        }
-        else
-        {
-            std::string program_name = "MIOpenSoftmax.cl";
-            std::string kernel_name  = "SoftmaxBackward";
-            std::string parms        = "-DNUM_BATCH=" + std::to_string(num_batch) +
-                                " -DBATCH_SIZE=" + std::to_string(batch_size) +
-                                " -DU_BATCH_SIZE=" + std::to_string(u_batch_size) +
-                                " -DMIOPEN_USE_FP16=" + std::to_string(static_cast<int>(usefp16)) +
-                                " -DMIOPEN_USE_FP32=" + std::to_string(static_cast<int>(usefp32));
-
-            if(algorithm == MIOPEN_SOFTMAX_LOG)
-                parms += " -DUSE_SOFTMAX_LOG=1";
-            else if(algorithm == MIOPEN_SOFTMAX_FAST)
-                parms += " -DUSE_SOFTMAX_FAST=1";
-            else
-                parms += " -DUSE_SOFTMAX_ACCURATE=1";
-
-            if(mode == MIOPEN_SOFTMAX_MODE_INSTANCE)
-                parms += " -DUSE_SOFTMAX_MODE_INSTANCE=1";
-            else
-                parms += " -DUSE_SOFTMAX_MODE_CHANNEL=1";
-
-            parms += " -DRUN_FORWARD=0";
-            parms += " -DIS_OUTPUT_PACKED=" + std::to_string(static_cast<int>(yDesc.IsPacked())) +
-                     " -DIS_DOUTPUT_PACKED=" + std::to_string(static_cast<int>(dyDesc.IsPacked())) +
-                     " -DIS_DINPUT_PACKED=" + std::to_string(static_cast<int>(dxDesc.IsPacked()));
-
-            if(!float_equal(alpha_fp, 1.0))
-                parms += " -DUSE_ALPHA=1";
-
-            if(!float_equal(beta_fp, 0))
-                parms += " -DUSE_BETA=1";
-
-            handle.AddKernel(algo_name, network_config, program_name, kernel_name, vld, vgd, parms)(
-                y,
-                dy,
-                dx,
-                vector_size,
-                grid_size,
-                spatial_dim,
-                h,
-                w,
-                out_nstr,
-                out_cstr,
-                out_hstr,
-                dout_nstr,
-                dout_cstr,
-                dout_hstr,
-                din_nstr,
-                din_cstr,
-                din_hstr,
-                y_offset,
-                dy_offset,
-                dx_offset,
-                alpha_fp,
-                beta_fp);
-        }
-    }
-    if(miopen::CheckNumericsEnabled())
-    {
-        miopen::checkNumericsOutput(handle, dxDesc, dx);
-    }
-
-    return miopenStatusSuccess;
-}
-
-} // namespace miopen
diff --git a/src/softmax.cpp b/src/softmax.cpp
new file mode 100644
index 0000000000..6ac501ad71
--- /dev/null
+++ b/src/softmax.cpp
@@ -0,0 +1,107 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#include <miopen/kernel_cache.hpp>
+#include <miopen/softmax.hpp>
+#include <miopen/float_equal.hpp>
+#include <miopen/check_numerics.hpp>
+#include <miopen/tensor.hpp>
+
+#include <miopen/softmax/invoke_params.hpp>
+#include <miopen/softmax/solvers.hpp>
+#include <miopen/find_solution.hpp>
+
+namespace miopen {
+
+miopenStatus_t SoftmaxForward(Handle& handle,
+                              const void* alpha,
+                              const void* beta,
+                              const TensorDescriptor& xDesc,
+                              ConstData_t x,
+                              const TensorDescriptor& yDesc,
+                              Data_t y,
+                              miopenSoftmaxAlgorithm_t algorithm,
+                              miopenSoftmaxMode_t mode,
+                              int x_offset,
+                              int y_offset)
+{
+    if(x == nullptr || y == nullptr)
+    {
+        MIOPEN_THROW(miopenStatusBadParm, "Null pointer for tensor.");
+    }
+
+    const auto problem = softmax::ProblemDescription{alpha, beta, xDesc, yDesc, algorithm, mode};
+    const auto invoke_params =
+        softmax::InvokeParams{alpha, beta, xDesc, x, yDesc, y, algorithm, mode, x_offset, y_offset};
+    const auto algo    = AlgorithmName{"Softmax"};
+    const auto solvers = solver::SolverContainer<solver::softmax::Softmax>{};
+    solvers.ExecutePrimitive(handle, problem, algo, invoke_params);
+
+    return miopenStatusSuccess;
+}
+
+miopenStatus_t SoftmaxBackward(Handle& handle,
+                               const void* alpha,
+                               const TensorDescriptor& yDesc,
+                               ConstData_t y,
+                               const TensorDescriptor& dyDesc,
+                               ConstData_t dy,
+                               const void* beta,
+                               const TensorDescriptor& dxDesc,
+                               Data_t dx,
+                               miopenSoftmaxAlgorithm_t algorithm,
+                               miopenSoftmaxMode_t mode,
+                               int y_offset,
+                               int dy_offset,
+                               int dx_offset)
+{
+    if(dx == nullptr || y == nullptr || dy == nullptr)
+    {
+        MIOPEN_THROW(miopenStatusBadParm, "Null pointer for tensor.");
+    }
+
+    const auto problem =
+        softmax::ProblemDescription{alpha, beta, yDesc, dyDesc, dxDesc, algorithm, mode};
+    const auto invoke_params = softmax::InvokeParams{alpha,
+                                                     beta,
+                                                     yDesc,
+                                                     y,
+                                                     dyDesc,
+                                                     dy,
+                                                     dxDesc,
+                                                     dx,
+                                                     algorithm,
+                                                     mode,
+                                                     y_offset,
+                                                     dy_offset,
+                                                     dx_offset};
+    const auto algo          = AlgorithmName{"Softmax"};
+    const auto solvers       = solver::SolverContainer<solver::softmax::Softmax>{};
+    solvers.ExecutePrimitive(handle, problem, algo, invoke_params);
+
+    return miopenStatusSuccess;
+}
+
+} // namespace miopen
diff --git a/src/softmax/problem_description.cpp b/src/softmax/problem_description.cpp
new file mode 100644
index 0000000000..e40a725617
--- /dev/null
+++ b/src/softmax/problem_description.cpp
@@ -0,0 +1,90 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include <miopen/softmax/problem_description.hpp>
+#include <miopen/names.hpp>
+
+#include <sstream>
+
+namespace miopen {
+
+namespace softmax {
+
+NetworkConfig ProblemDescription::MakeNetworkConfig() const
+{
+    std::string network_config = "sfmfwd-";
+
+    if(isForward)
+    {
+        int n_x, c_x, h_x, w_x;
+        int n_y, c_y, h_y, w_y;
+
+        std::tie(n_x, c_x, h_x, w_x) = tien<4>(xdxDesc.GetLengths());
+        std::tie(n_y, c_y, h_y, w_y) = tien<4>(yDesc.GetLengths());
+
+        network_config += "n_x" + std::to_string(n_x) + "c_x" + std::to_string(c_x) + "h_x" +
+                          std::to_string(h_x) + "w_x" + std::to_string(w_x) +
+
+                          "n_y" + std::to_string(n_y) + "c_y" + std::to_string(c_y) + "h_y" +
+                          std::to_string(h_y) + "w_y" + std::to_string(w_y);
+
+        network_config += "xpk" + std::to_string(static_cast<int>(xdxDesc.IsPacked())) + "ypk" +
+                          std::to_string(static_cast<int>(yDesc.IsPacked()));
+    }
+    else
+    {
+        int n_y, c_y, h_y, w_y;
+        int n_dy, c_dy, h_dy, w_dy;
+        int n_dx, c_dx, h_dx, w_dx;
+
+        std::tie(n_y, c_y, h_y, w_y)     = tien<4>(yDesc.GetLengths());
+        std::tie(n_dy, c_dy, h_dy, w_dy) = tien<4>(dyDesc.GetLengths());
+        std::tie(n_dx, c_dx, h_dx, w_dx) = tien<4>(xdxDesc.GetLengths());
+
+        network_config += "n_y" + std::to_string(n_y) + "c_y" + std::to_string(c_y) + "h_y" +
+                          std::to_string(h_y) + "w_y" + std::to_string(w_y) +
+
+                          "n_dy" + std::to_string(n_dy) + "c_dy" + std::to_string(c_dy) + "h_dy" +
+                          std::to_string(h_dy) + "w_dy" + std::to_string(w_dy) +
+
+                          "n_dx" + std::to_string(n_dx) + "c_dx" + std::to_string(c_dx) + "h_dx" +
+                          std::to_string(h_dx) + "w_dx" + std::to_string(w_dx);
+
+        network_config += "ypk" + std::to_string(static_cast<int>(yDesc.IsPacked())) + "dypk" +
+                          std::to_string(static_cast<int>(dyDesc.IsPacked())) + "dxpk" +
+                          std::to_string(static_cast<int>(xdxDesc.IsPacked()));
+    }
+
+    network_config += "a" + std::to_string(alpha) + "b" + std::to_string(beta) + "algo" +
+                      std::to_string(static_cast<int>(algorithm)) + "mode" +
+                      std::to_string(static_cast<int>(mode));
+
+    return NetworkConfig{network_config};
+}
+
+} // namespace softmax
+
+} // namespace miopen
diff --git a/src/solver/softmax/softmax.cpp b/src/solver/softmax/softmax.cpp
new file mode 100644
index 0000000000..92b5d35548
--- /dev/null
+++ b/src/solver/softmax/softmax.cpp
@@ -0,0 +1,341 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include <miopen/softmax/solvers.hpp>
+
+#include <miopen/softmax/invoke_params.hpp>
+#include <miopen/datatype.hpp>
+#include <miopen/softmax.hpp>
+#include <miopen/kernel_build_params.hpp>
+#include <miopen/target_properties.hpp>
+#include <miopen/float_equal.hpp>
+
+namespace miopen {
+
+namespace solver {
+
+namespace softmax {
+
+int nextPow2(int v)
+{
+    if(v == 1)
+    {
+        return (v << 1);
+    }
+    else
+    {
+        v--;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        v++;
+        return v;
+    }
+}
+
+void getParams(const TensorDescriptor& in_desc,
+               miopenSoftmaxMode_t in_mode,
+               int& out_n,
+               int& out_c,
+               int& out_h,
+               int& out_w,
+               int& out_grid_size,
+               int& out_spatial_dim,
+               int& out_vector_size,
+               int& out_num_batch,
+               bool& out_usefp16,
+               bool& out_usefp32,
+               std::vector<size_t>& out_vld,
+               std::vector<size_t>& out_vgd,
+               size_t& out_workgroups,
+               int& out_batch_size,
+               int& out_u_batch_size)
+{
+    std::tie(out_n, out_c, out_h, out_w) = tien<4>(in_desc.GetLengths());
+
+    // using workgroup size of 256 by default
+    out_grid_size   = in_mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? out_n : out_n * out_h * out_w;
+    out_spatial_dim = in_mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? 1 : out_h * out_w;
+    out_vector_size = in_mode == MIOPEN_SOFTMAX_MODE_INSTANCE ? out_c * out_h * out_w : out_c;
+    // num_spatial_dims or pixels each workgroup can compute
+
+    /// \todo Magic numbers
+    out_num_batch = out_vector_size < 256 ? nextPow2(256 / out_vector_size) : 1;
+
+    out_vld = {256, 1, 1};
+
+    out_usefp16 = false;
+    out_usefp32 = true;
+    if(in_desc.GetType() == miopenHalf)
+    {
+        out_usefp16 = true;
+        out_usefp32 = false;
+    }
+
+    if(out_num_batch == 1)
+    {
+        out_workgroups = std::min(out_grid_size, 64 * 40 * 8);
+        out_vgd        = {out_workgroups * out_vld[0], 1, 1};
+
+        out_batch_size   = 0;
+        out_u_batch_size = 0;
+    }
+    else
+    {
+        out_batch_size = 256 / out_num_batch;
+        // num_channels each threads iterates over to cover all the channels
+        out_u_batch_size =
+            (out_vector_size > out_batch_size) ? nextPow2(out_vector_size / out_batch_size) : 1;
+
+        out_workgroups = (out_grid_size % out_num_batch == 0) ? (out_grid_size / out_num_batch)
+                                                              : (out_grid_size / out_num_batch + 1);
+        out_vgd        = {out_workgroups * out_vld[0], 1, 1};
+    }
+}
+
+bool Softmax::IsApplicable(
+    [[maybe_unused]] const ExecutionContext& context,
+    [[maybe_unused]] const miopen::softmax::ProblemDescription& problem) const
+{
+    return true;
+}
+
+std::size_t
+Softmax::GetWorkspaceSize([[maybe_unused]] const ExecutionContext& context,
+                          [[maybe_unused]] const miopen::softmax::ProblemDescription& problem) const
+{
+    return 0;
+}
+
+ConvSolution Softmax::GetSolution([[maybe_unused]] const ExecutionContext& context,
+                                  const miopen::softmax::ProblemDescription& problem) const
+{
+    auto result = ConvSolution{miopenStatusSuccess};
+
+    auto xDesc = problem.GetXDesc();
+    auto yDesc = problem.GetYDesc();
+
+    auto dxDesc = problem.GetdXDesc();
+    auto dyDesc = problem.GetdYDesc();
+
+    auto alpha     = problem.GetAlpha();
+    auto beta      = problem.GetBeta();
+    auto mode      = problem.GetMode();
+    auto algorithm = problem.GetAlgorithm();
+
+    bool isForward = problem.IsForward();
+
+    int n, c, h, w;
+    // using workgroup size of 256 by default
+    int grid_size, spatial_dim, vector_size, num_batch;
+
+    std::vector<size_t> vld;
+    std::vector<size_t> vgd;
+
+    bool usefp16, usefp32;
+
+    size_t workgroups;
+    int batch_size;
+    int u_batch_size;
+
+    getParams(yDesc,
+              mode,
+              n,
+              c,
+              h,
+              w,
+              grid_size,
+              spatial_dim,
+              vector_size,
+              num_batch,
+              usefp16,
+              usefp32,
+              vld,
+              vgd,
+              workgroups,
+              batch_size,
+              u_batch_size);
+
+    if(num_batch > 1)
+    {
+        if(isForward)
+        {
+            /// \todo Magic numbers
+            if((u_batch_size + 1) * 256 > 65536 && yDesc.GetType() == miopenHalf)
+                MIOPEN_THROW(miopenStatusBadParm, "Exceed local memory capacity");
+        }
+        else
+        {
+            /// \todo Magic numbers
+            if((2 * u_batch_size + 1) * 256 > 65536 && yDesc.GetType() == miopenHalf)
+                MIOPEN_THROW(miopenStatusBadParm, "Exceed local memory capacity");
+        }
+    }
+
+    KernelBuildParameters build_params = KernelBuildParameters{{"NUM_BATCH", num_batch}};
+
+    if(num_batch > 1)
+    {
+        build_params.Define("BATCH_SIZE", batch_size);
+        build_params.Define("U_BATCH_SIZE", u_batch_size);
+    }
+
+    build_params.Define("MIOPEN_USE_FP16", static_cast<int>(usefp16));
+    build_params.Define("MIOPEN_USE_FP32", static_cast<int>(usefp32));
+
+    if(algorithm == MIOPEN_SOFTMAX_LOG)
+        build_params.Define("USE_SOFTMAX_LOG", 1);
+    else if(algorithm == MIOPEN_SOFTMAX_FAST)
+        build_params.Define("USE_SOFTMAX_FAST", 1);
+    else
+        build_params.Define("USE_SOFTMAX_ACCURATE", 1);
+
+    if(mode == MIOPEN_SOFTMAX_MODE_INSTANCE)
+        build_params.Define("USE_SOFTMAX_MODE_INSTANCE", 1);
+    else
+        build_params.Define("USE_SOFTMAX_MODE_CHANNEL", 1);
+
+    build_params.Define("RUN_FORWARD", isForward ? 1 : 0);
+
+    if(isForward)
+    {
+        build_params.Define("IS_INPUT_PACKED", static_cast<int>(xDesc.IsPacked()));
+        build_params.Define("IS_OUTPUT_PACKED", static_cast<int>(yDesc.IsPacked()));
+    }
+    else
+    {
+        build_params.Define("IS_OUTPUT_PACKED", static_cast<int>(yDesc.IsPacked()));
+        build_params.Define("IS_DOUTPUT_PACKED", static_cast<int>(dyDesc.IsPacked()));
+        build_params.Define("IS_DINPUT_PACKED", static_cast<int>(dxDesc.IsPacked()));
+    }
+
+    if(!float_equal(alpha, 1.0))
+        build_params.Define("USE_ALPHA", 1);
+
+    if(!float_equal(beta, 0))
+        build_params.Define("USE_BETA", 1);
+
+    auto kernel = KernelInfo{};
+
+    kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{});
+
+    kernel.kernel_file = "MIOpenSoftmax.cl";
+
+    kernel.kernel_name = isForward ? "SoftmaxForward" : "SoftmaxBackward";
+
+    for(unsigned int i = 0; i < 2; ++i)
+    {
+        kernel.l_wk.push_back(vld[i]);
+        kernel.g_wk.push_back(vgd[i]);
+    }
+
+    if(isForward)
+    {
+        int in_nstr, in_cstr, in_hstr;
+        std::tie(in_nstr, in_cstr, in_hstr, std::ignore) = tien<4>(xDesc.GetStrides());
+
+        int out_nstr, out_cstr, out_hstr;
+        std::tie(out_nstr, out_cstr, out_hstr, std::ignore) = tien<4>(yDesc.GetStrides());
+
+        result.invoker_factory = [=](const std::vector<Kernel>& kernels) {
+            return [=](const Handle& handle_, const AnyInvokeParams& raw_params) {
+                decltype(auto) kernel = handle_.Run(kernels.front());
+                decltype(auto) params = raw_params.CastTo<miopen::softmax::InvokeParams>();
+
+                kernel(params.x,
+                       params.forward_y,
+                       vector_size,
+                       grid_size,
+                       spatial_dim,
+                       h,
+                       w,
+                       in_nstr,
+                       in_cstr,
+                       in_hstr,
+                       out_nstr,
+                       out_cstr,
+                       out_hstr,
+                       params.xdx_offset,
+                       params.y_offset,
+                       alpha,
+                       beta);
+            };
+        };
+    }
+    else
+    {
+        int din_nstr, din_cstr, din_hstr;
+        std::tie(din_nstr, din_cstr, din_hstr, std::ignore) = tien<4>(dxDesc.GetStrides());
+
+        int dout_nstr, dout_cstr, dout_hstr;
+        std::tie(dout_nstr, dout_cstr, dout_hstr, std::ignore) = tien<4>(dyDesc.GetStrides());
+
+        int out_nstr, out_cstr, out_hstr;
+        std::tie(out_nstr, out_cstr, out_hstr, std::ignore) = tien<4>(yDesc.GetStrides());
+
+        result.invoker_factory = [=](const std::vector<Kernel>& kernels) {
+            return [=](const Handle& handle_, const AnyInvokeParams& raw_params) {
+                decltype(auto) kernel = handle_.Run(kernels.front());
+                decltype(auto) params = raw_params.CastTo<miopen::softmax::InvokeParams>();
+
+                kernel(params.backward_y,
+                       params.dy,
+                       params.dx,
+                       vector_size,
+                       grid_size,
+                       spatial_dim,
+                       h,
+                       w,
+                       out_nstr,
+                       out_cstr,
+                       out_hstr,
+                       dout_nstr,
+                       dout_cstr,
+                       dout_hstr,
+                       din_nstr,
+                       din_cstr,
+                       din_hstr,
+                       params.y_offset,
+                       params.dy_offset,
+                       params.xdx_offset,
+                       alpha,
+                       beta);
+            };
+        };
+    }
+
+    result.construction_params.push_back(kernel);
+
+    return result;
+}
+
+} // namespace softmax
+
+} // namespace solver
+
+} // namespace miopen