diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 91fc16e54c88f..53c6b295fc5e1 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7022,10 +7022,11 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
   Function *KernelLaunchFunction = StaleCI->getCalledFunction();
 
   // StaleCI is the CallInst which is the call to the outlined
-  // target kernel launch function. If there are values that the
-  // outlined function uses then these are aggregated into a structure
-  // which is passed as the second argument. If not, then there's
-  // only one argument, the threadID. So, StaleCI can be
+  // target kernel launch function. If there are local live-in values
+  // that the outlined function uses then these are aggregated into a structure
+  // which is passed as the second argument. If there are no local live-in
+  // values or if all values used by the outlined kernel are global variables,
+  // then there's only one argument, the threadID. So, StaleCI can be
   //
   // %structArg = alloca { ptr, ptr }, align 8
   // %gep_ = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 0
@@ -7063,6 +7064,8 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
   // host and device.
   assert((!HasShareds || (StaleCI->arg_size() == 2)) &&
          "StaleCI with shareds should have exactly two arguments.");
+
+  Value *ThreadId = ProxyFn->getArg(0);
   if (HasShareds) {
     auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
     assert(ArgStructAlloca &&
@@ -7073,7 +7076,6 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
     AllocaInst *NewArgStructAlloca =
         Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
     Value *TaskT = ProxyFn->getArg(1);
-    Value *ThreadId = ProxyFn->getArg(0);
     Value *SharedsSize =
         Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
 
@@ -7086,7 +7088,10 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
         LoadShared->getPointerAlignment(M.getDataLayout()), SharedsSize);
 
     Builder.CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
+  } else {
+    Builder.CreateCall(KernelLaunchFunction, {ThreadId});
   }
+
   Builder.CreateRetVoid();
   return ProxyFn;
 }
@@ -7229,11 +7234,23 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
       Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP, "global.tid", false));
 
   Builder.restoreIP(TargetTaskBodyIP);
-
   if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
     return Err;
 
-  OI.ExitBB = Builder.saveIP().getBlock();
+  // The outliner (CodeExtractor) extract a sequence or vector of blocks that
+  // it is given. These blocks are enumerated by
+  // OpenMPIRBuilder::OutlineInfo::collectBlocks which expects the OI.ExitBlock
+  // to be outside the region. In other words, OI.ExitBlock is expected to be
+  // the start of the region after the outlining. We used to set OI.ExitBlock
+  // to the InsertBlock after TaskBodyCB is done. This is fine in most cases
+  // except when the task body is a single basic block. In that case,
+  // OI.ExitBlock is set to the single task body block and will get left out of
+  // the outlining process. So, simply create a new empty block to which we
+  // uncoditionally branch from where TaskBodyCB left off
+  OI.ExitBB = BasicBlock::Create(Builder.getContext(), "target.task.cont");
+  emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
+            /*IsFinished=*/true);
+
   OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, HasNoWait,
                       DeviceID](Function &OutlinedFn) mutable {
     assert(OutlinedFn.getNumUses() == 1 &&
diff --git a/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir
index 621a206e18053..ece32bb5419c6 100644
--- a/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir
@@ -25,8 +25,29 @@ module attributes {omp.is_target_device = false} {
 // CHECK: define void @omp_target_depend_()
 // CHECK-NOT: define {{.*}} @
 // CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK:  call void @__kmpc_omp_task_begin_if0
+// CHECK-NEXT: call void @.omp_target_task_proxy_func
+// CHECK: call void @__kmpc_omp_task_complete_if0
+// https://github.com/llvm/llvm-project/issues/126949 exposes two issues
+// 1. Empty target task proxy functions
+// 2. When 1 fixed, it leads to a second problem of calling the omp target kernel twice
+//    Once via the target task proxy function and a second time after the target task is done.
+// The following checks check problem #2.
+// functions. The following checks tests the fix for this issue.
+// CHECK-NEXT:  br label %[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY:.*]]
+// CHECK:[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY]]:
+// CHECK-NEXT:  ret void
+
+// CHECK: define internal void @omp_target_depend_..omp_par
 // CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_depend__l[[LINE:.*]](ptr {{.*}})
+// CHECK-NEXT: br label %[[BLOCK_AFTER_TARGET_TASK_BODY:.*]]
+// CHECK: [[BLOCK_AFTER_TARGET_TASK_BODY]]:
 // CHECK-NEXT: ret void
 
+
 // CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_depend__l[[LINE]](ptr %[[ADDR_A:.*]])
 // CHECK: store i32 100, ptr %[[ADDR_A]], align 4
+
+// The following check test for the fix of problem #1 as described in https://github.com/llvm/llvm-project/issues/126949
+// CHECK: define internal void @.omp_target_task_proxy_func
+// CHECK: call void @omp_target_depend_..omp_par
diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir
index 6b634226a3568..94d8d052d087e 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir
@@ -20,10 +20,30 @@ module attributes {omp.is_target_device = false} {
 // CHECK: define void @omp_target_nowait_()
 // CHECK-NOT: define {{.*}} @
 // CHECK-NOT: call ptr @__kmpc_omp_target_task_alloc({{.*}})
+// CHECK:  call void @__kmpc_omp_task_begin_if0
+// CHECK-NEXT: call void @.omp_target_task_proxy_func
+// CHECK: call void @__kmpc_omp_task_complete_if0
+// https://github.com/llvm/llvm-project/issues/126949 exposes two issues
+// 1. Empty target task proxy functions
+// 2. When 1 fixed, it leads to a second problem of calling the omp target kernel twice
+//    Once via the target task proxy function and a second time after the target task is done.
+// The following checks check problem #2.
+// functions. The following checks tests the fix for this issue.
+// CHECK-NEXT:  br label %[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY:.*]]
+// CHECK:[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY]]:
+// CHECK-NEXT:  ret void
+
 // Verify that we directly emit a call to the "target" region's body from the
 // parent function of the the `omp.target` op.
+// CHECK: define internal void @omp_target_nowait_..omp_par
 // CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_nowait__l[[LINE:.*]](ptr {{.*}})
+// CHECK-NEXT: br label %[[BLOCK_AFTER_TARGET_TASK_BODY:.*]]
+// CHECK: [[BLOCK_AFTER_TARGET_TASK_BODY]]:
 // CHECK-NEXT: ret void
 
 // CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_nowait__l[[LINE]](ptr %[[ADDR_X:.*]])
 // CHECK: store float 5{{.*}}, ptr %[[ADDR_X]], align 4
+
+// The following check test for the fix of problem #1 as described in https://github.com/llvm/llvm-project/issues/126949
+// CHECK: define internal void @.omp_target_task_proxy_func
+// CHECK: call void @omp_target_nowait_..omp_par