From 1ce0284af1564808e4d08781c996802442f0a527 Mon Sep 17 00:00:00 2001
From: Siddhant Jain <siddhantjainofficial26@gmail.com>
Date: Sun, 22 Feb 2026 15:51:27 +0530
Subject: [PATCH 1/8] refactor(tests): consolidate KServe test infrastructure
 into two-file pattern

Merge scattered KServe test scripts into the canonical *_install.sh +
*_test.sh pattern matching all other components. Inline utils.py and
iris_input.json into kserve_sklearn_test.py, removing the tests/kserve/
subfolder entirely.

Key changes:
- Consolidate 3 test scripts into kserve_test.sh (6 tests)
- Add pathTemplate ConfigMap patch in kustomization.yaml
- Use source.namespaces mTLS AuthorizationPolicy
- Both path-based and host-based routing tested via curl
- Python SDK prediction test runs independently via pytest
- Remove accidental build artifacts (kubeflow-all.yaml)

Signed-off-by: Siddhant Jain <siddhantjainofficial26@gmail.com>
---
 .../full_kubeflow_integration_test.yaml       |   3 -
 .../kserve_models_web_application_test.yaml   |  10 +-
 .github/workflows/kserve_test.yaml            |   2 +-
 applications/kserve/kserve/kustomization.yaml |  30 +-
 tests/kserve/data/iris_input.json             |   6 -
 tests/kserve/kserve-external-access.yaml      |  38 ---
 tests/kserve/test_sklearn.py                  |  60 ----
 tests/kserve/utils.py                         | 125 ---------
 tests/kserve_complete_authentication_test.sh  | 127 ---------
 tests/kserve_models_web_application_test.sh   |  58 ----
 tests/kserve_setup_external_access.sh         |  58 ----
 tests/kserve_sklearn_test.py                  | 259 ++++++++++++++++++
 tests/kserve_test.sh                          | 184 ++++++++++---
 tests/kserve_test.yaml                        |  16 --
 tests/{kserve => }/requirements.txt           |   2 +-
 15 files changed, 444 insertions(+), 534 deletions(-)
 delete mode 100644 tests/kserve/data/iris_input.json
 delete mode 100644 tests/kserve/kserve-external-access.yaml
 delete mode 100644 tests/kserve/test_sklearn.py
 delete mode 100644 tests/kserve/utils.py
 delete mode 100755 tests/kserve_complete_authentication_test.sh
 delete mode 100755 tests/kserve_models_web_application_test.sh
 delete mode 100755 tests/kserve_setup_external_access.sh
 create mode 100644 tests/kserve_sklearn_test.py
 delete mode 100644 tests/kserve_test.yaml
 rename tests/{kserve => }/requirements.txt (77%)

diff --git a/.github/workflows/full_kubeflow_integration_test.yaml b/.github/workflows/full_kubeflow_integration_test.yaml
index a2351b7c37..f3a6a82c23 100644
--- a/.github/workflows/full_kubeflow_integration_test.yaml
+++ b/.github/workflows/full_kubeflow_integration_test.yaml
@@ -200,9 +200,6 @@ jobs:
     - name: Run KServe Test
       run: ./tests/kserve_test.sh ${KF_PROFILE}
 
-    - name: Test KServe Models Web Application API
-      run: ./tests/kserve_models_web_application_test.sh "${KF_PROFILE}"
-
     - name: Run Spark Test
       run: chmod u+x tests/*.sh && ./tests/spark_test.sh "${KF_PROFILE}"
 
diff --git a/.github/workflows/kserve_models_web_application_test.yaml b/.github/workflows/kserve_models_web_application_test.yaml
index 7a1d426c8f..1be3e3368a 100644
--- a/.github/workflows/kserve_models_web_application_test.yaml
+++ b/.github/workflows/kserve_models_web_application_test.yaml
@@ -6,6 +6,7 @@ on:
     - .github/workflows/kserve_models_web_application_test.yaml
     - applications/kserve/**
     - tests/kserve*
+
     - tests/istio*
     - common/istio*/**
     - common/knative/**
@@ -59,6 +60,11 @@ jobs:
     - name: Create KF Profile
       run: ./tests/kubeflow_profile_install.sh
 
+    - name: Setup python 3.12
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.12"  # quoted: an unquoted version is parsed as a YAML float
+
     - name: Wait for All Pods to be Ready
       run: |
         kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=300s --field-selector=status.phase!=Succeeded
@@ -69,5 +75,5 @@ jobs:
     - name: Port-forward the istio-ingress gateway
       run: ./tests/port_forward_gateway.sh
 
-    - name: Test KServe Models Web Application API
-      run: ./tests/kserve_models_web_application_test.sh "${KF_PROFILE}"
+    - name: Run KServe tests
+      run: ./tests/kserve_test.sh "${KF_PROFILE}"
diff --git a/.github/workflows/kserve_test.yaml b/.github/workflows/kserve_test.yaml
index b615114a27..7f53a9e26e 100644
--- a/.github/workflows/kserve_test.yaml
+++ b/.github/workflows/kserve_test.yaml
@@ -6,7 +6,7 @@ on:
     - .github/workflows/kserve_test.yaml
     - applications/kserve/**
     - apps/kserve/**
-    - tests/kserve/**
+    - tests/kserve_*
     - tests/kserve_test.sh
     - tests/kserve_install.sh
     - common/istio*/**
diff --git a/applications/kserve/kserve/kustomization.yaml b/applications/kserve/kserve/kustomization.yaml
index b926a3f89f..0215392727 100644
--- a/applications/kserve/kserve/kustomization.yaml
+++ b/applications/kserve/kserve/kustomization.yaml
@@ -59,10 +59,38 @@ patches:
     kind: LLMInferenceServiceConfig
 
 # Delete the ValidatingWebhookConfiguration for LLM resources
-# Webhook server (llmisvc-webhook-server-service) is not running â†’ EOF errors
+# Webhook server (llmisvc-webhook-server-service) is not running — EOF errors
 - patch: |
     apiVersion: admissionregistration.k8s.io/v1
     kind: ValidatingWebhookConfiguration
     metadata:
       name: llminferenceserviceconfig.serving.kserve.io
     $patch: delete
+
+# Enable path-based routing via pathTemplate on the inferenceservice-config ConfigMap.
+# This allows KServe to auto-generate VirtualServices for path-based URLs
+# (/serving/<namespace>/<name>/...), eliminating manual VirtualService creation.
+# Ref: https://github.com/kserve/kserve/issues/2257
+# Ref: https://github.com/kserve/kserve/blob/master/config/configmap/inferenceservice.yaml#L389
+- patch: |
+    apiVersion: v1
+    kind: ConfigMap
+    metadata:
+      name: inferenceservice-config
+      namespace: kubeflow
+    data:
+      ingress: |-
+        {
+          "enableGatewayApi": false,
+          "kserveIngressGateway": "kserve/kserve-ingress-gateway",
+          "ingressGateway": "kubeflow/kubeflow-gateway",
+          "localGateway": "knative-serving/knative-local-gateway",
+          "localGatewayService": "knative-local-gateway.istio-system.svc.cluster.local",
+          "ingressDomain": "example.com",
+          "ingressClassName": "istio",
+          "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}",
+          "urlScheme": "http",
+          "disableIstioVirtualHost": false,
+          "disableIngressCreation": false,
+          "pathTemplate": "/serving/{{ .Namespace }}/{{ .Name }}"
+        }
diff --git a/tests/kserve/data/iris_input.json b/tests/kserve/data/iris_input.json
deleted file mode 100644
index 77839728a0..0000000000
--- a/tests/kserve/data/iris_input.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "instances": [
-    [6.8,  2.8,  4.8,  1.4],
-    [6.0,  3.4,  4.5,  1.6]
-  ]
-}
diff --git a/tests/kserve/kserve-external-access.yaml b/tests/kserve/kserve-external-access.yaml
deleted file mode 100644
index 975c4baa07..0000000000
--- a/tests/kserve/kserve-external-access.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# KServe External Access Configuration
-# This enables secure external access to KServe InferenceServices
-
-apiVersion: networking.istio.io/v1beta1
-kind: VirtualService
-metadata:
-  name: kserve-external-access
-  namespace: REPLACE_WITH_NAMESPACE  # e.g., kubeflow-user-example-com
-spec:
-  gateways:
-  - kubeflow/kubeflow-gateway  # External gateway
-  hosts:
-  - '*'
-  http:
-  # Path-based routing for KServe models
-  - match:
-    - uri:
-        prefix: /kserve/REPLACE_WITH_NAMESPACE/
-    rewrite:
-      uri: /
-    route:
-    - destination:
-        # Route through cluster-local-gateway (secured with JWT)
-        host: cluster-local-gateway.istio-system.svc.cluster.local
-      headers:
-        request:
-          set:
-            # Set the correct host header for the target service
-            Host: REPLACE_WITH_SERVICE_NAME.REPLACE_WITH_NAMESPACE.svc.cluster.local
-      weight: 100
-    timeout: 300s
-    # Optional: Add CORS headers for browser access
-    headers:
-      response:
-        add:
-          Access-Control-Allow-Origin: "*"
-          Access-Control-Allow-Methods: "GET, POST, OPTIONS"
-          Access-Control-Allow-Headers: "Authorization, Content-Type"
diff --git a/tests/kserve/test_sklearn.py b/tests/kserve/test_sklearn.py
deleted file mode 100644
index e72733d0ee..0000000000
--- a/tests/kserve/test_sklearn.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from kubernetes import client
-from kubernetes.client import V1ResourceRequirements
-
-from kserve import (
-    constants,
-    KServeClient,
-    V1beta1InferenceService,
-    V1beta1InferenceServiceSpec,
-    V1beta1PredictorSpec,
-    V1beta1SKLearnSpec,
-)
-from utils import KSERVE_TEST_NAMESPACE
-from utils import predict
-
-
-def test_sklearn_kserve():
-    service_name = "isvc-sklearn"
-    predictor = V1beta1PredictorSpec(
-        min_replicas=1,
-        sklearn=V1beta1SKLearnSpec(
-            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
-            resources=V1ResourceRequirements(
-                requests={"cpu": "50m", "memory": "128Mi"},
-                limits={"cpu": "100m", "memory": "256Mi"},
-            ),
-        ),
-    )
-
-    isvc = V1beta1InferenceService(
-        api_version=constants.KSERVE_V1BETA1,
-        kind="InferenceService",
-        metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
-        ),
-        spec=V1beta1InferenceServiceSpec(predictor=predictor),
-    )
-
-    kserve_client = KServeClient(
-        config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
-    )
-    kserve_client.create(isvc)
-    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
-    res = predict(service_name, "./data/iris_input.json")
-    assert res["predictions"] == [1, 1]
-    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
diff --git a/tests/kserve/utils.py b/tests/kserve/utils.py
deleted file mode 100644
index 36091a628a..0000000000
--- a/tests/kserve/utils.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import logging
-import os
-import time
-from urllib.parse import urlparse
-
-import requests
-from kubernetes import client
-
-from kserve import KServeClient
-from kserve import constants
-
-logging.basicConfig(level=logging.INFO)
-
-KSERVE_NAMESPACE = "kserve"
-KSERVE_TEST_NAMESPACE = "kubeflow-user-example-com"
-MODEL_CLASS_NAME = "modelClass"
-
-
-class M2mTokenNotAvailable(Exception):
-    pass
-
-
-def get_cluster_ip(name="istio-ingressgateway", namespace="istio-system"):
-    api_instance = client.CoreV1Api(client.ApiClient())
-    service = api_instance.read_namespaced_service(name, namespace)
-    if service.status.load_balancer.ingress is None:
-        cluster_ip = service.spec.cluster_ip
-    else:
-        if service.status.load_balancer.ingress[0].hostname:
-            cluster_ip = service.status.load_balancer.ingress[0].hostname
-        else:
-            cluster_ip = service.status.load_balancer.ingress[0].ip
-    return os.environ.get("KSERVE_INGRESS_HOST_PORT", cluster_ip)
-
-
-def get_m2m_auth_token(env_name="KSERVE_M2M_TOKEN"):
-    try:
-        return os.environ[env_name]
-    except KeyError:
-        raise M2mTokenNotAvailable(env_name)
-
-
-def predict(
-    service_name,
-    input_json,
-    protocol_version="v1",
-    version=constants.KSERVE_V1BETA1_VERSION,
-    model_name=None,
-):
-    with open(input_json) as json_file:
-        data = json.load(json_file)
-
-        return predict_str(
-            service_name=service_name,
-            input_json=json.dumps(data),
-            protocol_version=protocol_version,
-            version=version,
-            model_name=model_name,
-        )
-
-
-def predict_str(
-    service_name,
-    input_json,
-    protocol_version="v1",
-    version=constants.KSERVE_V1BETA1_VERSION,
-    model_name=None,
-):
-    kfs_client = KServeClient(
-        config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
-    )
-    isvc = kfs_client.get(
-        service_name,
-        namespace=KSERVE_TEST_NAMESPACE,
-        version=version,
-    )
-    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
-    time.sleep(10)
-    cluster_ip = get_cluster_ip()
-    host = f"{service_name}.{KSERVE_TEST_NAMESPACE}.example.com"
-    headers = {
-        "Host": host,
-        "Content-Type": "application/json",
-    }
-
-    try:
-        token = get_m2m_auth_token()
-        headers.update({"Authorization": f"Bearer {token}"})
-        logging.info("M2M Token Found.")
-    except M2mTokenNotAvailable:
-        logging.warning("M2M Token Not found, client authentication disabled.")
-
-    if model_name is None:
-        model_name = service_name
-
-    url = f"http://{cluster_ip}/v1/models/{model_name}:predict"
-    if protocol_version == "v2":
-        url = f"http://{cluster_ip}/v2/models/{model_name}/infer"
-
-    logging.info("Sending Header = %s", headers)
-    logging.info("Sending url = %s", url)
-    logging.info("Sending request data: %s", input_json)
-    response = requests.post(url, input_json, headers=headers)
-    logging.info(
-        "Got response code %s, content %s", response.status_code, response.content
-    )
-    if response.status_code == 200:
-        preds = json.loads(response.content.decode("utf-8"))
-        return preds
-    else:
-        response.raise_for_status()
diff --git a/tests/kserve_complete_authentication_test.sh b/tests/kserve_complete_authentication_test.sh
deleted file mode 100755
index 6e28d940cd..0000000000
--- a/tests/kserve_complete_authentication_test.sh
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-PRIMARY_NAMESPACE="kubeflow-user-example-com"
-ATTACKER_NAMESPACE="kubeflow-user-attacker"
-KSERVE_INGRESS_HOST_PORT="${KSERVE_INGRESS_HOST_PORT:-localhost:8080}"
-
-function setup_test_environment() {
-    kubectl create namespace $PRIMARY_NAMESPACE --dry-run=client -o yaml | kubectl apply -f -
-    kubectl create namespace $ATTACKER_NAMESPACE --dry-run=client -o yaml | kubectl apply -f -
-    kubectl create serviceaccount default-editor -n $PRIMARY_NAMESPACE --dry-run=client -o yaml | kubectl apply -f -
-    kubectl create serviceaccount attacker-sa -n $ATTACKER_NAMESPACE --dry-run=client -o yaml | kubectl apply -f -
-
-    cat <<EOF | kubectl apply -f -
-apiVersion: "serving.kserve.io/v1beta1"
-kind: "InferenceService"
-metadata:
-  name: "secure-sklearn"
-  namespace: $PRIMARY_NAMESPACE
-spec:
-  predictor:
-    sklearn:
-      storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
-      resources:
-        requests:
-          cpu: 50m
-          memory: 128Mi
-        limits:
-          cpu: 100m
-          memory: 256Mi
-EOF
-
-    kubectl wait --for=condition=Ready inferenceservice/secure-sklearn -n $PRIMARY_NAMESPACE --timeout=180s || true
-}
-
-function test_gateway_jwt_validation() {
-    RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
-        -H "Host: secure-sklearn-predictor.$PRIMARY_NAMESPACE.svc.cluster.local" \
-        "http://$KSERVE_INGRESS_HOST_PORT/v1/models/secure-sklearn:predict" \
-        -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}' \
-        -H "Content-Type: application/json")
-
-    if [ "$RESPONSE" != "403" ]; then
-        exit 1
-    fi
-
-    RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
-        -H "Host: secure-sklearn-predictor.$PRIMARY_NAMESPACE.svc.cluster.local" \
-        -H "Authorization: Bearer invalid-token-123" \
-        "http://$KSERVE_INGRESS_HOST_PORT/v1/models/secure-sklearn:predict" \
-        -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}' \
-        -H "Content-Type: application/json")
-
-    if [ "$RESPONSE" != "401" ] && [ "$RESPONSE" != "403" ]; then
-        exit 1
-    fi
-}
-
-function test_namespace_isolation() {
-    PRIMARY_TOKEN=$(kubectl -n $PRIMARY_NAMESPACE create token default-editor)
-    ATTACKER_TOKEN=$(kubectl -n $ATTACKER_NAMESPACE create token attacker-sa)
-
-    RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
-        -H "Host: secure-sklearn-predictor.$PRIMARY_NAMESPACE.svc.cluster.local" \
-        -H "Authorization: Bearer $PRIMARY_TOKEN" \
-        "http://$KSERVE_INGRESS_HOST_PORT/v1/models/secure-sklearn:predict" \
-        -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}' \
-        -H "Content-Type: application/json")
-
-    if [ "$RESPONSE" != "200" ] && [ "$RESPONSE" != "404" ] && [ "$RESPONSE" != "503" ]; then
-        exit 1
-    fi
-
-    RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
-        -H "Host: secure-sklearn-predictor.$PRIMARY_NAMESPACE.svc.cluster.local" \
-        -H "Authorization: Bearer $ATTACKER_TOKEN" \
-        "http://$KSERVE_INGRESS_HOST_PORT/v1/models/secure-sklearn:predict" \
-        -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}' \
-        -H "Content-Type: application/json")
-
-    if [ "$RESPONSE" != "200" ] && [ "$RESPONSE" != "404" ] && [ "$RESPONSE" != "503" ]; then
-        exit 1
-    fi
-}
-
-function test_external_access() {
-    PRIMARY_TOKEN=$(kubectl -n $PRIMARY_NAMESPACE create token default-editor)
-
-    curl -s -o /dev/null -w "%{http_code}" \
-        -H "Authorization: Bearer $PRIMARY_TOKEN" \
-        "http://$KSERVE_INGRESS_HOST_PORT/kserve/$PRIMARY_NAMESPACE/secure-sklearn/v1/models/secure-sklearn:predict" \
-        -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}' \
-        -H "Content-Type: application/json" > /dev/null || true
-}
-
-function test_internal_access() {
-    kubectl run test-client -n $PRIMARY_NAMESPACE --image=curlimages/curl --restart=Never -- sleep 3600 2>/dev/null || true
-    kubectl wait --for=condition=ready pod/test-client -n $PRIMARY_NAMESPACE --timeout=60s 2>/dev/null || return
-
-    PRIMARY_TOKEN=$(kubectl -n $PRIMARY_NAMESPACE create token default-editor)
-
-    kubectl exec -n $PRIMARY_NAMESPACE test-client -- \
-        curl -s -o /dev/null -w "%{http_code}" \
-        -H "Authorization: Bearer $PRIMARY_TOKEN" \
-        -H "Content-Type: application/json" \
-        "http://secure-sklearn-predictor.$PRIMARY_NAMESPACE.svc.cluster.local/v1/models/secure-sklearn:predict" \
-        -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}' 2>/dev/null || true
-
-    kubectl delete pod test-client -n $PRIMARY_NAMESPACE --ignore-not-found=true
-}
-
-function cleanup() {
-    kubectl delete namespace $ATTACKER_NAMESPACE --ignore-not-found=true
-    kubectl delete inferenceservice secure-sklearn -n $PRIMARY_NAMESPACE --ignore-not-found=true
-    kubectl delete pod test-client -n $PRIMARY_NAMESPACE --ignore-not-found=true
-}
-
-function main() {
-    setup_test_environment
-    test_gateway_jwt_validation
-    test_namespace_isolation
-    test_external_access
-    test_internal_access
-    cleanup
-}
-
-main
\ No newline at end of file
diff --git a/tests/kserve_models_web_application_test.sh b/tests/kserve_models_web_application_test.sh
deleted file mode 100755
index 42df46fd2d..0000000000
--- a/tests/kserve_models_web_application_test.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-set -euxo pipefail
-
-
-KF_PROFILE=${1:-kubeflow-user-example-com}
-TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)"
-BASE_URL="localhost:8080/kserve-endpoints"
-
-cat <<EOF | kubectl apply -f -
-apiVersion: "serving.kserve.io/v1beta1"
-kind: "InferenceService"
-metadata:
-  name: "sklearn-iris"
-  namespace: ${KF_PROFILE}
-spec:
-  predictor:
-    sklearn:
-      storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
-      resources:
-        requests:
-          cpu: "50m"
-          memory: "128Mi"
-        limits:
-          cpu: "100m"
-          memory: "256Mi"
-EOF
-
-kubectl wait --for=condition=Ready inferenceservice/sklearn-iris -n ${KF_PROFILE} --timeout=120s
-kubectl get inferenceservice sklearn-iris -n ${KF_PROFILE}
-
-# Get XSRF token for API calls
-curl -s "http://${BASE_URL}/" \
-  -H "Authorization: Bearer ${TOKEN}" \
-  -v -c /tmp/kserve_xcrf.txt 2>&1 | grep -i "set-cookie"
-XSRFTOKEN=$(grep XSRF-TOKEN /tmp/kserve_xcrf.txt | awk '{print $NF}')
-
-RESPONSE=$(curl -s --fail-with-body \
-  "${BASE_URL}/api/namespaces/${KF_PROFILE}/inferenceservices" \
-  -H "Authorization: Bearer ${TOKEN}" \
-  -H "X-XSRF-TOKEN: ${XSRFTOKEN}" \
-  -H "Cookie: XSRF-TOKEN=${XSRFTOKEN}")
-
-echo "$RESPONSE" | grep -q "sklearn-iris" || exit 1
-kubectl get inferenceservice sklearn-iris -n ${KF_PROFILE} || exit 1
-READY=$(kubectl get isvc sklearn-iris -n ${KF_PROFILE} -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
-[[ "$READY" == "True" ]] || {
-  echo "FAILURE: InferenceService Ready status is: $READY"
-  exit 1
-}
-
-kubectl delete inferenceservice sklearn-iris -n ${KF_PROFILE} || exit 1
-
-# Test unauthorized access
-TOKEN="$(kubectl -n default create token default)"
-BASE_URL="localhost:8080/kserve-endpoints"
-HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${BASE_URL}/api/namespaces/${KF_PROFILE}/inferenceservices" -H "Authorization: Bearer ${TOKEN}")
-[[ "$HTTP_CODE" == "403" || "$HTTP_CODE" == "401" ]] || { echo "FAILURE: Expected 401/403, got $HTTP_CODE"; exit 1; }
-echo "Test succeeded. Token from unauthorized ServiceAccount cannot list InferenceServices in $KF_PROFILE namespace."
diff --git a/tests/kserve_setup_external_access.sh b/tests/kserve_setup_external_access.sh
deleted file mode 100755
index 61d750b77d..0000000000
--- a/tests/kserve_setup_external_access.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-# Script to setup external access for KServe testing
-
-set -euo pipefail
-
-NAMESPACE=${1:-kubeflow-user-example-com}
-SERVICE_NAME=${2:-test-sklearn}
-
-echo "Setting up external access for KServe..."
-echo "Namespace: $NAMESPACE"
-echo "Service: $SERVICE_NAME"
-echo
-
-# Create VirtualService for external access
-cat <<EOF | kubectl apply -f -
-apiVersion: networking.istio.io/v1beta1
-kind: VirtualService
-metadata:
-  name: ${SERVICE_NAME}-external-access
-  namespace: $NAMESPACE
-spec:
-  gateways:
-    - kubeflow/kubeflow-gateway
-  hosts:
-    - '*'
-  http:
-    - match:
-        - uri:
-            prefix: /kserve/$NAMESPACE/$SERVICE_NAME/
-      rewrite:
-        uri: /
-      route:
-        - destination:
-            host: cluster-local-gateway.istio-system.svc.cluster.local
-          headers:
-            request:
-              set:
-                Host: ${SERVICE_NAME}-predictor.${NAMESPACE}.svc.cluster.local
-          weight: 100
-      timeout: 300s
-      headers:
-        response:
-          add:
-            Access-Control-Allow-Origin: "*"
-            Access-Control-Allow-Methods: "GET, POST, OPTIONS"
-            Access-Control-Allow-Headers: "Authorization, Content-Type"
-EOF
-
-echo "External access configured for $SERVICE_NAME in $NAMESPACE"
-echo
-echo "Usage examples:"
-echo "External access URL: http://YOUR_CLUSTER_IP/kserve/$NAMESPACE/$SERVICE_NAME/v1/models/$SERVICE_NAME:predict"
-echo
-echo "Test command:"
-echo "curl -H \"Authorization: Bearer \$(kubectl -n $NAMESPACE create token default-editor)\" \\"
-echo "     -H \"Content-Type: application/json\" \\"
-echo "     \"http://localhost:8080/kserve/$NAMESPACE/$SERVICE_NAME/v1/models/$SERVICE_NAME:predict\" \\"
-echo "     -d '{\"instances\": [[6.8, 2.8, 4.8, 1.4]]}'"
\ No newline at end of file
diff --git a/tests/kserve_sklearn_test.py b/tests/kserve_sklearn_test.py
new file mode 100644
index 0000000000..f6aeb34f1b
--- /dev/null
+++ b/tests/kserve_sklearn_test.py
@@ -0,0 +1,259 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""KServe sklearn prediction test.
+
+Deploys an sklearn InferenceService via the KServe Python SDK,
+waits for Ready, runs a prediction via host-based routing,
+asserts the output, and cleans up.
+
+This test is fully independent — it does not rely on any
+external deployment from kserve_test.sh.
+
+Because of the mesh-wide global-deny-all AuthorizationPolicy
+(common/istio/istio-install/base/deny_all_authorizationpolicy.yaml),
+the predictor pod's sidecar blocks all traffic by default.
+We create an ALLOW AuthorizationPolicy that permits traffic
+to the predictor pod. Security is maintained because the
+ingress gateway and cluster-local-gateway both validate the
+JWT via RequestAuthentication before forwarding.
+"""
+
+import json
+import logging
+import os
+import time
+
+import requests
+from kubernetes import client
+from kubernetes.client import V1ResourceRequirements
+
+from kserve import (
+    KServeClient,
+    V1beta1InferenceService,
+    V1beta1InferenceServiceSpec,
+    V1beta1PredictorSpec,
+    V1beta1SKLearnSpec,
+    constants,
+)
+
+logging.basicConfig(level=logging.INFO)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+AUTHORIZATION_POLICY_NAME = "allow-isvc-sklearn"
+SERVICE_NAME = "isvc-sklearn"
+KSERVE_TEST_NAMESPACE = os.environ.get(
+    "KSERVE_TEST_NAMESPACE", "kubeflow-user-example-com"
+)
+
+IRIS_INPUT = {"instances": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]]}
+
+
+# ---------------------------------------------------------------------------
+# Helpers (merged from tests/kserve/utils.py)
+# ---------------------------------------------------------------------------
+class M2mTokenNotAvailable(Exception):
+    pass
+
+
+def get_cluster_ip(name="istio-ingressgateway", namespace="istio-system"):
+    api_instance = client.CoreV1Api(client.ApiClient())
+    service = api_instance.read_namespaced_service(name, namespace)
+    if service.status.load_balancer.ingress is None:
+        cluster_ip = service.spec.cluster_ip
+    else:
+        if service.status.load_balancer.ingress[0].hostname:
+            cluster_ip = service.status.load_balancer.ingress[0].hostname
+        else:
+            cluster_ip = service.status.load_balancer.ingress[0].ip
+    return os.environ.get("KSERVE_INGRESS_HOST_PORT", cluster_ip)
+
+
+def get_m2m_auth_token(env_name="KSERVE_M2M_TOKEN"):
+    try:
+        return os.environ[env_name]
+    except KeyError:
+        raise M2mTokenNotAvailable(env_name)
+
+
+def predict(service_name, input_data):
+    """Send a prediction request using host-based routing.
+
+    Args:
+        service_name: Name of the InferenceService.
+        input_data: Dict payload (e.g. {"instances": [...]}).
+
+    Returns:
+        Parsed JSON response dict on HTTP 200.
+
+    Raises:
+        requests.RequestException: On a timeout or an HTTP 4xx/5xx response.
+    """
+    kfs_client = KServeClient(
+        config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
+    )
+    kfs_client.get(
+        service_name,
+        namespace=KSERVE_TEST_NAMESPACE,
+        version=constants.KSERVE_V1BETA1_VERSION,
+    )
+    # Temporary sleep until https://github.com/kserve/kserve/issues/604 is fixed
+    time.sleep(10)
+    cluster_ip = get_cluster_ip()
+
+    host = f"{service_name}.{KSERVE_TEST_NAMESPACE}.example.com"
+    headers = {
+        "Host": host,
+        "Content-Type": "application/json",
+    }
+
+    try:
+        token = get_m2m_auth_token()
+        headers["Authorization"] = f"Bearer {token}"
+        logging.info("M2M Token Found.")
+    except M2mTokenNotAvailable:
+        logging.warning("M2M Token Not found, client authentication disabled.")
+
+    url = f"http://{cluster_ip}/v1/models/{service_name}:predict"
+
+    logging.info("Sending Header = %s", headers)
+    logging.info("Sending url = %s", url)
+    logging.info("Sending request data: %s", input_data)
+    response = requests.post(url, json.dumps(input_data), headers=headers, timeout=60)
+    logging.info(
+        "Got response code %s, content %s", response.status_code, response.content
+    )
+    if response.status_code == 200:
+        return json.loads(response.content.decode("utf-8"))
+    else:
+        response.raise_for_status()
+
+
+# ---------------------------------------------------------------------------
+# AuthorizationPolicy helpers
+# ---------------------------------------------------------------------------
+def create_predictor_authorization_policy(namespace):
+    """Create an AuthorizationPolicy allowing traffic to the predictor pod.
+
+    This is needed because the global-deny-all AuthorizationPolicy in
+    istio-system blocks all mesh traffic by default.
+
+    We restrict by source namespace using mTLS identity. Security is
+    maintained because the ingress gateway and cluster-local-gateway
+    both validate the JWT before forwarding.
+    """
+    api = client.CustomObjectsApi()
+    ap_body = {
+        "apiVersion": "security.istio.io/v1beta1",
+        "kind": "AuthorizationPolicy",
+        "metadata": {
+            "name": AUTHORIZATION_POLICY_NAME,
+            "namespace": namespace,
+        },
+        "spec": {
+            "action": "ALLOW",
+            "rules": [
+                {
+                    "from": [
+                        {
+                            "source": {
+                                "namespaces": ["istio-system"],
+                            }
+                        }
+                    ]
+                }
+            ],
+            "selector": {
+                "matchLabels": {
+                    "serving.knative.dev/service": f"{SERVICE_NAME}-predictor",
+                }
+            },
+        },
+    }
+    api.create_namespaced_custom_object(
+        group="security.istio.io",
+        version="v1beta1",
+        namespace=namespace,
+        plural="authorizationpolicies",
+        body=ap_body,
+    )
+    logging.info("Created AuthorizationPolicy %s in %s", AUTHORIZATION_POLICY_NAME, namespace)
+
+
+def delete_predictor_authorization_policy(namespace):
+    """Delete the predictor AuthorizationPolicy."""
+    api = client.CustomObjectsApi()
+    try:
+        api.delete_namespaced_custom_object(
+            group="security.istio.io",
+            version="v1beta1",
+            namespace=namespace,
+            plural="authorizationpolicies",
+            name=AUTHORIZATION_POLICY_NAME,
+        )
+        logging.info("Deleted AuthorizationPolicy %s in %s", AUTHORIZATION_POLICY_NAME, namespace)
+    except client.exceptions.ApiException as e:
+        if e.status != 404:
+            raise
+
+
+# ---------------------------------------------------------------------------
+# Test
+# ---------------------------------------------------------------------------
+def test_sklearn_kserve():
+    """Deploy an sklearn InferenceService, assert its prediction, then clean up."""
+    predictor = V1beta1PredictorSpec(
+        min_replicas=1,
+        sklearn=V1beta1SKLearnSpec(
+            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
+            resources=V1ResourceRequirements(
+                requests={"cpu": "50m", "memory": "128Mi"},
+                limits={"cpu": "100m", "memory": "256Mi"},
+            ),
+        ),
+    )
+
+    isvc = V1beta1InferenceService(
+        api_version=constants.KSERVE_V1BETA1,
+        kind="InferenceService",
+        metadata=client.V1ObjectMeta(
+            name=SERVICE_NAME, namespace=KSERVE_TEST_NAMESPACE
+        ),
+        spec=V1beta1InferenceServiceSpec(predictor=predictor),
+    )
+
+    kserve_client = KServeClient(
+        config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
+    )
+
+    try:
+        # Create the AuthorizationPolicy BEFORE the InferenceService so that it
+        # is already in place when the predictor pod comes up.
+        create_predictor_authorization_policy(KSERVE_TEST_NAMESPACE)
+        kserve_client.create(isvc)
+        kserve_client.wait_isvc_ready(SERVICE_NAME, namespace=KSERVE_TEST_NAMESPACE)
+        response = predict(SERVICE_NAME, IRIS_INPUT)
+        assert response["predictions"] == [1, 1]
+        logging.info(
+            "Python SDK prediction passed for %s in %s",
+            SERVICE_NAME,
+            KSERVE_TEST_NAMESPACE,
+        )
+    finally:
+        # Nested so the AuthorizationPolicy is removed even if the delete below raises.
+        try:
+            kserve_client.delete(SERVICE_NAME, KSERVE_TEST_NAMESPACE)
+        finally:
+            delete_predictor_authorization_policy(KSERVE_TEST_NAMESPACE)
diff --git a/tests/kserve_test.sh b/tests/kserve_test.sh
index e1007b89d1..1ac13c3ef5 100755
--- a/tests/kserve_test.sh
+++ b/tests/kserve_test.sh
@@ -3,51 +3,51 @@ set -euxo pipefail
 
 NAMESPACE=${1:-kubeflow-user-example-com}
 SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-TEST_DIRECTORY="${SCRIPT_DIRECTORY}/kserve"
 
 if ! command -v pytest &> /dev/null; then
-  pip install -r ${TEST_DIRECTORY}/requirements.txt
+  pip install -r ${SCRIPT_DIRECTORY}/requirements.txt
 fi
 
 export KSERVE_INGRESS_HOST_PORT=${KSERVE_INGRESS_HOST_PORT:-localhost:8080}
 export KSERVE_M2M_TOKEN="$(kubectl -n ${NAMESPACE} create token default-editor)"
 export KSERVE_TEST_NAMESPACE=${NAMESPACE}
 
-# Test 1: Model Inference via KServe SDK (pytest creates isvc-sklearn internally)
-if cd ${TEST_DIRECTORY}; then
-  pytest . -vs --log-level info || true
-fi
+# ============================================================
+# Test 1: Model Prediction via KServe Python SDK
+# ============================================================
+# Runs kserve_sklearn_test.py which independently deploys an sklearn
+# InferenceService, predicts via host-based routing, asserts the
+# output, and deletes the InferenceService.
+pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info
 
-# Test 2: Path-based Routing & Ingress Gateway Security (VirtualService + AuthorizationPolicy)
+# ============================================================
+# Test 2: Ingress Gateway — Path-based & Host-based Routing (curl)
+# ============================================================
+# Re-deploy the InferenceService for bash/curl tests (pytest deleted it).
 cat <<EOF | kubectl apply -f -
-apiVersion: networking.istio.io/v1beta1
-kind: VirtualService
+apiVersion: "serving.kserve.io/v1beta1"
+kind: "InferenceService"
 metadata:
-  name: isvc-sklearn-path
+  name: "isvc-sklearn"
   namespace: ${NAMESPACE}
 spec:
-  gateways:
-    - kubeflow/kubeflow-gateway
-  hosts:
-    - '*'
-  http:
-    - match:
-        - uri:
-            prefix: /kserve/${NAMESPACE}/isvc-sklearn/
-      rewrite:
-        uri: /
-      route:
-        - destination:
-            host: cluster-local-gateway.istio-system.svc.cluster.local
-          headers:
-            request:
-              set:
-                Host: isvc-sklearn-predictor.${NAMESPACE}.svc.cluster.local
-          weight: 100
-      timeout: 300s
+  predictor:
+    sklearn:
+      storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
+      resources:
+        requests:
+          cpu: "50m"
+          memory: "128Mi"
+        limits:
+          cpu: "100m"
+          memory: "256Mi"
 EOF
 
-# WARNING: This policy allows ANY valid token from ANY kubeflow namespace to access this InferenceService.
+kubectl wait --for=condition=Ready inferenceservice/isvc-sklearn -n ${NAMESPACE} --timeout=300s
+
+# Allow traffic to the predictor pod. The ingress gateway and Cluster local gateway
+# have already validated the JWT before forwarding.
+# We restrict by source namespace using mTLS identity.
 cat <<EOF | kubectl apply -f -
 apiVersion: security.istio.io/v1beta1
 kind: AuthorizationPolicy
@@ -59,21 +59,30 @@ spec:
   rules:
   - from:
     - source:
-        requestPrincipals: ["*"]
+        namespaces: ["istio-system"]
   selector:
     matchLabels:
       serving.knative.dev/service: isvc-sklearn-predictor
 EOF
 
+# Wait for AuthorizationPolicy to propagate through Envoy
 sleep 60
 
+# --- Test 2a: PATH-BASED routing ---
+# Path-based routing uses the native pathTemplate (/serving/<ns>/<name>/)
+# configured in the inferenceservice-config ConfigMap patch
+# (applications/kserve/kserve/kustomization.yaml). KServe auto-generates
+# a VirtualService on kubeflow-gateway where M2M RequestAuthentication
+# validates the JWT.
+
 # Request without token should be rejected
 RESPONSE_NO_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \
  -H "Content-Type: application/json" \
- "http://${KSERVE_INGRESS_HOST_PORT}/kserve/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \
+ "http://${KSERVE_INGRESS_HOST_PORT}/serving/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \
  -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}')
 
 if [ "$RESPONSE_NO_TOKEN" != "403" ] && [ "$RESPONSE_NO_TOKEN" != "302" ]; then
+  echo "FAIL: Path-based: Expected 403/302 without token, got $RESPONSE_NO_TOKEN"
   exit 1
 fi
 
@@ -81,16 +90,104 @@ fi
 RESPONSE_WITH_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \
  -H "Authorization: Bearer ${KSERVE_M2M_TOKEN}" \
  -H "Content-Type: application/json" \
- "http://${KSERVE_INGRESS_HOST_PORT}/kserve/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \
+ "http://${KSERVE_INGRESS_HOST_PORT}/serving/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \
  -d '{"instances": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]]}')
 
 if [ "$RESPONSE_WITH_TOKEN" != "200" ] && [ "$RESPONSE_WITH_TOKEN" != "404" ] && [ "$RESPONSE_WITH_TOKEN" != "503" ]; then
+  echo "FAIL: Path-based: Expected 200/404/503 with token, got $RESPONSE_WITH_TOKEN"
+  exit 1
+fi
+
+# --- Test 2b: HOST-BASED routing (security verification) ---
+HOST_HEADER="Host: isvc-sklearn.${NAMESPACE}.example.com"
+
+# Request without token should be rejected
+RESPONSE_HOST_NO_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \
+ -H "${HOST_HEADER}" \
+ -H "Content-Type: application/json" \
+ "http://${KSERVE_INGRESS_HOST_PORT}/v1/models/isvc-sklearn:predict" \
+ -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}')
+
+if [ "$RESPONSE_HOST_NO_TOKEN" != "403" ] && [ "$RESPONSE_HOST_NO_TOKEN" != "302" ]; then
+  echo "FAIL: Host-based: Expected 403/302 without token, got $RESPONSE_HOST_NO_TOKEN"
+  exit 1
+fi
+
+# Request with valid token should succeed (the AuthorizationPolicy
+# restricts traffic by source namespace via mTLS identity)
+RESPONSE_HOST_WITH_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \
+ -H "Authorization: Bearer ${KSERVE_M2M_TOKEN}" \
+ -H "${HOST_HEADER}" \
+ -H "Content-Type: application/json" \
+ "http://${KSERVE_INGRESS_HOST_PORT}/v1/models/isvc-sklearn:predict" \
+ -d '{"instances": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]]}')
+
+if [ "$RESPONSE_HOST_WITH_TOKEN" != "200" ] && [ "$RESPONSE_HOST_WITH_TOKEN" != "404" ] && [ "$RESPONSE_HOST_WITH_TOKEN" != "503" ]; then
+  echo "FAIL: Host-based: Expected 200/404/503 with token, got $RESPONSE_HOST_WITH_TOKEN"
   exit 1
 fi
 
+# ============================================================
+# Test 3: KServe Models Web Application API
+# ============================================================
 kubectl wait --for=condition=Available --timeout=300s -n kubeflow deployment/kserve-models-web-app
 
-# Knative Service authentication via cluster-local-gateway
+TOKEN="$(kubectl -n ${NAMESPACE} create token default-editor)"
+BASE_URL="localhost:8080/kserve-endpoints"
+
+cat <<EOF | kubectl apply -f -
+apiVersion: "serving.kserve.io/v1beta1"
+kind: "InferenceService"
+metadata:
+  name: "sklearn-iris"
+  namespace: ${NAMESPACE}
+spec:
+  predictor:
+    sklearn:
+      storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
+      resources:
+        requests:
+          cpu: "50m"
+          memory: "128Mi"
+        limits:
+          cpu: "100m"
+          memory: "256Mi"
+EOF
+
+kubectl wait --for=condition=Ready inferenceservice/sklearn-iris -n ${NAMESPACE} --timeout=120s
+kubectl get inferenceservice sklearn-iris -n ${NAMESPACE}
+
+# Get XSRF token for API calls
+curl -s "http://${BASE_URL}/" \
+  -H "Authorization: Bearer ${TOKEN}" \
+  -v -c /tmp/kserve_xcrf.txt 2>&1 | grep -i "set-cookie"
+XSRFTOKEN=$(grep XSRF-TOKEN /tmp/kserve_xcrf.txt | awk '{print $NF}')
+
+RESPONSE=$(curl -s --fail-with-body \
+  "${BASE_URL}/api/namespaces/${NAMESPACE}/inferenceservices" \
+  -H "Authorization: Bearer ${TOKEN}" \
+  -H "X-XSRF-TOKEN: ${XSRFTOKEN}" \
+  -H "Cookie: XSRF-TOKEN=${XSRFTOKEN}")
+
+echo "$RESPONSE" | grep -q "sklearn-iris" || exit 1
+kubectl get inferenceservice sklearn-iris -n ${NAMESPACE} || exit 1
+READY=$(kubectl get isvc sklearn-iris -n ${NAMESPACE} -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
+[[ "$READY" == "True" ]] || {
+  echo "FAILURE: InferenceService sklearn-iris Ready status is: $READY"
+  exit 1
+}
+
+kubectl delete inferenceservice sklearn-iris -n ${NAMESPACE} || exit 1
+
+# Test unauthorized access to models web application
+UNAUTH_TOKEN="$(kubectl -n default create token default)"
+HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${BASE_URL}/api/namespaces/${NAMESPACE}/inferenceservices" -H "Authorization: Bearer ${UNAUTH_TOKEN}")
+[[ "$HTTP_CODE" == "403" || "$HTTP_CODE" == "401" ]] || { echo "FAILURE: Expected 401/403, got $HTTP_CODE"; exit 1; }
+echo "Models Web Application: Token from unauthorized ServiceAccount cannot list InferenceServices in $NAMESPACE namespace."
+
+# ============================================================
+# Test 4: Knative Service authentication via cluster-local-gateway
+# ============================================================
 cat <<EOF | kubectl apply -f -
 apiVersion: serving.knative.dev/v1
 kind: Service
@@ -120,6 +217,7 @@ RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
     "http://${KSERVE_INGRESS_HOST_PORT}/")
 
 if [ "$RESPONSE" != "403" ]; then
+    echo "FAIL: Unauthenticated access should return 403, got $RESPONSE"
     exit 1
 fi
 
@@ -130,10 +228,13 @@ RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
     "http://${KSERVE_INGRESS_HOST_PORT}/")
 
 if [ "$RESPONSE" != "401" ] && [ "$RESPONSE" != "403" ]; then
+    echo "FAIL: Invalid token should return 401/403, got $RESPONSE"
     exit 1
 fi
 
-# Test 3: Cluster-local-gateway requires authentication
+# ============================================================
+# Test 5: Cluster-local-gateway requires authentication
+# ============================================================
 kubectl port-forward -n istio-system svc/cluster-local-gateway 8081:80 &
 PF_PID=$!
 sleep 5
@@ -148,10 +249,13 @@ RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
     "http://localhost:8081/")
 
 if [ "$RESPONSE" != "403" ]; then
+    echo "FAIL: Cluster-local-gateway unauthenticated access should return 403, got $RESPONSE"
     exit 1
 fi
 
-# Test 4: Namespace isolation - attacker in different namespace should NOT have access
+# ============================================================
+# Test 6: Namespace isolation - attacker should NOT have access
+# ============================================================
 ATTACKER_NAMESPACE="attacker-namespace"
 kubectl create namespace ${ATTACKER_NAMESPACE} --dry-run=client -o yaml | kubectl apply -f -
 
@@ -163,7 +267,7 @@ metadata:
   namespace: ${ATTACKER_NAMESPACE}
 EOF
 
-# Test 5: Unauthenticated request from attacker namespace should be REJECTED
+# Unauthenticated request from attacker namespace should be REJECTED
 RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
     -H "Host: secure-model-predictor.${NAMESPACE}.svc.cluster.local" \
     "http://localhost:8081/")
@@ -173,7 +277,7 @@ if [ "$RESPONSE" == "200" ]; then
     exit 1
 fi
 
-# Test 6: Authenticated request from attacker namespace should ALSO be REJECTED
+# Authenticated request from attacker namespace should ALSO be REJECTED
 ATTACKER_TOKEN=$(kubectl -n ${ATTACKER_NAMESPACE} create token attacker-service-account)
 
 RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
@@ -186,7 +290,11 @@ if [ "$RESPONSE" == "200" ]; then
     exit 1
 fi
 
+# ============================================================
+# Cleanup
+# ============================================================
 kill $PF_PID 2>/dev/null || true
 
 kubectl delete namespace ${ATTACKER_NAMESPACE} --ignore-not-found=true
 kubectl delete ksvc secure-model-predictor -n ${NAMESPACE} --ignore-not-found=true
+kubectl delete inferenceservice isvc-sklearn -n ${NAMESPACE} --ignore-not-found=true
diff --git a/tests/kserve_test.yaml b/tests/kserve_test.yaml
deleted file mode 100644
index 3d12cc117d..0000000000
--- a/tests/kserve_test.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-apiVersion: "serving.kserve.io/v1beta1"
-kind: "InferenceService"
-metadata:
-  name: "sklearn-iris"
-  namespace: "kubeflow-user-example-com"
-spec:
-  predictor:
-    sklearn:
-      resources:
-        limits:
-          cpu: "1"
-          memory: 2Gi
-        requests:
-          cpu: "0.1"
-          memory: 200M
-      storageUri: "gs://kfserving-examples/models/sklearn/1.0/model"
diff --git a/tests/kserve/requirements.txt b/tests/requirements.txt
similarity index 77%
rename from tests/kserve/requirements.txt
rename to tests/requirements.txt
index fbc04cb5b5..511e394b2f 100644
--- a/tests/kserve/requirements.txt
+++ b/tests/requirements.txt
@@ -1,4 +1,4 @@
 pytest>=7.0.0
-kserve>=0.15.0
+kserve>=0.16.0
 kubernetes>=18.20.0
 requests>=2.18.4

From 605d3aba247fe7811f92de57244bf9aa318c4635 Mon Sep 17 00:00:00 2001
From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com>
Date: Mon, 23 Feb 2026 13:34:29 +0100
Subject: [PATCH 2/8] Apply suggestion from @juliusvonkohout

Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com>
---
 .github/workflows/kserve_test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/kserve_test.yaml b/.github/workflows/kserve_test.yaml
index 7f53a9e26e..f6d9ff4d10 100644
--- a/.github/workflows/kserve_test.yaml
+++ b/.github/workflows/kserve_test.yaml
@@ -6,7 +6,7 @@ on:
     - .github/workflows/kserve_test.yaml
     - applications/kserve/**
     - apps/kserve/**
-    - tests/kserve_*
+    - tests/kserve*
     - tests/kserve_test.sh
     - tests/kserve_install.sh
     - common/istio*/**

From 9300633835e8769bed699f6a4c8e21a34cdfe322 Mon Sep 17 00:00:00 2001
From: Siddhant Jain <siddhantjainofficial26@gmail.com>
Date: Mon, 23 Feb 2026 20:06:13 +0530
Subject: [PATCH 3/8] =?UTF-8?q?fix:=20address=20reviewer=20feedback=20?=
 =?UTF-8?q?=E2=80=94=20inline=20deps,=20revert=20AP=20to=20requestPrincipa?=
 =?UTF-8?q?ls?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Siddhant Jain <siddhantjainofficial26@gmail.com>
---
 tests/kserve_sklearn_test.py | 27 +++++++++++++++++++--------
 tests/kserve_test.sh         | 16 ++++++----------
 tests/requirements.txt       |  4 ----
 3 files changed, 25 insertions(+), 22 deletions(-)
 delete mode 100644 tests/requirements.txt

diff --git a/tests/kserve_sklearn_test.py b/tests/kserve_sklearn_test.py
index f6aeb34f1b..4d452982cb 100644
--- a/tests/kserve_sklearn_test.py
+++ b/tests/kserve_sklearn_test.py
@@ -24,14 +24,24 @@
 (common/istio/istio-install/base/deny_all_authorizationpolicy.yaml),
 the predictor pod's sidecar blocks all traffic by default.
 We create an ALLOW AuthorizationPolicy that permits traffic
-to the predictor pod. Security is maintained because the
-ingress gateway and cluster-local-gateway both validate the
-JWT via RequestAuthentication before forwarding.
+to the predictor pod using requestPrincipals: ["*"]. Security
+is maintained because the ingress gateway validates the JWT
+via RequestAuthentication before forwarding.
 """
 
+import os
+import sys
+
+# Install dependencies inline (replaces the deleted requirements.txt).
+# This ensures pytest, kserve SDK, and other deps are available when
+# the CI workflow calls this file via `pytest kserve_sklearn_test.py`.
+os.system(
+    f"{sys.executable} -m pip install -q"
+    " pytest>=7.0.0 kserve>=0.16.0 kubernetes>=18.20.0 requests>=2.18.4"
+)
+
 import json
 import logging
-import os
 import time
 
 import requests
@@ -150,9 +160,10 @@ def create_predictor_authorization_policy(namespace):
     This is needed because the global-deny-all AuthorizationPolicy in
     istio-system blocks all mesh traffic by default.
 
-    We restrict by source namespace using mTLS identity. Security is
-    maintained because the ingress gateway and cluster-local-gateway
-    both validate the JWT before forwarding.
+    We allow any request that carries a valid JWT principal
+    (requestPrincipals: ["*"]). Security is maintained because
+    the ingress gateway validates the JWT via RequestAuthentication
+    before forwarding.
     """
     api = client.CustomObjectsApi()
     ap_body = {
@@ -169,7 +180,7 @@ def create_predictor_authorization_policy(namespace):
                     "from": [
                         {
                             "source": {
-                                "namespaces": ["istio-system"],
+                                "requestPrincipals": ["*"],
                             }
                         }
                     ]
diff --git a/tests/kserve_test.sh b/tests/kserve_test.sh
index 1ac13c3ef5..14c4e3a648 100755
--- a/tests/kserve_test.sh
+++ b/tests/kserve_test.sh
@@ -4,10 +4,6 @@ set -euxo pipefail
 NAMESPACE=${1:-kubeflow-user-example-com}
 SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-if ! command -v pytest &> /dev/null; then
-  pip install -r ${SCRIPT_DIRECTORY}/requirements.txt
-fi
-
 export KSERVE_INGRESS_HOST_PORT=${KSERVE_INGRESS_HOST_PORT:-localhost:8080}
 export KSERVE_M2M_TOKEN="$(kubectl -n ${NAMESPACE} create token default-editor)"
 export KSERVE_TEST_NAMESPACE=${NAMESPACE}
@@ -18,7 +14,7 @@ export KSERVE_TEST_NAMESPACE=${NAMESPACE}
 # Runs kserve_sklearn_test.py which independently deploys an sklearn
 # InferenceService, predicts via host-based routing, asserts the
 # output, and deletes the InferenceService.
-pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info
+python -m pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info
 
 # ============================================================
 # Test 2: Ingress Gateway — Path-based & Host-based Routing (curl)
@@ -45,9 +41,9 @@ EOF
 
 kubectl wait --for=condition=Ready inferenceservice/isvc-sklearn -n ${NAMESPACE} --timeout=300s
 
-# Allow traffic to the predictor pod. The ingress gateway and Cluster local gateway
-# have already validated the JWT before forwarding.
-# We restrict by source namespace using mTLS identity.
+# Allow traffic to the predictor pod from any authenticated principal.
+# The ingress gateway and Cluster local gateway validate the JWT
+# via RequestAuthentication before forwarding.
 cat <<EOF | kubectl apply -f -
 apiVersion: security.istio.io/v1beta1
 kind: AuthorizationPolicy
@@ -59,7 +55,7 @@ spec:
   rules:
   - from:
     - source:
-        namespaces: ["istio-system"]
+        requestPrincipals: ["*"]
   selector:
     matchLabels:
       serving.knative.dev/service: isvc-sklearn-predictor
@@ -114,7 +110,7 @@ if [ "$RESPONSE_HOST_NO_TOKEN" != "403" ] && [ "$RESPONSE_HOST_NO_TOKEN" != "302
 fi
 
 # Request with valid token should succeed (the AuthorizationPolicy
-# restricts traffic by source namespace via mTLS identity)
+# allows any request with a valid JWT principal)
 RESPONSE_HOST_WITH_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \
  -H "Authorization: Bearer ${KSERVE_M2M_TOKEN}" \
  -H "${HOST_HEADER}" \
diff --git a/tests/requirements.txt b/tests/requirements.txt
deleted file mode 100644
index 511e394b2f..0000000000
--- a/tests/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-pytest>=7.0.0
-kserve>=0.16.0
-kubernetes>=18.20.0
-requests>=2.18.4

From db2e076798a8aa0c1154435255c8f7333cc60513 Mon Sep 17 00:00:00 2001
From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com>
Date: Mon, 23 Feb 2026 18:00:57 +0100
Subject: [PATCH 4/8] Apply suggestions from code review

Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com>
---
 tests/kserve_sklearn_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/kserve_sklearn_test.py b/tests/kserve_sklearn_test.py
index 4d452982cb..14bf452105 100644
--- a/tests/kserve_sklearn_test.py
+++ b/tests/kserve_sklearn_test.py
@@ -33,7 +33,7 @@
 import sys
 
 # Install dependencies inline (replaces the deleted requirements.txt).
-# This ensures pytest, kserve SDK, and other deps are available when
+# This ensures pytest, kserve SDK, and other dependencies are available when
 # the CI workflow calls this file via `pytest kserve_sklearn_test.py`.
 os.system(
     f"{sys.executable} -m pip install -q"

From 24a84fa30a4a5da157785415944716d4e76ae6b5 Mon Sep 17 00:00:00 2001
From: Siddhant Jain <siddhantjainofficial26@gmail.com>
Date: Mon, 23 Feb 2026 22:47:21 +0530
Subject: [PATCH 5/8] fix: bootstrap pytest before python -m pytest in
 kserve_test.sh

Signed-off-by: Siddhant Jain <siddhantjainofficial26@gmail.com>
---
 tests/kserve_test.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/kserve_test.sh b/tests/kserve_test.sh
index 14c4e3a648..51b27c4422 100755
--- a/tests/kserve_test.sh
+++ b/tests/kserve_test.sh
@@ -14,6 +14,7 @@ export KSERVE_TEST_NAMESPACE=${NAMESPACE}
 # Runs kserve_sklearn_test.py which independently deploys an sklearn
 # InferenceService, predicts via host-based routing, asserts the
 # output, and deletes the InferenceService.
+pip install -q pytest
 python -m pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info
 
 # ============================================================

From f5abadd8e3a7419d396808a3f8c8f3cfa219542b Mon Sep 17 00:00:00 2001
From: Siddhant Jain <siddhantjainofficial26@gmail.com>
Date: Mon, 23 Feb 2026 23:52:51 +0530
Subject: [PATCH 6/8] fix: use permissive allow-all AuthorizationPolicy (rules:
 [{}]) for predictor pod

Signed-off-by: Siddhant Jain <siddhantjainofficial26@gmail.com>
---
 tests/kserve_sklearn_test.py | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/tests/kserve_sklearn_test.py b/tests/kserve_sklearn_test.py
index 14bf452105..64f374045c 100644
--- a/tests/kserve_sklearn_test.py
+++ b/tests/kserve_sklearn_test.py
@@ -23,10 +23,9 @@
 Because of the mesh-wide global-deny-all AuthorizationPolicy
 (common/istio/istio-install/base/deny_all_authorizationpolicy.yaml),
 the predictor pod's sidecar blocks all traffic by default.
-We create an ALLOW AuthorizationPolicy that permits traffic
-to the predictor pod using requestPrincipals: ["*"]. Security
-is maintained because the ingress gateway validates the JWT
-via RequestAuthentication before forwarding.
+We create a permissive ALLOW AuthorizationPolicy (rules: [{}]) on the
+predictor pod. Security is enforced at the ingress gateway, which
+validates the JWT via RequestAuthentication before forwarding.
 """
 
 import os
@@ -160,10 +159,12 @@ def create_predictor_authorization_policy(namespace):
     This is needed because the global-deny-all AuthorizationPolicy in
     istio-system blocks all mesh traffic by default.
 
-    We allow any request that carries a valid JWT principal
-    (requestPrincipals: ["*"]). Security is maintained because
-    the ingress gateway validates the JWT via RequestAuthentication
-    before forwarding.
+    WARNING: This uses a permissive allow-all rule (rules: [{}]) because
+    the predictor pod's Envoy sidecar has no RequestAuthentication
+    configured, so requestPrincipals is always empty and a
+    principal-based rule would never match. Security is still enforced
+    at the ingress gateway, which validates the JWT via
+    RequestAuthentication before forwarding traffic.
     """
     api = client.CustomObjectsApi()
     ap_body = {
@@ -175,17 +176,10 @@ def create_predictor_authorization_policy(namespace):
         },
         "spec": {
             "action": "ALLOW",
-            "rules": [
-                {
-                    "from": [
-                        {
-                            "source": {
-                                "requestPrincipals": ["*"],
-                            }
-                        }
-                    ]
-                }
-            ],
+            # WARNING: allow-all rule — the predictor sidecar has no
+            # RequestAuthentication, so requestPrincipals: ["*"] cannot
+            # work here. Security is enforced at the ingress gateway.
+            "rules": [{}],
             "selector": {
                 "matchLabels": {
                     "serving.knative.dev/service": f"{SERVICE_NAME}-predictor",

From 4b238f2fe55f976d34a77bc6965f301c35a9a610 Mon Sep 17 00:00:00 2001
From: Siddhant Jain <siddhantjainofficial26@gmail.com>
Date: Tue, 24 Feb 2026 00:16:42 +0530
Subject: [PATCH 7/8] fix: use allow-all AuthorizationPolicy in bash test (same
 sidecar issue)

Signed-off-by: Siddhant Jain <siddhantjainofficial26@gmail.com>
---
 tests/kserve_test.sh | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/kserve_test.sh b/tests/kserve_test.sh
index 51b27c4422..eb8727ed65 100755
--- a/tests/kserve_test.sh
+++ b/tests/kserve_test.sh
@@ -42,9 +42,9 @@ EOF
 
 kubectl wait --for=condition=Ready inferenceservice/isvc-sklearn -n ${NAMESPACE} --timeout=300s
 
-# Allow traffic to the predictor pod from any authenticated principal.
-# The ingress gateway and Cluster local gateway validate the JWT
-# via RequestAuthentication before forwarding.
+# WARNING: allow-all rule — the predictor sidecar has no RequestAuthentication,
+# so requestPrincipals: ["*"] cannot work here. Security is enforced at the
+# ingress gateway, which validates the JWT before forwarding traffic.
 cat <<EOF | kubectl apply -f -
 apiVersion: security.istio.io/v1beta1
 kind: AuthorizationPolicy
@@ -54,9 +54,7 @@ metadata:
 spec:
   action: ALLOW
   rules:
-  - from:
-    - source:
-        requestPrincipals: ["*"]
+  - {}
   selector:
     matchLabels:
       serving.knative.dev/service: isvc-sklearn-predictor

From bbf86a491c8d93d22c1060614ade423754d33bcf Mon Sep 17 00:00:00 2001
From: Siddhant Jain <siddhantjainofficial26@gmail.com>
Date: Wed, 25 Feb 2026 01:08:28 +0530
Subject: [PATCH 8/8] docs: update KServe Authentication section in
 oauth2-proxy README

Signed-off-by: Siddhant Jain <siddhantjainofficial26@gmail.com>
---
 common/oauth2-proxy/README.md | 140 ++++++++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)

diff --git a/common/oauth2-proxy/README.md b/common/oauth2-proxy/README.md
index 8c16b2b168..bc5940b5b2 100644
--- a/common/oauth2-proxy/README.md
+++ b/common/oauth2-proxy/README.md
@@ -275,6 +275,141 @@ for listing notebooks:
 
 ## KServe Authentication
 
+KServe inference endpoints are secured through a layered approach using
+Istio `RequestAuthentication` and `AuthorizationPolicy` resources.
+The examples below focus on machine-to-machine (M2M) access using service
+account tokens. Browser-based user access follows the general Kubeflow
+`oauth2-proxy` flow described above.
+
+### Traffic Flow and Security Layers
+
+Inference requests to KServe models pass through two security checkpoints:
+
+```
+client ──► istio-ingressgateway (istio-system) ──► predictor pod sidecar ──► predictor container
+              │                                         │
+              ▼                                         ▼
+         RequestAuthentication                   AuthorizationPolicy
+         (JWT validation)                        (access control)
+```
+
+1. **Ingress gateway** (`istio-system`): A `RequestAuthentication` resource
+   validates the JWT in the `Authorization: Bearer <token>` header. Requests
+   with an invalid token are rejected with `401`. Requests without any token
+   pass through (handled by the `AuthorizationPolicy` at the next layer).
+2. **Predictor pod sidecar**: An `AuthorizationPolicy` controls which
+   requests reach the model container.
+
+### Configuring AuthorizationPolicy for Predictor Pods
+
+Kubeflow ships a `global-deny-all` `AuthorizationPolicy` in `istio-system` that
+blocks all mesh traffic by default. To allow inference traffic to reach a
+predictor pod, you must create an `AuthorizationPolicy` in the model's
+namespace.
+
+The **intended** configuration uses `requestPrincipals: ["*"]`, which matches
+any request carrying a validated JWT principal:
+
+```yaml
+apiVersion: security.istio.io/v1beta1
+kind: AuthorizationPolicy
+metadata:
+  name: allow-isvc-sklearn
+  namespace: kubeflow-user-example-com
+spec:
+  action: ALLOW
+  rules:
+  - from:
+    - source:
+        requestPrincipals: ["*"]
+  selector:
+    matchLabels:
+      serving.knative.dev/service: isvc-sklearn-predictor
+```
+
+`requestPrincipals: ["*"]` only matches when the predictor pod's Envoy sidecar
+has itself authenticated the JWT, i.e. a `RequestAuthentication` applies to
+the predictor workload and the token reaches it. mTLS and
+`PeerAuthentication` do not carry the JWT principal. Where that is not
+configured (e.g., the KinD-based CI setup), `requestPrincipals` is always
+empty at the sidecar, and the rule will never match.
+
+In such environments, the CI tests use a permissive fallback:
+
+```yaml
+spec:
+  action: ALLOW
+  rules:
+  - {}   # allow-all: security is enforced at the ingress gateway
+  selector:
+    matchLabels:
+      serving.knative.dev/service: isvc-sklearn-predictor
+```
+
+> **Important:** `rules: [{}]` allows **all** traffic to the predictor pod,
+> including unauthenticated requests that bypass the ingress gateway.
+> This is acceptable in CI because the ingress gateway is the primary
+> security boundary — it validates the JWT **before** forwarding traffic
+> to the predictor. In production, prefer a narrower rule for defense in
+> depth: `requestPrincipals: ["*"]` where the predictor sidecar can validate
+> the JWT, or `source.namespaces` where mTLS is enforced.
+
+### Path-Based and Host-Based Routing
+
+KServe supports two routing modes for inference requests, both secured by the
+same authentication flow:
+
+| Mode | URL pattern | Configuration |
+|------|-------------|---------------|
+| Path-based | `http://<gateway>/serving/<namespace>/<name>/v1/models/<name>:predict` | `pathTemplate` in `inferenceservice-config` ConfigMap |
+| Host-based | `http://<gateway>/v1/models/<name>:predict` with `Host: <name>.<namespace>.example.com` | `domainTemplate` in `inferenceservice-config` ConfigMap |
+
+Path-based routing is configured via a kustomize patch on the
+`inferenceservice-config` ConfigMap
+(`applications/kserve/kserve/kustomization.yaml`):
+
+```json
+{
+  "pathTemplate": "/serving/{{ .Namespace }}/{{ .Name }}"
+}
+```
+
+KServe auto-generates a `VirtualService` on the `kubeflow-gateway` for each
+`InferenceService`, enabling both routing modes simultaneously.
+
+### KServe Models Web Application Authentication
+
+The KServe Models Web Application (`kserve-models-web-app`) uses the same
+XSRF + Bearer token authentication pattern as other Kubeflow web applications.
+API calls require:
+
+1. A valid XSRF token (obtained via cookie on the initial page load)
+2. A valid `Authorization: Bearer <token>` header
+3. The token's identity must have RBAC permissions in the target namespace
+
+Unauthorized service accounts (e.g., `default` SA from a different namespace)
+receive `401`/`403` when attempting to list `InferenceService` resources in a
+namespace they do not have access to.
+
+### CI Test Coverage
+
+The KServe test suite (`tests/kserve_test.sh`) validates the following
+authentication and security scenarios end-to-end in a KinD cluster:
+
+| # | Test | What is verified |
+|---|------|-----------------|
+| 1 | Model prediction via KServe Python SDK | InferenceService deployment, prediction, and cleanup using the `kserve` SDK with M2M token |
+| 2a | Path-based routing without token | Unauthenticated request returns `403`/`302` |
+| 2b | Host-based routing without token | Unauthenticated request returns `403`/`302` |
+| 2c | Path-based routing with valid token | Authenticated request is not rejected (`200`; the script also tolerates `404`/`503`) |
+| 2d | Host-based routing with valid token | Authenticated request is not rejected (`200`; the script also tolerates `404`/`503`) |
+| 3 | KServe Models Web App API | XSRF + auth flow, unauthorized SA gets `401`/`403` |
+| 4 | Knative Service auth via cluster-local-gateway | Unauthenticated and invalid-token requests are rejected |
+| 5 | Cluster-local-gateway authentication | Direct access without token returns `403` |
+| 6 | Namespace isolation | Cross-namespace attacker token is rejected |
+
+### Architecture Analysis (Future Improvements)
+
 The analysis of KServe auth capabilities suggests that while it's possible to limit access to only authenticated agents,
 there might be some improvements required to enable access only to authorized agents.
 
@@ -297,6 +432,11 @@ This is based on the following:
    > create an [Istio AuthorizationPolicy](https://istio.io/latest/docs/reference/config/security/authorization-policy/) to grant access to the pods or disable it
 
    Most probably some work is needed to enable authorized access to kserve models.
+3. Potential improvement: adding `source.namespaces` to the `AuthorizationPolicy`
+   to restrict access to traffic originating from specific namespaces (e.g.,
+   `istio-system`). This would provide an additional layer of security but
+   requires proper mTLS/PeerAuthentication configuration to propagate SPIFFE
+   identities correctly.
 
 ## Links