From 1ce0284af1564808e4d08781c996802442f0a527 Mon Sep 17 00:00:00 2001 From: Siddhant Jain Date: Sun, 22 Feb 2026 15:51:27 +0530 Subject: [PATCH 1/8] refactor(tests): consolidate KServe test infrastructure into two-file pattern Merge scattered KServe test scripts into the canonical *_install.sh + *_test.sh pattern matching all other components. Inline utils.py and iris_input.json into kserve_sklearn_test.py, removing the tests/kserve/ subfolder entirely. Key changes: - Consolidate 3 test scripts into kserve_test.sh (6 tests) - Add pathTemplate ConfigMap patch in kustomization.yaml - Use source.namespaces mTLS AuthorizationPolicy - Both path-based and host-based routing tested via curl - Python SDK prediction test runs independently via pytest - Remove accidental build artifacts (kubeflow-all.yaml) Signed-off-by: Siddhant Jain --- .../full_kubeflow_integration_test.yaml | 3 - .../kserve_models_web_application_test.yaml | 10 +- .github/workflows/kserve_test.yaml | 2 +- applications/kserve/kserve/kustomization.yaml | 30 +- tests/kserve/data/iris_input.json | 6 - tests/kserve/kserve-external-access.yaml | 38 --- tests/kserve/test_sklearn.py | 60 ---- tests/kserve/utils.py | 125 --------- tests/kserve_complete_authentication_test.sh | 127 --------- tests/kserve_models_web_application_test.sh | 58 ---- tests/kserve_setup_external_access.sh | 58 ---- tests/kserve_sklearn_test.py | 259 ++++++++++++++++++ tests/kserve_test.sh | 184 ++++++++++--- tests/kserve_test.yaml | 16 -- tests/{kserve => }/requirements.txt | 2 +- 15 files changed, 444 insertions(+), 534 deletions(-) delete mode 100644 tests/kserve/data/iris_input.json delete mode 100644 tests/kserve/kserve-external-access.yaml delete mode 100644 tests/kserve/test_sklearn.py delete mode 100644 tests/kserve/utils.py delete mode 100755 tests/kserve_complete_authentication_test.sh delete mode 100755 tests/kserve_models_web_application_test.sh delete mode 100755 tests/kserve_setup_external_access.sh create mode 100644 tests/kserve_sklearn_test.py delete mode 100644 tests/kserve_test.yaml rename tests/{kserve => }/requirements.txt (77%) diff --git a/.github/workflows/full_kubeflow_integration_test.yaml b/.github/workflows/full_kubeflow_integration_test.yaml index a2351b7c37..f3a6a82c23 100644 --- a/.github/workflows/full_kubeflow_integration_test.yaml +++ b/.github/workflows/full_kubeflow_integration_test.yaml @@ -200,9 +200,6 @@ jobs: - name: Run KServe Test run: ./tests/kserve_test.sh ${KF_PROFILE} - - name: Test KServe Models Web Application API - run: ./tests/kserve_models_web_application_test.sh "${KF_PROFILE}" - - name: Run Spark Test run: chmod u+x tests/*.sh && ./tests/spark_test.sh "${KF_PROFILE}" diff --git a/.github/workflows/kserve_models_web_application_test.yaml b/.github/workflows/kserve_models_web_application_test.yaml index 7a1d426c8f..1be3e3368a 100644 --- a/.github/workflows/kserve_models_web_application_test.yaml +++ b/.github/workflows/kserve_models_web_application_test.yaml @@ -6,6 +6,7 @@ on: - .github/workflows/kserve_models_web_application_test.yaml - applications/kserve/** - tests/kserve* + - tests/istio* - common/istio*/** - common/knative/** @@ -59,6 +60,11 @@ jobs: - name: Create KF Profile run: ./tests/kubeflow_profile_install.sh + - name: Setup python 3.12 + uses: actions/setup-python@v4 + with: + python-version: 3.12 + - name: Wait for All Pods to be Ready run: | kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=300s --field-selector=status.phase!=Succeeded @@ -69,5 +75,5 @@ jobs: - name: Port-forward the istio-ingress gateway run: ./tests/port_forward_gateway.sh - - name: Test KServe Models Web Application API - run: ./tests/kserve_models_web_application_test.sh "${KF_PROFILE}" + - name: Run KServe tests + run: ./tests/kserve_test.sh "${KF_PROFILE}" diff --git a/.github/workflows/kserve_test.yaml b/.github/workflows/kserve_test.yaml index b615114a27..7f53a9e26e 100644 --- a/.github/workflows/kserve_test.yaml +++ b/.github/workflows/kserve_test.yaml @@ -6,7 +6,7 @@ on: - .github/workflows/kserve_test.yaml - applications/kserve/** - apps/kserve/** - - tests/kserve/** + - tests/kserve_* - tests/kserve_test.sh - tests/kserve_install.sh - common/istio*/** diff --git a/applications/kserve/kserve/kustomization.yaml b/applications/kserve/kserve/kustomization.yaml index b926a3f89f..0215392727 100644 --- a/applications/kserve/kserve/kustomization.yaml +++ b/applications/kserve/kserve/kustomization.yaml @@ -59,10 +59,38 @@ patches: kind: LLMInferenceServiceConfig # Delete the ValidatingWebhookConfiguration for LLM resources -# Webhook server (llmisvc-webhook-server-service) is not running → EOF errors +# Webhook server (llmisvc-webhook-server-service) is not running — EOF errors - patch: | apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: name: llminferenceserviceconfig.serving.kserve.io $patch: delete + +# Enable path-based routing via pathTemplate on the inferenceservice-config ConfigMap. +# This allows KServe to auto-generate VirtualServices for path-based URLs +# (/serving///...), eliminating manual VirtualService creation. +# Ref: https://github.com/kserve/kserve/issues/2257 +# Ref: https://github.com/kserve/kserve/blob/master/config/configmap/inferenceservice.yaml#L389 +- patch: | + apiVersion: v1 + kind: ConfigMap + metadata: + name: inferenceservice-config + namespace: kubeflow + data: + ingress: |- + { + "enableGatewayApi": false, + "kserveIngressGateway": "kserve/kserve-ingress-gateway", + "ingressGateway": "kubeflow/kubeflow-gateway", + "localGateway": "knative-serving/knative-local-gateway", + "localGatewayService": "knative-local-gateway.istio-system.svc.cluster.local", + "ingressDomain": "example.com", + "ingressClassName": "istio", + "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}", + "urlScheme": "http", + "disableIstioVirtualHost": false, + "disableIngressCreation": false, + "pathTemplate": "/serving/{{ .Namespace }}/{{ .Name }}" + } diff --git a/tests/kserve/data/iris_input.json b/tests/kserve/data/iris_input.json deleted file mode 100644 index 77839728a0..0000000000 --- a/tests/kserve/data/iris_input.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "instances": [ - [6.8, 2.8, 4.8, 1.4], - [6.0, 3.4, 4.5, 1.6] - ] -} diff --git a/tests/kserve/kserve-external-access.yaml b/tests/kserve/kserve-external-access.yaml deleted file mode 100644 index 975c4baa07..0000000000 --- a/tests/kserve/kserve-external-access.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# KServe External Access Configuration -# This enables secure external access to KServe InferenceServices - -apiVersion: networking.istio.io/v1beta1 -kind: VirtualService -metadata: - name: kserve-external-access - namespace: REPLACE_WITH_NAMESPACE # e.g., kubeflow-user-example-com -spec: - gateways: - - kubeflow/kubeflow-gateway # External gateway - hosts: - - '*' - http: - # Path-based routing for KServe models - - match: - - uri: - prefix: /kserve/REPLACE_WITH_NAMESPACE/ - rewrite: - uri: / - route: - - destination: - # Route through cluster-local-gateway (secured with JWT) - host: cluster-local-gateway.istio-system.svc.cluster.local - headers: - request: - set: - # Set the correct host header for the target service - Host: REPLACE_WITH_SERVICE_NAME.REPLACE_WITH_NAMESPACE.svc.cluster.local - weight: 100 - timeout: 300s - # Optional: Add CORS headers for browser access - headers: - response: - add: - Access-Control-Allow-Origin: "*" - Access-Control-Allow-Methods: "GET, POST, OPTIONS" - Access-Control-Allow-Headers: "Authorization, Content-Type" diff --git a/tests/kserve/test_sklearn.py b/tests/kserve/test_sklearn.py deleted file mode 100644 index e72733d0ee..0000000000 --- a/tests/kserve/test_sklearn.py +++ /dev/null @@ -1,60 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from kubernetes import client -from kubernetes.client import V1ResourceRequirements - -from kserve import ( - constants, - KServeClient, - V1beta1InferenceService, - V1beta1InferenceServiceSpec, - V1beta1PredictorSpec, - V1beta1SKLearnSpec, -) -from utils import KSERVE_TEST_NAMESPACE -from utils import predict - - -def test_sklearn_kserve(): - service_name = "isvc-sklearn" - predictor = V1beta1PredictorSpec( - min_replicas=1, - sklearn=V1beta1SKLearnSpec( - storage_uri="gs://kfserving-examples/models/sklearn/1.0/model", - resources=V1ResourceRequirements( - requests={"cpu": "50m", "memory": "128Mi"}, - limits={"cpu": "100m", "memory": "256Mi"}, - ), - ), - ) - - isvc = V1beta1InferenceService( - api_version=constants.KSERVE_V1BETA1, - kind="InferenceService", - metadata=client.V1ObjectMeta( - name=service_name, namespace=KSERVE_TEST_NAMESPACE - ), - spec=V1beta1InferenceServiceSpec(predictor=predictor), - ) - - kserve_client = KServeClient( - config_file=os.environ.get("KUBECONFIG", "~/.kube/config") - ) - kserve_client.create(isvc) - kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE) - res = predict(service_name, "./data/iris_input.json") - assert res["predictions"] == [1, 1] - kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE) diff --git a/tests/kserve/utils.py b/tests/kserve/utils.py deleted file mode 100644 index 36091a628a..0000000000 --- a/tests/kserve/utils.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import logging -import os -import time -from urllib.parse import urlparse - -import requests -from kubernetes import client - -from kserve import KServeClient -from kserve import constants - -logging.basicConfig(level=logging.INFO) - -KSERVE_NAMESPACE = "kserve" -KSERVE_TEST_NAMESPACE = "kubeflow-user-example-com" -MODEL_CLASS_NAME = "modelClass" - - -class M2mTokenNotAvailable(Exception): - pass - - -def get_cluster_ip(name="istio-ingressgateway", namespace="istio-system"): - api_instance = client.CoreV1Api(client.ApiClient()) - service = api_instance.read_namespaced_service(name, namespace) - if service.status.load_balancer.ingress is None: - cluster_ip = service.spec.cluster_ip - else: - if service.status.load_balancer.ingress[0].hostname: - cluster_ip = service.status.load_balancer.ingress[0].hostname - else: - cluster_ip = service.status.load_balancer.ingress[0].ip - return os.environ.get("KSERVE_INGRESS_HOST_PORT", cluster_ip) - - -def get_m2m_auth_token(env_name="KSERVE_M2M_TOKEN"): - try: - return os.environ[env_name] - except KeyError: - raise M2mTokenNotAvailable(env_name) - - -def predict( - service_name, - input_json, - protocol_version="v1", - version=constants.KSERVE_V1BETA1_VERSION, - model_name=None, -): - with open(input_json) as json_file: - data = json.load(json_file) - - return predict_str( - service_name=service_name, - input_json=json.dumps(data), - protocol_version=protocol_version, - version=version, - model_name=model_name, - ) - - -def predict_str( - service_name, - input_json, - protocol_version="v1", - version=constants.KSERVE_V1BETA1_VERSION, - model_name=None, -): - kfs_client = KServeClient( - config_file=os.environ.get("KUBECONFIG", "~/.kube/config") - ) - isvc = kfs_client.get( - service_name, - namespace=KSERVE_TEST_NAMESPACE, - version=version, - ) - # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604 - time.sleep(10) - cluster_ip = get_cluster_ip() - host = f"{service_name}.{KSERVE_TEST_NAMESPACE}.example.com" - headers = { - "Host": host, - "Content-Type": "application/json", - } - - try: - token = get_m2m_auth_token() - headers.update({"Authorization": f"Bearer {token}"}) - logging.info("M2M Token Found.") - except M2mTokenNotAvailable: - logging.warning("M2M Token Not found, client authentication disabled.") - - if model_name is None: - model_name = service_name - - url = f"http://{cluster_ip}/v1/models/{model_name}:predict" - if protocol_version == "v2": - url = f"http://{cluster_ip}/v2/models/{model_name}/infer" - - logging.info("Sending Header = %s", headers) - logging.info("Sending url = %s", url) - logging.info("Sending request data: %s", input_json) - response = requests.post(url, input_json, headers=headers) - logging.info( - "Got response code %s, content %s", response.status_code, response.content - ) - if response.status_code == 200: - preds = json.loads(response.content.decode("utf-8")) - return preds - else: - response.raise_for_status() diff --git a/tests/kserve_complete_authentication_test.sh b/tests/kserve_complete_authentication_test.sh deleted file mode 100755 index 6e28d940cd..0000000000 --- a/tests/kserve_complete_authentication_test.sh +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/bash -set -euo pipefail - -PRIMARY_NAMESPACE="kubeflow-user-example-com" -ATTACKER_NAMESPACE="kubeflow-user-attacker" -KSERVE_INGRESS_HOST_PORT="${KSERVE_INGRESS_HOST_PORT:-localhost:8080}" - -function setup_test_environment() { - kubectl create namespace $PRIMARY_NAMESPACE --dry-run=client -o yaml | kubectl apply -f - - kubectl create namespace $ATTACKER_NAMESPACE --dry-run=client -o yaml | kubectl apply -f - - kubectl create serviceaccount default-editor -n $PRIMARY_NAMESPACE --dry-run=client -o yaml | kubectl apply -f - - kubectl create serviceaccount attacker-sa -n $ATTACKER_NAMESPACE --dry-run=client -o yaml | kubectl apply -f - - - cat < /dev/null || true -} - -function test_internal_access() { - kubectl run test-client -n $PRIMARY_NAMESPACE --image=curlimages/curl --restart=Never -- sleep 3600 2>/dev/null || true - kubectl wait --for=condition=ready pod/test-client -n $PRIMARY_NAMESPACE --timeout=60s 2>/dev/null || return - - PRIMARY_TOKEN=$(kubectl -n $PRIMARY_NAMESPACE create token default-editor) - - kubectl exec -n $PRIMARY_NAMESPACE test-client -- \ - curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: Bearer $PRIMARY_TOKEN" \ - -H "Content-Type: application/json" \ - "http://secure-sklearn-predictor.$PRIMARY_NAMESPACE.svc.cluster.local/v1/models/secure-sklearn:predict" \ - -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}' 2>/dev/null || true - - kubectl delete pod test-client -n $PRIMARY_NAMESPACE --ignore-not-found=true -} - -function cleanup() { - kubectl delete namespace $ATTACKER_NAMESPACE --ignore-not-found=true - kubectl delete inferenceservice secure-sklearn -n $PRIMARY_NAMESPACE --ignore-not-found=true - kubectl delete pod test-client -n $PRIMARY_NAMESPACE --ignore-not-found=true -} - -function main() { - setup_test_environment - test_gateway_jwt_validation - test_namespace_isolation - test_external_access - test_internal_access - cleanup -} - -main \ No newline at end of file diff --git a/tests/kserve_models_web_application_test.sh b/tests/kserve_models_web_application_test.sh deleted file mode 100755 index 42df46fd2d..0000000000 --- a/tests/kserve_models_web_application_test.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -set -euxo pipefail - - -KF_PROFILE=${1:-kubeflow-user-example-com} -TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)" -BASE_URL="localhost:8080/kserve-endpoints" - -cat <&1 | grep -i "set-cookie" -XSRFTOKEN=$(grep XSRF-TOKEN /tmp/kserve_xcrf.txt | awk '{print $NF}') - -RESPONSE=$(curl -s --fail-with-body \ - "${BASE_URL}/api/namespaces/${KF_PROFILE}/inferenceservices" \ - -H "Authorization: Bearer ${TOKEN}" \ - -H "X-XSRF-TOKEN: ${XSRFTOKEN}" \ - -H "Cookie: XSRF-TOKEN=${XSRFTOKEN}") - -echo "$RESPONSE" | grep -q "sklearn-iris" || exit 1 -kubectl get inferenceservice sklearn-iris -n ${KF_PROFILE} || exit 1 -READY=$(kubectl get isvc sklearn-iris -n ${KF_PROFILE} -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') -[[ "$READY" == "True" ]] || { - echo "FAILURE: InferenceService Ready status is: $READY" - exit 1 -} - -kubectl delete inferenceservice sklearn-iris -n ${KF_PROFILE} || exit 1 - -# Test unauthorized access -TOKEN="$(kubectl -n default create token default)" -BASE_URL="localhost:8080/kserve-endpoints" -HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${BASE_URL}/api/namespaces/${KF_PROFILE}/inferenceservices" -H "Authorization: Bearer ${TOKEN}") -[[ "$HTTP_CODE" == "403" || "$HTTP_CODE" == "401" ]] || { echo "FAILURE: Expected 401/403, got $HTTP_CODE"; exit 1; } -echo "Test succeeded. Token from unauthorized ServiceAccount cannot list InferenceServices in $KF_PROFILE namespace." diff --git a/tests/kserve_setup_external_access.sh b/tests/kserve_setup_external_access.sh deleted file mode 100755 index 61d750b77d..0000000000 --- a/tests/kserve_setup_external_access.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# Script to setup external access for KServe testing - -set -euo pipefail - -NAMESPACE=${1:-kubeflow-user-example-com} -SERVICE_NAME=${2:-test-sklearn} - -echo "Setting up external access for KServe..." -echo "Namespace: $NAMESPACE" -echo "Service: $SERVICE_NAME" -echo - -# Create VirtualService for external access -cat < /dev/null; then - pip install -r ${TEST_DIRECTORY}/requirements.txt + pip install -r ${SCRIPT_DIRECTORY}/requirements.txt fi export KSERVE_INGRESS_HOST_PORT=${KSERVE_INGRESS_HOST_PORT:-localhost:8080} export KSERVE_M2M_TOKEN="$(kubectl -n ${NAMESPACE} create token default-editor)" export KSERVE_TEST_NAMESPACE=${NAMESPACE} -# Test 1: Model Inference via KServe SDK (pytest creates isvc-sklearn internally) -if cd ${TEST_DIRECTORY}; then - pytest . -vs --log-level info || true -fi +# ============================================================ +# Test 1: Model Prediction via KServe Python SDK +# ============================================================ +# Runs kserve_sklearn_test.py which independently deploys an sklearn +# InferenceService, predicts via host-based routing, asserts the +# output, and deletes the InferenceService. +pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info -# Test 2: Path-based Routing & Ingress Gateway Security (VirtualService + AuthorizationPolicy) +# ============================================================ +# Test 2: Ingress Gateway — Path-based & Host-based Routing (curl) +# ============================================================ +# Re-deploy the InferenceService for bash/curl tests (pytest deleted it). cat <//) +# configured in the inferenceservice-config ConfigMap patch +# (applications/kserve/kserve/kustomization.yaml). KServe auto-generates +# a VirtualService on kubeflow-gateway where M2M RequestAuthentication +# validates the JWT. + # Request without token should be rejected RESPONSE_NO_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \ -H "Content-Type: application/json" \ - "http://${KSERVE_INGRESS_HOST_PORT}/kserve/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \ + "http://${KSERVE_INGRESS_HOST_PORT}/serving/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \ -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}') if [ "$RESPONSE_NO_TOKEN" != "403" ] && [ "$RESPONSE_NO_TOKEN" != "302" ]; then + echo "FAIL: Path-based: Expected 403/302 without token, got $RESPONSE_NO_TOKEN" exit 1 fi @@ -81,16 +90,104 @@ fi RESPONSE_WITH_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \ -H "Authorization: Bearer ${KSERVE_M2M_TOKEN}" \ -H "Content-Type: application/json" \ - "http://${KSERVE_INGRESS_HOST_PORT}/kserve/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \ + "http://${KSERVE_INGRESS_HOST_PORT}/serving/${NAMESPACE}/isvc-sklearn/v1/models/isvc-sklearn:predict" \ -d '{"instances": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]]}') if [ "$RESPONSE_WITH_TOKEN" != "200" ] && [ "$RESPONSE_WITH_TOKEN" != "404" ] && [ "$RESPONSE_WITH_TOKEN" != "503" ]; then + echo "FAIL: Path-based: Expected 200/404/503 with token, got $RESPONSE_WITH_TOKEN" + exit 1 +fi + +# --- Test 2b: HOST-BASED routing (security verification) --- +HOST_HEADER="Host: isvc-sklearn.${NAMESPACE}.example.com" + +# Request without token should be rejected +RESPONSE_HOST_NO_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "${HOST_HEADER}" \ + -H "Content-Type: application/json" \ + "http://${KSERVE_INGRESS_HOST_PORT}/v1/models/isvc-sklearn:predict" \ + -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}') + +if [ "$RESPONSE_HOST_NO_TOKEN" != "403" ] && [ "$RESPONSE_HOST_NO_TOKEN" != "302" ]; then + echo "FAIL: Host-based: Expected 403/302 without token, got $RESPONSE_HOST_NO_TOKEN" + exit 1 +fi + +# Request with valid token should succeed (the AuthorizationPolicy +# restricts traffic by source namespace via mTLS identity) +RESPONSE_HOST_WITH_TOKEN=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer ${KSERVE_M2M_TOKEN}" \ + -H "${HOST_HEADER}" \ + -H "Content-Type: application/json" \ + "http://${KSERVE_INGRESS_HOST_PORT}/v1/models/isvc-sklearn:predict" \ + -d '{"instances": [[6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6]]}') + +if [ "$RESPONSE_HOST_WITH_TOKEN" != "200" ] && [ "$RESPONSE_HOST_WITH_TOKEN" != "404" ] && [ "$RESPONSE_HOST_WITH_TOKEN" != "503" ]; then + echo "FAIL: Host-based: Expected 200/404/503 with token, got $RESPONSE_HOST_WITH_TOKEN" exit 1 fi +# ============================================================ +# Test 3: KServe Models Web Application API +# ============================================================ kubectl wait --for=condition=Available --timeout=300s -n kubeflow deployment/kserve-models-web-app -# Knative Service authentication via cluster-local-gateway +TOKEN="$(kubectl -n ${NAMESPACE} create token default-editor)" +BASE_URL="localhost:8080/kserve-endpoints" + +cat <&1 | grep -i "set-cookie" +XSRFTOKEN=$(grep XSRF-TOKEN /tmp/kserve_xcrf.txt | awk '{print $NF}') + +RESPONSE=$(curl -s --fail-with-body \ + "${BASE_URL}/api/namespaces/${NAMESPACE}/inferenceservices" \ + -H "Authorization: Bearer ${TOKEN}" \ + -H "X-XSRF-TOKEN: ${XSRFTOKEN}" \ + -H "Cookie: XSRF-TOKEN=${XSRFTOKEN}") + +echo "$RESPONSE" | grep -q "sklearn-iris" || exit 1 +kubectl get inferenceservice sklearn-iris -n ${NAMESPACE} || exit 1 +READY=$(kubectl get isvc sklearn-iris -n ${NAMESPACE} -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') +[[ "$READY" == "True" ]] || { + echo "FAILURE: InferenceService sklearn-iris Ready status is: $READY" + exit 1 +} + +kubectl delete inferenceservice sklearn-iris -n ${NAMESPACE} || exit 1 + +# Test unauthorized access to models web application +UNAUTH_TOKEN="$(kubectl -n default create token default)" +HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${BASE_URL}/api/namespaces/${NAMESPACE}/inferenceservices" -H "Authorization: Bearer ${UNAUTH_TOKEN}") +[[ "$HTTP_CODE" == "403" || "$HTTP_CODE" == "401" ]] || { echo "FAILURE: Expected 401/403, got $HTTP_CODE"; exit 1; } +echo "Models Web Application: Token from unauthorized ServiceAccount cannot list InferenceServices in $NAMESPACE namespace." + +# ============================================================ +# Test 4: Knative Service authentication via cluster-local-gateway +# ============================================================ cat </dev/null || true kubectl delete namespace ${ATTACKER_NAMESPACE} --ignore-not-found=true kubectl delete ksvc secure-model-predictor -n ${NAMESPACE} --ignore-not-found=true +kubectl delete inferenceservice isvc-sklearn -n ${NAMESPACE} --ignore-not-found=true diff --git a/tests/kserve_test.yaml b/tests/kserve_test.yaml deleted file mode 100644 index 3d12cc117d..0000000000 --- a/tests/kserve_test.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: "serving.kserve.io/v1beta1" -kind: "InferenceService" -metadata: - name: "sklearn-iris" - namespace: "kubeflow-user-example-com" -spec: - predictor: - sklearn: - resources: - limits: - cpu: "1" - memory: 2Gi - requests: - cpu: "0.1" - memory: 200M - storageUri: "gs://kfserving-examples/models/sklearn/1.0/model" diff --git a/tests/kserve/requirements.txt b/tests/requirements.txt similarity index 77% rename from tests/kserve/requirements.txt rename to tests/requirements.txt index fbc04cb5b5..511e394b2f 100644 --- a/tests/kserve/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,4 @@ pytest>=7.0.0 -kserve>=0.15.0 +kserve>=0.16.0 kubernetes>=18.20.0 requests>=2.18.4 From 605d3aba247fe7811f92de57244bf9aa318c4635 Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Mon, 23 Feb 2026 13:34:29 +0100 Subject: [PATCH 2/8] Apply suggestion from @juliusvonkohout Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- .github/workflows/kserve_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/kserve_test.yaml b/.github/workflows/kserve_test.yaml index 7f53a9e26e..f6d9ff4d10 100644 --- a/.github/workflows/kserve_test.yaml +++ b/.github/workflows/kserve_test.yaml @@ -6,7 +6,7 @@ on: - .github/workflows/kserve_test.yaml - applications/kserve/** - apps/kserve/** - - tests/kserve_* + - tests/kserve* - tests/kserve_test.sh - tests/kserve_install.sh - common/istio*/** From 9300633835e8769bed699f6a4c8e21a34cdfe322 Mon Sep 17 00:00:00 2001 From: Siddhant Jain Date: Mon, 23 Feb 2026 20:06:13 +0530 Subject: [PATCH 3/8] =?UTF-8?q?fix:=20address=20reviewer=20feedback=20?= =?UTF-8?q?=E2=80=94=20inline=20deps,=20revert=20AP=20to=20requestPrincipa?= =?UTF-8?q?ls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Siddhant Jain --- tests/kserve_sklearn_test.py | 27 +++++++++++++++++++-------- tests/kserve_test.sh | 16 ++++++---------- tests/requirements.txt | 4 ---- 3 files changed, 25 insertions(+), 22 deletions(-) delete mode 100644 tests/requirements.txt diff --git a/tests/kserve_sklearn_test.py b/tests/kserve_sklearn_test.py index f6aeb34f1b..4d452982cb 100644 --- a/tests/kserve_sklearn_test.py +++ b/tests/kserve_sklearn_test.py @@ -24,14 +24,24 @@ (common/istio/istio-install/base/deny_all_authorizationpolicy.yaml), the predictor pod's sidecar blocks all traffic by default. We create an ALLOW AuthorizationPolicy that permits traffic -to the predictor pod. Security is maintained because the -ingress gateway and cluster-local-gateway both validate the -JWT via RequestAuthentication before forwarding. +to the predictor pod using requestPrincipals: ["*"]. Security +is maintained because the ingress gateway validates the JWT +via RequestAuthentication before forwarding. """ +import os +import sys + +# Install dependencies inline (replaces the deleted requirements.txt). +# This ensures pytest, kserve SDK, and other deps are available when +# the CI workflow calls this file via `pytest kserve_sklearn_test.py`. +os.system( + f"{sys.executable} -m pip install -q" + " pytest>=7.0.0 kserve>=0.16.0 kubernetes>=18.20.0 requests>=2.18.4" +) + import json import logging -import os import time import requests @@ -150,9 +160,10 @@ def create_predictor_authorization_policy(namespace): This is needed because the global-deny-all AuthorizationPolicy in istio-system blocks all mesh traffic by default. - We restrict by source namespace using mTLS identity. Security is - maintained because the ingress gateway and cluster-local-gateway - both validate the JWT before forwarding. + We allow any request that carries a valid JWT principal + (requestPrincipals: ["*"]). Security is maintained because + the ingress gateway validates the JWT via RequestAuthentication + before forwarding. """ api = client.CustomObjectsApi() ap_body = { @@ -169,7 +180,7 @@ def create_predictor_authorization_policy(namespace): "from": [ { "source": { - "namespaces": ["istio-system"], + "requestPrincipals": ["*"], } } ] diff --git a/tests/kserve_test.sh b/tests/kserve_test.sh index 1ac13c3ef5..14c4e3a648 100755 --- a/tests/kserve_test.sh +++ b/tests/kserve_test.sh @@ -4,10 +4,6 @@ set -euxo pipefail NAMESPACE=${1:-kubeflow-user-example-com} SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -if ! command -v pytest &> /dev/null; then - pip install -r ${SCRIPT_DIRECTORY}/requirements.txt -fi - export KSERVE_INGRESS_HOST_PORT=${KSERVE_INGRESS_HOST_PORT:-localhost:8080} export KSERVE_M2M_TOKEN="$(kubectl -n ${NAMESPACE} create token default-editor)" export KSERVE_TEST_NAMESPACE=${NAMESPACE} @@ -18,7 +14,7 @@ export KSERVE_TEST_NAMESPACE=${NAMESPACE} # Runs kserve_sklearn_test.py which independently deploys an sklearn # InferenceService, predicts via host-based routing, asserts the # output, and deletes the InferenceService. -pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info +python -m pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info # ============================================================ # Test 2: Ingress Gateway — Path-based & Host-based Routing (curl) @@ -45,9 +41,9 @@ EOF kubectl wait --for=condition=Ready inferenceservice/isvc-sklearn -n ${NAMESPACE} --timeout=300s -# Allow traffic to the predictor pod. The ingress gateway and Cluster local gateway -# have already validated the JWT before forwarding. -# We restrict by source namespace using mTLS identity. +# Allow traffic to the predictor pod from any authenticated principal. +# The ingress gateway and Cluster local gateway validate the JWT +# via RequestAuthentication before forwarding. cat <=7.0.0 -kserve>=0.16.0 -kubernetes>=18.20.0 -requests>=2.18.4 From db2e076798a8aa0c1154435255c8f7333cc60513 Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Mon, 23 Feb 2026 18:00:57 +0100 Subject: [PATCH 4/8] Apply suggestions from code review Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- tests/kserve_sklearn_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/kserve_sklearn_test.py b/tests/kserve_sklearn_test.py index 4d452982cb..14bf452105 100644 --- a/tests/kserve_sklearn_test.py +++ b/tests/kserve_sklearn_test.py @@ -33,7 +33,7 @@ import sys # Install dependencies inline (replaces the deleted requirements.txt). -# This ensures pytest, kserve SDK, and other deps are available when +# This ensures pytest, kserve SDK, and other dependencies are available when # the CI workflow calls this file via `pytest kserve_sklearn_test.py`. os.system( f"{sys.executable} -m pip install -q" From 24a84fa30a4a5da157785415944716d4e76ae6b5 Mon Sep 17 00:00:00 2001 From: Siddhant Jain Date: Mon, 23 Feb 2026 22:47:21 +0530 Subject: [PATCH 5/8] fix: bootstrap pytest before python -m pytest in kserve_test.sh Signed-off-by: Siddhant Jain --- tests/kserve_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/kserve_test.sh b/tests/kserve_test.sh index 14c4e3a648..51b27c4422 100755 --- a/tests/kserve_test.sh +++ b/tests/kserve_test.sh @@ -14,6 +14,7 @@ export KSERVE_TEST_NAMESPACE=${NAMESPACE} # Runs kserve_sklearn_test.py which independently deploys an sklearn # InferenceService, predicts via host-based routing, asserts the # output, and deletes the InferenceService. +pip install -q pytest python -m pytest "${SCRIPT_DIRECTORY}/kserve_sklearn_test.py" -vs --log-level info # ============================================================ From f5abadd8e3a7419d396808a3f8c8f3cfa219542b Mon Sep 17 00:00:00 2001 From: Siddhant Jain Date: Mon, 23 Feb 2026 23:52:51 +0530 Subject: [PATCH 6/8] fix: use permissive allow-all AuthorizationPolicy (rules: [{}]) for predictor pod Signed-off-by: Siddhant Jain --- tests/kserve_sklearn_test.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/tests/kserve_sklearn_test.py b/tests/kserve_sklearn_test.py index 14bf452105..64f374045c 100644 --- a/tests/kserve_sklearn_test.py +++ b/tests/kserve_sklearn_test.py @@ -23,10 +23,9 @@ Because of the mesh-wide global-deny-all AuthorizationPolicy (common/istio/istio-install/base/deny_all_authorizationpolicy.yaml), the predictor pod's sidecar blocks all traffic by default. -We create an ALLOW AuthorizationPolicy that permits traffic -to the predictor pod using requestPrincipals: ["*"]. Security -is maintained because the ingress gateway validates the JWT -via RequestAuthentication before forwarding. +We create a permissive ALLOW AuthorizationPolicy (rules: [{}]) on the +predictor pod. Security is enforced at the ingress gateway, which +validates the JWT via RequestAuthentication before forwarding. """ import os @@ -160,10 +159,12 @@ def create_predictor_authorization_policy(namespace): This is needed because the global-deny-all AuthorizationPolicy in istio-system blocks all mesh traffic by default. - We allow any request that carries a valid JWT principal - (requestPrincipals: ["*"]). Security is maintained because - the ingress gateway validates the JWT via RequestAuthentication - before forwarding. + WARNING: This uses a permissive allow-all rule (rules: [{}]) because + the predictor pod's Envoy sidecar has no RequestAuthentication + configured, so requestPrincipals is always empty and a + principal-based rule would never match. Security is still enforced + at the ingress gateway, which validates the JWT via + RequestAuthentication before forwarding traffic. """ api = client.CustomObjectsApi() ap_body = { @@ -175,17 +176,10 @@ def create_predictor_authorization_policy(namespace): }, "spec": { "action": "ALLOW", - "rules": [ - { - "from": [ - { - "source": { - "requestPrincipals": ["*"], - } - } - ] - } - ], + # WARNING: allow-all rule — the predictor sidecar has no + # RequestAuthentication, so requestPrincipals: ["*"] cannot + # work here. Security is enforced at the ingress gateway. + "rules": [{}], "selector": { "matchLabels": { "serving.knative.dev/service": f"{SERVICE_NAME}-predictor", From 4b238f2fe55f976d34a77bc6965f301c35a9a610 Mon Sep 17 00:00:00 2001 From: Siddhant Jain Date: Tue, 24 Feb 2026 00:16:42 +0530 Subject: [PATCH 7/8] fix: use allow-all AuthorizationPolicy in bash test (same sidecar issue) Signed-off-by: Siddhant Jain --- tests/kserve_test.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/kserve_test.sh b/tests/kserve_test.sh index 51b27c4422..eb8727ed65 100755 --- a/tests/kserve_test.sh +++ b/tests/kserve_test.sh @@ -42,9 +42,9 @@ EOF kubectl wait --for=condition=Ready inferenceservice/isvc-sklearn -n ${NAMESPACE} --timeout=300s -# Allow traffic to the predictor pod from any authenticated principal. -# The ingress gateway and Cluster local gateway validate the JWT -# via RequestAuthentication before forwarding. +# WARNING: allow-all rule — the predictor sidecar has no RequestAuthentication, +# so requestPrincipals: ["*"] cannot work here. Security is enforced at the +# ingress gateway, which validates the JWT before forwarding traffic. cat < Date: Wed, 25 Feb 2026 01:08:28 +0530 Subject: [PATCH 8/8] docs: update KServe Authentication section in oauth2-proxy README Signed-off-by: Siddhant Jain --- common/oauth2-proxy/README.md | 140 ++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/common/oauth2-proxy/README.md b/common/oauth2-proxy/README.md index 8c16b2b168..bc5940b5b2 100644 --- a/common/oauth2-proxy/README.md +++ b/common/oauth2-proxy/README.md @@ -275,6 +275,141 @@ for listing notebooks: ## KServe Authentication +KServe inference endpoints are secured through a layered approach using +Istio `RequestAuthentication` and `AuthorizationPolicy` resources. +The examples below focus on machine-to-machine (M2M) access using service +account tokens. Browser-based user access follows the general Kubeflow +`oauth2-proxy` flow described above. + +### Traffic Flow and Security Layers + +Inference requests to KServe models pass through two security checkpoints: + +``` +client ──► istio-ingressgateway (istio-system) ──► predictor pod sidecar ──► predictor container + │ │ + ▼ ▼ + RequestAuthentication AuthorizationPolicy + (JWT validation) (access control) +``` + +1. **Ingress gateway** (`istio-system`): A `RequestAuthentication` resource + validates the JWT in the `Authorization: Bearer ` header. Requests + with an invalid token are rejected with `401`. Requests without any token + pass through (handled by the `AuthorizationPolicy` at the next layer). +2. **Predictor pod sidecar**: An `AuthorizationPolicy` controls which + requests reach the model container. + +### Configuring AuthorizationPolicy for Predictor Pods + +Kubeflow ships a `global-deny-all` `AuthorizationPolicy` in `istio-system` that +blocks all mesh traffic by default. To allow inference traffic to reach a +predictor pod, you must create an `AuthorizationPolicy` in the model's +namespace. + +The **intended** configuration uses `requestPrincipals: ["*"]`, which matches +any request carrying a validated JWT principal: + +```yaml +apiVersion: security.istio.io/v1beta1 +kind: AuthorizationPolicy +metadata: + name: allow-isvc-sklearn + namespace: kubeflow-user-example-com +spec: + action: ALLOW + rules: + - from: + - source: + requestPrincipals: ["*"] + selector: + matchLabels: + serving.knative.dev/service: isvc-sklearn-predictor +``` + +`requestPrincipals: ["*"]` relies on the JWT principal being propagated from +the ingress gateway to the predictor pod's Envoy sidecar. This propagation +depends on the Istio mTLS and `PeerAuthentication` configuration of the +cluster. In environments where the principal is not propagated (e.g., some +KinD-based CI setups), `requestPrincipals` will always be empty at the +sidecar, and the rule will never match. + +In such environments, the CI tests use a permissive fallback: + +```yaml +spec: + action: ALLOW + rules: + - {} # allow-all: security is enforced at the ingress gateway + selector: + matchLabels: + serving.knative.dev/service: isvc-sklearn-predictor +``` + +> **Important:** `rules: [{}]` allows **all** traffic to the predictor pod, +> including unauthenticated requests that bypass the ingress gateway. +> This is acceptable in CI because the ingress gateway's +> `RequestAuthentication` is the primary security boundary — it validates +> the JWT **before** forwarding traffic to the predictor. However, in +> production clusters with proper mTLS configuration, prefer +> `requestPrincipals: ["*"]` for defense in depth. + +### Path-Based and Host-Based Routing + +KServe supports two routing modes for inference requests, both secured by the +same authentication flow: + +| Mode | URL pattern | Configuration | +|------|-------------|---------------| +| Path-based | `http:///serving///v1/models/:predict` | `pathTemplate` in `inferenceservice-config` ConfigMap | +| Host-based | `http:///v1/models/:predict` with `Host: ..example.com` | `domainTemplate` in `inferenceservice-config` ConfigMap | + +Path-based routing is configured via a kustomize patch on the +`inferenceservice-config` ConfigMap +(`applications/kserve/kserve/kustomization.yaml`): + +```json +{ + "pathTemplate": "/serving/{{ .Namespace }}/{{ .Name }}" +} +``` + +KServe auto-generates a `VirtualService` on the `kubeflow-gateway` for each +`InferenceService`, enabling both routing modes simultaneously. + +### KServe Models Web Application Authentication + +The KServe Models Web Application (`kserve-models-web-app`) uses the same +XSRF + Bearer token authentication pattern as other Kubeflow web applications. +API calls require: + +1. A valid XSRF token (obtained via cookie on the initial page load) +2. A valid `Authorization: Bearer ` header +3. The token's identity must have RBAC permissions in the target namespace + +Unauthorized service accounts (e.g., `default` SA from a different namespace) +receive `401`/`403` when attempting to list `InferenceService` resources in a +namespace they do not have access to. + +### CI Test Coverage + +The KServe test suite (`tests/kserve_test.sh`) validates the following +authentication and security scenarios end-to-end in a KinD cluster: + +| # | Test | What is verified | +|---|------|-----------------| +| 1 | Model prediction via KServe Python SDK | InferenceService deployment, prediction, and cleanup using the `kserve` SDK with M2M token | +| 2a | Path-based routing without token | Unauthenticated request returns `403`/`302` | +| 2b | Host-based routing without token | Unauthenticated request returns `403`/`302` | +| 2c | Path-based routing with valid token | Authenticated request returns `200` | +| 2d | Host-based routing with valid token | Authenticated request returns `200` | +| 3 | KServe Models Web App API | XSRF + auth flow, unauthorized SA gets `401`/`403` | +| 4 | Knative Service auth via cluster-local-gateway | Unauthenticated and invalid-token requests are rejected | +| 5 | Cluster-local-gateway authentication | Direct access without token returns `403` | +| 6 | Namespace isolation | Cross-namespace attacker token is rejected | + +### Architecture Analysis (Future Improvements) + The analysis of KServe auth capabilities suggests that while it's possible to limit access to only authenticated agents, there might be some improvements required to enable access only to authorized agents. @@ -297,6 +432,11 @@ This is based on the following: > create an [Istio AuthorizationPolicy](https://istio.io/latest/docs/reference/config/security/authorization-policy/) to grant access to the pods or disable it Most probably some work is needed to enable authorized access to kserve models. +3. Potential improvement: adding `source.namespaces` to the `AuthorizationPolicy` + to restrict access to traffic originating from specific namespaces (e.g., + `istio-system`). This would provide an additional layer of security but + requires proper mTLS/PeerAuthentication configuration to propagate SPIFFE + identities correctly. ## Links