From fde4d3a877b0747447caf34a4dc2042537edae9e Mon Sep 17 00:00:00 2001 From: juliusvonkohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Tue, 10 Jun 2025 18:25:14 +0200 Subject: [PATCH 01/16] initial seaweedfs code Signed-off-by: juliusvonkohout <45896133+juliusvonkohout@users.noreply.github.com> --- .../kustomize/third-party/seaweedfs/OWNERS | 6 + .../kustomize/third-party/seaweedfs/README.md | 51 ++++ .../third-party/seaweedfs/UPDGRADE.md | 3 + .../workflow-controller-configmap-patch.yaml | 24 ++ .../seaweedfs/base/kustomization.yaml | 30 +++ .../seaweedfs/base/minio-service-patch.yaml | 13 + .../deployment.yaml | 43 ++++ .../base/pipeline-profile-controller/sync.py | 243 ++++++++++++++++++ .../base/seaweedfs/kustomization.yaml | 11 + .../seaweedfs-create-admin-user-job.yaml | 68 +++++ .../base/seaweedfs/seaweedfs-deployment.yaml | 73 ++++++ .../seaweedfs/seaweedfs-networkpolicy.yaml | 43 ++++ .../base/seaweedfs/seaweedfs-pvc.yaml | 11 + .../seaweedfs/seaweedfs-service-account.yaml | 4 + .../base/seaweedfs/seaweedfs-service.yaml | 33 +++ .../istio/istio-authorization-policy.yaml | 32 +++ .../seaweedfs/istio/kustomization.yaml | 7 + .../seaweedfs/pipelines_swfs_install.sh | 21 ++ .../kustomize/third-party/seaweedfs/test.sh | 20 ++ 19 files changed, 736 insertions(+) create mode 100644 manifests/kustomize/third-party/seaweedfs/OWNERS create mode 100644 manifests/kustomize/third-party/seaweedfs/README.md create mode 100644 manifests/kustomize/third-party/seaweedfs/UPDGRADE.md create mode 100644 manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/minio-service-patch.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml create mode 100644 
manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py create mode 100644 manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service-account.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/istio/istio-authorization-policy.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/istio/kustomization.yaml create mode 100644 manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh create mode 100644 manifests/kustomize/third-party/seaweedfs/test.sh diff --git a/manifests/kustomize/third-party/seaweedfs/OWNERS b/manifests/kustomize/third-party/seaweedfs/OWNERS new file mode 100644 index 00000000000..82967fbf54a --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/OWNERS @@ -0,0 +1,6 @@ +approvers: +# - pschoen-itsc + - juliusvonkohout +reviewers: +# - pschoen-itsc + - juliusvonkohout diff --git a/manifests/kustomize/third-party/seaweedfs/README.md b/manifests/kustomize/third-party/seaweedfs/README.md new file mode 100644 index 00000000000..619b677cb0d --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/README.md @@ -0,0 +1,51 @@ +# SeaweedFS + +- [Official documentation](https://github.com/seaweedfs/seaweedfs/wiki) +- [Official repository](https://github.com/seaweedfs/seaweedfs) + +SeaweedFS is a simple and highly scalable distributed file system. 
It has an S3 interface which makes it usable as an object store for kubeflow. + +## Prerequisites + +- Kubernetes (any recent version should work) +- You should have `kubectl` available and configured to talk to the desired cluster. +- `kustomize` +- If you installed kubeflow with minio, use the `istio` dir instead of `base` for the kustomize commands. + +## Compile manifests + +```bash +kubectl kustomize ./base/ +``` + +## Install SeaweedFS + +**WARNING** +This replaces the service `minio-service` and will redirect the traffic to seaweedfs. + +```bash +# Optional, but recommended to backup existing minio-service +kubectl get -n kubeflow svc minio-service -o=jsonpath='{.metadata.annotations.kubectl\.kubernetes\.io/last-applied-configuration}' > svc-minio-service-backup.json + +kubectl kustomize ./base/ | kubectl apply -f - +``` + +## Verify deployment + +Run +```bash +./test.sh +``` +With the ready check on the container it already verifies that the S3 starts correctly. +You can then use it with the endpoint at http://localhost:8333. +To create access keys open a shell on the pod and use `weed shell` to configure your instance. +Create a user with the command `s3.configure -user <username> -access_key <access-key> -secret_key <secret-key> -actions Read:<bucket>/<prefix>,Write:<bucket>/<prefix> -apply` +Documentation for this can also be found [here](https://github.com/seaweedfs/seaweedfs/wiki/Amazon-S3-API). + +## Uninstall SeaweedFS + +```bash +kubectl kustomize ./base/ | kubectl delete -f - +# Restore minio-service from backup +kubectl apply -f svc-minio-service-backup.json +``` diff --git a/manifests/kustomize/third-party/seaweedfs/UPDGRADE.md b/manifests/kustomize/third-party/seaweedfs/UPDGRADE.md new file mode 100644 index 00000000000..0193a918448 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/UPDGRADE.md @@ -0,0 +1,3 @@ +# Upgrade SeaweedFS + +Change the image tag in the Deployment to the desired version. You can find the available images [here](https://hub.docker.com/r/chrislusf/seaweedfs). 
diff --git a/manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml b/manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml new file mode 100644 index 00000000000..c96a9785234 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml @@ -0,0 +1,24 @@ +# based on https://github.com/kubeflow/manifests/blob/master/apps/pipeline/upstream/third-party/argo/base/workflow-controller-configmap-patch.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: workflow-controller-configmap + namespace: kubeflow +data: + # SeaweedFS configuration for V1 pipelines with namespace isolation + # SeaweedFS is exposed through minio-service for backward compatibility + artifactRepository: | + archiveLogs: true + s3: + endpoint: minio-service.kubeflow:9000 + bucket: mlpipeline + keyFormat: private-artifacts/{{workflow.namespace}}/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}} + insecure: true + accessKeySecret: + name: mlpipeline-minio-artifact + key: accesskey + secretKeySecret: + name: mlpipeline-minio-artifact + key: secretkey + executor: | + imagePullPolicy: IfNotPresent diff --git a/manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml b/manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml new file mode 100644 index 00000000000..8afc5a523e0 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml @@ -0,0 +1,30 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kubeflow + +resources: +- seaweedfs/ +- ../../../apps/pipeline/upstream/env/cert-manager/platform-agnostic-multi-user +configMapGenerator: +- name: kubeflow-pipelines-profile-controller-code + behavior: replace + files: + - pipeline-profile-controller/sync.py +patches: +- 
path: minio-service-patch.yaml +- path: pipeline-profile-controller/deployment.yaml +- path: argo-workflow-controller/workflow-controller-configmap-patch.yaml +- patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: ml-pipeline-ui + spec: + template: + spec: + containers: + - name: ml-pipeline-ui + env: + - name: ARTIFACTS_SERVICE_PROXY_ENABLED + value: 'false' + $patch: merge diff --git a/manifests/kustomize/third-party/seaweedfs/base/minio-service-patch.yaml b/manifests/kustomize/third-party/seaweedfs/base/minio-service-patch.yaml new file mode 100644 index 00000000000..d44ba3e6144 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/minio-service-patch.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: minio-service + namespace: kubeflow +spec: + ports: + - name: http + port: 9000 + protocol: TCP + targetPort: 8333 + selector: + app: seaweedfs diff --git a/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml b/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml new file mode 100644 index 00000000000..b7c7ad08cff --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml @@ -0,0 +1,43 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kubeflow-pipelines-profile-controller +spec: + template: + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: profile-controller + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 0 + capabilities: + drop: + - ALL + # We just need an image with the python botocore library installed + image: docker.io/alpine/k8s:1.32.3 + command: ["python", "/hooks/sync.py"] + env: + - name: KFP_VERSION + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: appVersion + - name: AWS_ENDPOINT_URL + value: http://seaweedfs:8111 + - name: AWS_REGION + value: 
us-east-1 + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: accesskey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: secretkey diff --git a/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py b/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py new file mode 100644 index 00000000000..bf8c7618fa8 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py @@ -0,0 +1,243 @@ +# Copyright 2020-2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from http.server import BaseHTTPRequestHandler, HTTPServer +import json +import os +import base64 + +# From awscli installed in alpine/k8s image +import botocore.session + +S3_BUCKET_NAME = 'mlpipeline' + +session = botocore.session.get_session() +# To interact with seaweedfs user management. Region does not matter. 
+iam = session.create_client('iam', region_name='foobar') + + +def main(): + settings = get_settings_from_env() + server = server_factory(**settings) + server.serve_forever() + + +def get_settings_from_env(controller_port=None, + visualization_server_image=None, frontend_image=None, + visualization_server_tag=None, frontend_tag=None, disable_istio_sidecar=None): + """ + Returns a dict of settings from environment variables relevant to the controller + + Environment settings can be overridden by passing them here as arguments. + + Settings are pulled from the all-caps version of the setting name. The + following defaults are used if those environment variables are not set + to enable backwards compatibility with previous versions of this script: + visualization_server_image: ghcr.io/kubeflow/kfp-visualization-server + visualization_server_tag: value of KFP_VERSION environment variable + frontend_image: ghcr.io/kubeflow/kfp-frontend + frontend_tag: value of KFP_VERSION environment variable + disable_istio_sidecar: Required (no default) + minio_access_key: Required (no default) + minio_secret_key: Required (no default) + """ + settings = dict() + settings["controller_port"] = \ + controller_port or \ + os.environ.get("CONTROLLER_PORT", "8080") + + settings["visualization_server_image"] = \ + visualization_server_image or \ + os.environ.get("VISUALIZATION_SERVER_IMAGE", "ghcr.io/kubeflow/kfp-visualization-server") + + settings["frontend_image"] = \ + frontend_image or \ + os.environ.get("FRONTEND_IMAGE", "ghcr.io/kubeflow/kfp-frontend") + + # Look for specific tags for each image first, falling back to + # previously used KFP_VERSION environment variable for backwards + # compatibility + settings["visualization_server_tag"] = \ + visualization_server_tag or \ + os.environ.get("VISUALIZATION_SERVER_TAG") or \ + os.environ["KFP_VERSION"] + + settings["frontend_tag"] = \ + frontend_tag or \ + os.environ.get("FRONTEND_TAG") or \ + os.environ["KFP_VERSION"] + + 
settings["disable_istio_sidecar"] = \ + disable_istio_sidecar if disable_istio_sidecar is not None \ + else os.environ.get("DISABLE_ISTIO_SIDECAR") == "true" + + return settings + + +def server_factory(visualization_server_image, + visualization_server_tag, frontend_image, frontend_tag, + disable_istio_sidecar, url="", controller_port=8080): + """ + Returns an HTTPServer populated with Handler with customized settings + """ + class Controller(BaseHTTPRequestHandler): + def sync(self, parent, attachments): + # parent is a namespace + namespace = parent.get("metadata", {}).get("name") + + pipeline_enabled = parent.get("metadata", {}).get( + "labels", {}).get("pipelines.kubeflow.org/enabled") + + if pipeline_enabled != "true": + return {"status": {}, "attachments": []} + + # Compute status based on observed state. + desired_status = { + "kubeflow-pipelines-ready": + len(attachments["Secret.v1"]) == 1 and + len(attachments["ConfigMap.v1"]) == 3 and + len(attachments["Deployment.apps/v1"]) == 2 and + len(attachments["Service.v1"]) == 2 and + len(attachments["DestinationRule.networking.istio.io/v1alpha3"]) == 1 and + len(attachments["AuthorizationPolicy.security.istio.io/v1beta1"]) == 1 and + "True" or "False" + } + + # Generate the desired attachment object(s). 
+ desired_resources = [ + { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": { + "name": "kfp-launcher", + "namespace": namespace, + }, + "data": { + "defaultPipelineRoot": f"minio://{S3_BUCKET_NAME}/private-artifacts/{namespace}/v2/artifacts", + }, + }, + { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": { + "name": "metadata-grpc-configmap", + "namespace": namespace, + }, + "data": { + "METADATA_GRPC_SERVICE_HOST": + "metadata-grpc-service.kubeflow", + "METADATA_GRPC_SERVICE_PORT": "8080", + }, + }, + { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": { + "name": "artifact-repositories", + "namespace": namespace, + "annotations": { + "workflows.argoproj.io/default-artifact-repository": "default-namespaced" + } + }, + "data": { + "default-namespaced": json.dumps({ + "archiveLogs": True, + "s3": { + "endpoint": "minio-service.kubeflow:9000", + "bucket": S3_BUCKET_NAME, + "keyFormat": f"private-artifacts/{namespace}/{{{{workflow.name}}}}/{{{{workflow.creationTimestamp.Y}}}}/{{{{workflow.creationTimestamp.m}}}}/{{{{workflow.creationTimestamp.d}}}}/{{{{pod.name}}}}", + "insecure": True, + "accessKeySecret": { + "name": "mlpipeline-minio-artifact", + "key": "accesskey", + }, + "secretKeySecret": { + "name": "mlpipeline-minio-artifact", + "key": "secretkey", + } + } + }) + } + }, + ] + print('Received request:\n', json.dumps(parent, sort_keys=True)) + print('Desired resources except secrets:\n', json.dumps(desired_resources, sort_keys=True)) + + # Moved after the print argument because this is sensitive data. + + # Check if secret is already there when the controller made the request. If yes, then + # use it. Else create a new credentials on seaweedfs for the namespace. 
+ if s3_secret := attachments["Secret.v1"].get(f"{namespace}/mlpipeline-minio-artifact"): + desired_resources.append(s3_secret) + print('Using existing secret') + else: + print('Creating new access key.') + s3_access_key = iam.create_access_key(UserName=namespace) + # Use the AWS IAM API of seaweedfs to manage access policies to bucket. + # This policy ensures that a user can only access artifacts from his own profile. + iam.put_user_policy( + UserName=namespace, + PolicyName=f"KubeflowProject{namespace}", + PolicyDocument=json.dumps( + { + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Action": [ + "s3:Put*", + "s3:Get*", + "s3:List*" + ], + "Resource": [ + f"arn:aws:s3:::{S3_BUCKET_NAME}/artifacts/*", + f"arn:aws:s3:::{S3_BUCKET_NAME}/private-artifacts/{namespace}/*", + f"arn:aws:s3:::{S3_BUCKET_NAME}/private/{namespace}/*", + f"arn:aws:s3:::{S3_BUCKET_NAME}/shared/*", + ] + }] + }) + ) + desired_resources.insert( + 0, + { + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": "mlpipeline-minio-artifact", + "namespace": namespace, + }, + "data": { + "accesskey": base64.b64encode(s3_access_key["AccessKey"]["AccessKeyId"].encode('utf-8')).decode("utf-8"), + "secretkey": base64.b64encode(s3_access_key["AccessKey"]["SecretAccessKey"].encode('utf-8')).decode("utf-8"), + }, + }) + + return {"status": desired_status, "attachments": desired_resources} + + def do_POST(self): + # Serve the sync() function as a JSON webhook. 
+ observed = json.loads( + self.rfile.read(int(self.headers.get("content-length")))) + desired = self.sync(observed["object"], observed["attachments"]) + + self.send_response(200) + self.send_header("Content-type", "application/json") + self.end_headers() + self.wfile.write(bytes(json.dumps(desired), 'utf-8')) + + return HTTPServer((url, int(controller_port)), Controller) + + +if __name__ == "__main__": + main() diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml new file mode 100644 index 00000000000..774997572d7 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml @@ -0,0 +1,11 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kubeflow + +resources: +- seaweedfs-deployment.yaml +- seaweedfs-pvc.yaml +- seaweedfs-networkpolicy.yaml +- seaweedfs-create-admin-user-job.yaml +- seaweedfs-service.yaml +- seaweedfs-service-account.yaml diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml new file mode 100644 index 00000000000..f542cc5cb9d --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml @@ -0,0 +1,68 @@ +kind: Job +apiVersion: batch/v1 +metadata: + name: init-seaweedfs +spec: + template: + metadata: + name: init-seaweedfs + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + restartPolicy: OnFailure + containers: + - name: init-seaweedfs + image: 'chrislusf/seaweedfs:3.85' + env: + - name: WEED_CLUSTER_DEFAULT + value: "sw" + - name: WEED_CLUSTER_SW_MASTER + value: "seaweedfs.kubeflow:9333" + envFrom: + - secretRef: + name: mlpipeline-minio-artifact + command: + - "/bin/sh" + - "-ec" + - | + wait_for_service() { + local url=$1 + local max_attempts=60 # 5 
minutes total (5s * 60) + local attempt=1 + + echo "Waiting for service at $url..." + while [ $attempt -le $max_attempts ]; do + if wget -q --spider "$url" >/dev/null 2>&1; then + echo "Service at $url is up!" + return 0 + fi + echo "Attempt $attempt: Service not ready yet, retrying in 5s..." + sleep 5 + attempt=$((attempt + 1)) + done + echo "Service at $url failed to become ready within 5 minutes" + exit 1 + } + wait_for_service "http://minio-service.kubeflow:9000/status" + exec /bin/echo "s3.bucket.create --name mlpipeline" | /usr/bin/weed shell + exec /bin/echo \ + "s3.configure -user kubeflow-admin \ + -access_key $accesskey \ + -secret_key $secretkey \ + -actions Admin \ + -apply" |\ + /usr/bin/weed shell + securityContext: # Using restricted profile + allowPrivilegeEscalation: false + privileged: false + runAsNonRoot: true + # image defaults to root user + runAsUser: 1001 + runAsGroup: 1001 + capabilities: + drop: + - ALL + add: + - NET_BIND_SERVICE + serviceAccountName: seaweedfs diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml new file mode 100644 index 00000000000..99cee8b4c4f --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: seaweedfs + namespace: kubeflow + labels: + app: seaweedfs +spec: + selector: + matchLabels: + app: seaweedfs + strategy: + type: Recreate + # Single container setup not scalable + replicas: 1 + template: + metadata: + labels: + app: seaweedfs + application-crd-id: kubeflow-pipelines + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: seaweedfs + image: 'chrislusf/seaweedfs:3.85' + args: + - 'server' + - '-dir=/data' + - '-s3' + - '-iam' + ports: + - containerPort: 8333 + - containerPort: 8111 + - containerPort: 9333 + - containerPort: 
19333 + - containerPort: 8888 + readinessProbe: + httpGet: + path: /status + port: 8333 + scheme: HTTP + initialDelaySeconds: 15 + periodSeconds: 15 + successThreshold: 1 + failureThreshold: 100 + timeoutSeconds: 10 + securityContext: # Using restricted profile + allowPrivilegeEscalation: false + privileged: false + runAsNonRoot: true + # image defaults to root user + runAsUser: 1001 + runAsGroup: 1001 + capabilities: + drop: + - ALL + add: + - NET_BIND_SERVICE + volumeMounts: + - mountPath: /data + name: data + resources: + # Benchmark this, just taken from minio + requests: + cpu: 20m + memory: 100Mi + volumes: + - name: data + persistentVolumeClaim: + claimName: seaweedfs-pvc + serviceAccountName: seaweedfs diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml new file mode 100644 index 00000000000..ab1a6cc6657 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml @@ -0,0 +1,43 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: default-allow-same-namespace + namespace: kubeflow +spec: + podSelector: {} + ingress: + - from: + - podSelector: {} + policyTypes: + - Ingress +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: seaweedfs +spec: + ingress: + - from: + - namespaceSelector: + matchExpressions: + - key: app.kubernetes.io/part-of + operator: In + values: + - kubeflow-profile + ports: + - port: 8333 + - from: + - namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: In + values: + - istio-system + podSelector: + matchExpressions: + - key: app + operator: In + values: + - seaweedfs + policyTypes: + - Ingress diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml new file mode 100644 
index 00000000000..7d47bee65de --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: seaweedfs-pvc + namespace: kubeflow +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service-account.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service-account.yaml new file mode 100644 index 00000000000..9e0b2176f25 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service-account.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: seaweedfs diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service.yaml new file mode 100644 index 00000000000..0134c34f391 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-service.yaml @@ -0,0 +1,33 @@ +# Separate service for new ports of seaweedfs. If we add them to the existing minio-service there will be a problem +# with the mlpipeline api server because it relies on MINIO_SERVICE_SERVICE_PORT pointing to the S3 port. +# But with multiple ports on a service that is not really reliable. So we use the existing minio-service for +# backwards-compatibility, but everything new, seaweedfs related is here. 
+apiVersion: v1 +kind: Service +metadata: + name: seaweedfs + namespace: kubeflow +spec: + ports: + - name: http-iam + port: 8111 + protocol: TCP + targetPort: 8111 + - name: http-master + port: 9333 + protocol: TCP + targetPort: 9333 + - name: grpc-master + port: 19333 + protocol: TCP + targetPort: 19333 + - name: grpc-filer + port: 18888 + protocol: TCP + targetPort: 18888 + - name: http-filer + port: 8888 + protocol: TCP + targetPort: 8888 + selector: + app: seaweedfs diff --git a/manifests/kustomize/third-party/seaweedfs/istio/istio-authorization-policy.yaml b/manifests/kustomize/third-party/seaweedfs/istio/istio-authorization-policy.yaml new file mode 100644 index 00000000000..4c6ac2c1630 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/istio/istio-authorization-policy.yaml @@ -0,0 +1,32 @@ +apiVersion: security.istio.io/v1beta1 +kind: AuthorizationPolicy +metadata: + name: seaweedfs-service +spec: + action: ALLOW + selector: + matchLabels: + app: seaweedfs + rules: + - from: + - source: + principals: + - cluster.local/ns/kubeflow/sa/ml-pipeline + - from: + - source: + principals: + - cluster.local/ns/kubeflow/sa/ml-pipeline-ui + # Allow traffic to s3 endpoint from User Pipeline Pods, which don't have a sidecar. + # Also needed for traffic from seaweedfs init pod. Seaweedfs gives the client an ip to connect to. This can not be + # handled well by istio (AuthPolicy). Instead, access to the sensitive ports will be limited by the NetworkPolicy. 
+ - {} +--- +apiVersion: "networking.istio.io/v1alpha3" +kind: DestinationRule +metadata: + name: ml-pipeline-seaweedfs +spec: + host: seaweedfs.kubeflow.svc.cluster.local + trafficPolicy: + tls: + mode: ISTIO_MUTUAL diff --git a/manifests/kustomize/third-party/seaweedfs/istio/kustomization.yaml b/manifests/kustomize/third-party/seaweedfs/istio/kustomization.yaml new file mode 100644 index 00000000000..2dffd5d150b --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/istio/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kubeflow + +resources: +- ../base/ +- istio-authorization-policy.yaml diff --git a/manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh b/manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh new file mode 100644 index 00000000000..e01abb67579 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -euo pipefail +echo "Installing Pipelines ..." +kubectl apply -f apps/pipeline/upstream/third-party/metacontroller/base/crd.yaml +echo "Waiting for crd/compositecontrollers.metacontroller.k8s.io to be available ..." 
+kubectl wait --for condition=established --timeout=30s crd/compositecontrollers.metacontroller.k8s.io +kustomize build experimental/seaweedfs/istio | kubectl apply -f - +sleep 60 +kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ + --field-selector=status.phase!=Succeeded + +kubectl wait --for=condition=Available deployment/ml-pipeline -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/ml-pipeline-ui -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/ml-pipeline-persistenceagent -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/ml-pipeline-scheduledworkflow -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/ml-pipeline-viewer-crd -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/cache-server -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/metadata-writer -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/seaweedfs -n kubeflow --timeout=10s +kubectl wait --for=condition=Available deployment/mysql -n kubeflow --timeout=10s +kubectl get deployment -n kubeflow -l app=ml-pipeline diff --git a/manifests/kustomize/third-party/seaweedfs/test.sh b/manifests/kustomize/third-party/seaweedfs/test.sh new file mode 100644 index 00000000000..f55ca2135e1 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/test.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -xe + +kubectl create ns kubeflow || echo "namespace kubeflow already exists" +kubectl get -n kubeflow svc minio-service -o=jsonpath='{.metadata.annotations.kubectl\.kubernetes\.io/last-applied-configuration}' > svc-minio-service-backup.json +kustomize build istio/ | kubectl apply --server-side -f - +kubectl -n kubeflow wait --for=condition=available --timeout=600s deploy/seaweedfs +kubectl -n kubeflow exec deployments/seaweedfs -c seaweedfs -- sh -c "echo \"s3.configure -user minio -access_key minio 
-secret_key minio123 -actions Read,Write,List -apply\" | /usr/bin/weed shell" + +kubectl -n kubeflow port-forward svc/minio-service 8333:9000 +echo "S3 endpoint available on localhost:8333" & + +function trap_handler { + kubectl -n kubeflow logs -l app=seaweedfs --tail=100 + kustomize build istio/ | kubectl delete -f - + kubectl apply -f svc-minio-service-backup.json +} + +trap trap_handler EXIT From 5ee8cabff72ee7f8c3184b479c6946980835fd4c Mon Sep 17 00:00:00 2001 From: juliusvonkohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Tue, 10 Jun 2025 18:35:33 +0200 Subject: [PATCH 02/16] add initial security tests. Signed-off-by: juliusvonkohout <45896133+juliusvonkohout@users.noreply.github.com> --- .../seaweedfs/pipeline_swfs_test.yaml | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml diff --git a/manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml b/manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml new file mode 100644 index 00000000000..bda6a51f366 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml @@ -0,0 +1,144 @@ +name: Test pipelines with seaweedfs +on: + workflow_dispatch: + pull_request: + paths: + - tests/install_KinD_create_KinD_cluster_install_kustomize.sh + - .github/workflows/pipeline_swfs_test.yaml + - apps/pipeline/upstream/** + - tests/istio* + - tests/oauth2-proxy_install.sh + - common/cert-manager/** + - common/oauth2-proxy/** + - common/istio*/** + - experimental/seaweedfs/** + - tests/swfs_namespace_isolation_test.sh + - tests/s3_helper_test.py + +jobs: + build: + timeout-minutes: 15 + runs-on: + labels: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Remove unused software + run: | + df -h # Check disk space before removal + sudo rm -rf /usr/share/dotnet # Example: Remove .NET SDK + sudo rm -rf /usr/local/lib/android # Example: Remove Android 
SDK + sudo rm -rf /opt/ghc # Example: Remove Haskell + df -h # Check disk space after removal + + - name: Proactively prune OCI system on GHA runner + run: docker system prune -a --volumes --force + + - name: Install KinD, Create KinD cluster and Install kustomize + run: ./tests/install_KinD_create_KinD_cluster_install_kustomize.sh + + - name: Install kubectl + run: ./tests/kubectl_install.sh + + - name: Install Istio + run: ./tests/istio-cni_install.sh + + - name: Install oauth2-proxy + run: ./tests/oauth2-proxy_install.sh + + - name: Install cert-manager + run: ./tests/cert_manager_install.sh + + - name: Create kubeflow namespace + run: kustomize build common/kubeflow-namespace/base | kubectl apply -f - + + - name: Install KF Pipelines + run: ./tests/pipelines_swfs_install.sh + + - name: Install KF Multi Tenancy + run: ./tests/multi_tenancy_install.sh + + - name: Install kubeflow-istio-resources + run: kustomize build common/istio-cni-1-24/kubeflow-istio-resources/base | kubectl apply -f - + + - name: Create KF Profile + run: ./tests/kubeflow_profile_install.sh + + - name: Verify Pipeline Integration + run: | + KF_PROFILE=kubeflow-user-example-com + if ! kubectl get secret mlpipeline-minio-artifact -n $KF_PROFILE > /dev/null 2>&1; then + echo "Error: Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE" + exit 1 + fi + kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" -o json | jq -r '.data | keys[] as $k | "\($k): \(. 
| .[$k] | @base64d)"' | tr '\n' ' ' + + + - name: Port forward + run: ./tests/port_forward_gateway.sh + + - name: List and deploy test pipeline with V1 API + run: | + pip3 install "kfp>=1.8.22,<2.0.0" + KF_PROFILE=kubeflow-user-example-com + TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)" + python3 tests/pipeline_v1_test.py "${TOKEN}" "${KF_PROFILE}" + + - name: Prune images inside Kind cluster + run: docker exec kind-control-plane bash -c "crictl images prune" + + - name: List and deploy test pipeline with V2 API + run: | + pip3 install kfp==2.13.0 + KF_PROFILE=kubeflow-user-example-com + TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)" + python3 tests/pipeline_v2_test.py run_pipeline "${TOKEN}" "${KF_PROFILE}" + + - name: Fail to list pipelines with unauthorized ServiceAccount Token (V2 API) + run: | + pip3 install kfp==2.13.0 + KF_PROFILE=kubeflow-user-example-com + TOKEN="$(kubectl -n default create token default)" + python3 tests/pipeline_v2_test.py test_unauthorized_access "${TOKEN}" "${KF_PROFILE}" + echo "Test succeeded. Token from unauthorized ServiceAccount cannot list pipelines in $KF_PROFILE namespace." 
+ + - name: Test SeaweedFS Namespace Isolation + run: ./tests/swfs_namespace_isolation_test.sh + + - name: Apply Pod Security Standards baseline levels for static namespaces + run: ./tests/PSS_baseline_enable.sh + + - name: Unapply applied baseline labels + run: | + NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow") + for NAMESPACE in "${NAMESPACES[@]}"; do + if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then + kubectl label namespace $NAMESPACE pod-security.kubernetes.io/enforce- + fi + done + + - name: Applying Pod Security Standards restricted levels for static namespaces + run: ./tests/PSS_restricted_enable.sh + + - name: Collect Logs on Failure + if: failure() + run: | + mkdir -p logs + kubectl get all --all-namespaces > logs/resources.txt + kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp > logs/events.txt + for namespace in kubeflow istio-system cert-manager auth kubeflow-user-example-com test-profile-1 test-profile-2; do + if kubectl get namespace $namespace >/dev/null 2>&1; then + kubectl describe pods -n $namespace > logs/$namespace-pods.txt + for pod in $(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}'); do + kubectl logs -n $namespace $pod --tail=100 > logs/$namespace-$pod.txt 2>&1 || true + done + fi + done + + - name: Upload Diagnostic Logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: kubeflow-test-logs + path: logs/ From 4b8610b34666159910e8517a181ea4d572856947 Mon Sep 17 00:00:00 2001 From: juliusvonkohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Tue, 10 Jun 2025 18:39:27 +0200 Subject: [PATCH 03/16] add initial security tests. 
Signed-off-by: juliusvonkohout <45896133+juliusvonkohout@users.noreply.github.com> --- .../seaweedfs/PSS_baseline_enable.sh | 17 ++ .../seaweedfs/PSS_restricted_enable.sh | 15 ++ .../third-party/seaweedfs/pipeline_v1_test.py | 49 +++++ .../third-party/seaweedfs/pipeline_v2_test.py | 96 +++++++++ .../seaweedfs/port_forward_gateway.sh | 6 + .../swfs_namespace_isolation_test.sh | 197 ++++++++++++++++++ 6 files changed, 380 insertions(+) create mode 100644 manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh create mode 100644 manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh create mode 100644 manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py create mode 100644 manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py create mode 100644 manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh create mode 100644 manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh diff --git a/manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh b/manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh new file mode 100644 index 00000000000..ed8165fa2ac --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -euo pipefail + +NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving") + +for NAMESPACE in "${NAMESPACES[@]}"; do + if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then + PATCH_OUTPUT=$(kubectl label namespace $NAMESPACE pod-security.kubernetes.io/enforce=baseline --overwrite 2>&1) + if echo "$PATCH_OUTPUT" | grep -q "violate the new PodSecurity"; then + echo "ERROR: PSS violation detected for namespace $NAMESPACE" + echo "$PATCH_OUTPUT" | grep -A 5 "violate the new PodSecurity" + exit 1 + else + echo "✅ Namespace '$NAMESPACE' labeled successfully." 
+ fi + fi +done \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh b/manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh new file mode 100644 index 00000000000..166659ced74 --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -euo pipefail + +NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving") +for NAMESPACE in "${NAMESPACES[@]}"; do + if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then + PATCH_OUTPUT=$(kubectl label namespace $NAMESPACE pod-security.kubernetes.io/enforce=restricted --overwrite 2>&1) + if echo "$PATCH_OUTPUT" | grep -q "violate the new PodSecurity"; then + echo "WARNING: PSS violation detected for namespace $NAMESPACE" + echo "$PATCH_OUTPUT" | grep -A 5 "violate the new PodSecurity" + else + echo "✅ Namespace '$NAMESPACE' labeled successfully." + fi + fi +done \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py b/manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py new file mode 100644 index 00000000000..4b9abbeedce --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 + +import kfp +import sys +import time + +def hello_world_op(): + from kfp.components import func_to_container_op + + def hello_world(): + print("Hello World from Kubeflow Pipelines V1!") + return "Hello World" + + return func_to_container_op(hello_world) + +def hello_world_pipeline(): + hello_op = hello_world_op() + hello_op() + +def run_v1_pipeline(token, namespace): + client = kfp.Client(host="http://localhost:8080/pipeline", existing_token=token) + + experiment = client.create_experiment("v1-pipeline-test", namespace=namespace) + + pipeline_run = client.create_run_from_pipeline_func( + hello_world_pipeline, + experiment_name=experiment.name, + 
run_name="v1-hello-world", + namespace=namespace, + arguments={} + ) + + for iteration in range(15): + pipeline_status = client.get_run(pipeline_run.run_id).run.status + + if pipeline_status == "Succeeded": + return + elif pipeline_status not in ["Running", "Pending"]: + sys.exit(1) + + time.sleep(10) + + sys.exit(1) + +if __name__ == "__main__": + if len(sys.argv) != 3: + sys.exit(1) + + run_v1_pipeline(sys.argv[1], sys.argv[2]) \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py b/manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py new file mode 100644 index 00000000000..9a1c7461f3b --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +import kfp +import sys +import time +from kfp import dsl +from kfp_server_api.exceptions import ApiException + + +@dsl.component +def hello_world_op() -> str: + print("Hello World from Kubeflow Pipelines V2!") + return "Hello World" + + +@dsl.pipeline( + name="hello-world-v2", + description="A very simple hello world pipeline" +) +def hello_world_pipeline(): + hello_world_op() + + +def run_pipeline(token, namespace): + client = kfp.Client(host="http://localhost:8080/pipeline", existing_token=token) + + try: + pipelines = client.list_pipelines() + print(f"Successfully connected to KFP server, found {len(pipelines.pipelines)} pipelines") + + experiment = client.create_experiment("v2-pipeline-test", namespace=namespace) + print(f"Created experiment: v2-pipeline-test in namespace {namespace}") + + run = client.create_run_from_pipeline_func( + pipeline_func=hello_world_pipeline, + experiment_name="v2-pipeline-test", + run_name="v2-test-run", + arguments={}, + namespace=namespace + ) + + run_id = run.run_id + + for _ in range(30): + status = client.get_run(run_id=run_id).state + + if status == "SUCCEEDED": + return + elif status not in ["PENDING", "RUNNING"]: + print(f"Pipeline failed with status: 
{status}") + + pods = client._get_k8s_client().list_namespaced_pod( + namespace=namespace, + label_selector=f"pipeline/runid={run_id}" + ) + + print(f"Found {len(pods.items)} pods for this run") + for pod in pods.items: + print(f"Pod {pod.metadata.name}: {pod.status.phase}") + + sys.exit(1) + + time.sleep(10) + + sys.exit(1) + + except Exception as exception: + print(f"Error in pipeline execution: {exception}") + sys.exit(1) + + +def test_unauthorized_access(token, namespace): + client = kfp.Client(host="http://localhost:8080/pipeline", existing_token=token) + + try: + pipeline = client.list_runs(namespace=namespace) + sys.exit(1) + except ApiException as exception: + if exception.status != 403: + sys.exit(1) + + +if __name__ == "__main__": + if len(sys.argv) < 3: + sys.exit(1) + + action = sys.argv[1] + token = sys.argv[2] + namespace = sys.argv[3] + + if action == "run_pipeline": + run_pipeline(token, namespace) + elif action == "test_unauthorized_access": + test_unauthorized_access(token, namespace) + else: + sys.exit(1) \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh b/manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh new file mode 100644 index 00000000000..f12e25bd7cf --- /dev/null +++ b/manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -euxo pipefail + +GATEWAY_SERVICE=$(kubectl get svc -n istio-system -l app=istio-ingressgateway -o jsonpath='{.items[0].metadata.name}') +nohup kubectl port-forward -n istio-system svc/$GATEWAY_SERVICE 8080:80 & +timeout 60s bash -c 'until curl -s localhost:8080 > /dev/null || curl -s -I localhost:8080 | grep -q "HTTP/"; do sleep 5; done' \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh b/manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh new file mode 100644 index 00000000000..03b0af0d285 --- /dev/null +++ 
b/manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh @@ -0,0 +1,197 @@ +#!/bin/bash +set -euxo pipefail + +echo "SeaweedFS Security Test - Unauthorized Access Check" +echo "Testing if one namespace can access files from another namespace" + +# Check dependencies +for cmd in kubectl python3; do + if ! command -v $cmd &> /dev/null; then + echo "Error: $cmd is required but not installed" + exit 1 + fi +done + +# Install boto3 if not available +if ! python3 -c "import boto3" 2>/dev/null; then + echo "Installing boto3..." + pip3 install boto3 +fi + +PORT_FORWARD_PID="" +# Cleanup function +cleanup() { + echo "Cleaning up..." + if [ -n "$PORT_FORWARD_PID" ]; then + kill $PORT_FORWARD_PID 2>/dev/null || true + fi + rm -f test-file.txt accessed-file.txt + kubectl delete profile test-profile-1 test-profile-2 --ignore-not-found +} +trap cleanup EXIT + +# Create test profiles +create_profiles() { + echo "Creating test profiles..." + + # Create both profiles + kubectl apply -f - </dev/null 2>&1; then + echo "Namespaces created" + return 0 + fi + sleep 10 + done + + echo "Error: Namespaces not created" + exit 1 +} + +# Wait for S3 credentials +wait_for_credentials() { + local namespace=$1 + echo "Waiting for S3 credentials in $namespace..." 
+ + for i in {1..6}; do + if kubectl get secret -n $namespace mlpipeline-minio-artifact >/dev/null 2>&1; then + echo "Credentials found" + return 0 + fi + sleep 10 + done + + echo "Error: No credentials found" + return 1 +} + +# Get credentials for namespace +get_credentials() { + local namespace=$1 + local access_key=$(kubectl get secret -n $namespace mlpipeline-minio-artifact -o jsonpath='{.data.accesskey}' | base64 -d) + local secret_key=$(kubectl get secret -n $namespace mlpipeline-minio-artifact -o jsonpath='{.data.secretkey}' | base64 -d) + echo "$access_key:$secret_key" +} + +# Setup port forward to SeaweedFS +setup_port_forward() { + if [ -n "$PORT_FORWARD_PID" ]; then + return 0 # Already running + fi + + echo "Setting up port-forward..." + local pod=$(kubectl get pod -n kubeflow -l app=seaweedfs -o jsonpath='{.items[0].metadata.name}') + kubectl port-forward -n kubeflow pod/$pod 8333:8333 >/dev/null 2>&1 & + PORT_FORWARD_PID=$! + sleep 3 +} + +# Upload test file +upload_file() { + local namespace=$1 + echo "Uploading test file to $namespace..." + + local credentials=$(get_credentials $namespace) + local access_key=$(echo $credentials | cut -d: -f1) + local secret_key=$(echo $credentials | cut -d: -f2) + + setup_port_forward + + python3 tests/s3_helper_test.py upload \ + --access-key "$access_key" \ + --secret-key "$secret_key" \ + --endpoint-url "http://localhost:8333" \ + --bucket "mlpipeline" \ + --key "private-artifacts/$namespace/test-file.txt" \ + --content "Test file for $namespace" +} + +# Test unauthorized access +test_unauthorized_access() { + local from_namespace=$1 + local target_namespace=$2 + + echo "Testing unauthorized access from $from_namespace to $target_namespace..." 
+ + local credentials=$(get_credentials $from_namespace) + local access_key=$(echo $credentials | cut -d: -f1) + local secret_key=$(echo $credentials | cut -d: -f2) + + setup_port_forward + + # Try to access the other namespace's file + # Note: Python script returns 0 when access is denied (good), 1 when access succeeds (bad) + if python3 tests/s3_helper_test.py download \ + --access-key "$access_key" \ + --secret-key "$secret_key" \ + --endpoint-url "http://localhost:8333" \ + --bucket "mlpipeline" \ + --key "private-artifacts/$target_namespace/test-file.txt"; then + + echo "Security OK: Access denied as expected" + return 0 + else + echo "SECURITY ISSUE: Unauthorized access successful!" + return 1 + fi +} + +# Main test function +main() { + echo "Starting security test..." + + # Create test profiles + create_profiles + + # Wait for credentials to be created + echo "Waiting for profile controller to create credentials..." + sleep 30 + + wait_for_credentials "test-profile-1" || { + echo "Failed to get credentials for test-profile-1" + exit 1 + } + + wait_for_credentials "test-profile-2" || { + echo "Failed to get credentials for test-profile-2" + exit 1 + } + + # Upload file to first namespace + upload_file "test-profile-1" || { + echo "Failed to upload file" + exit 1 + } + + # Test unauthorized access + if test_unauthorized_access "test-profile-2" "test-profile-1"; then + echo "SECURITY TEST PASSED: No unauthorized access detected" + else + echo "SECURITY TEST FAILED: Unauthorized access detected" + echo "This indicates a security vulnerability in the SeaweedFS setup" + exit 1 + fi +} + +main From ee6938a22c96f595420b94daf517ecf5a323a94c Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Mon, 16 Jun 2025 08:43:56 +0200 Subject: [PATCH 04/16] Update seaweedfs-create-admin-user-job.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- 
.../base/seaweedfs/seaweedfs-create-admin-user-job.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml index f542cc5cb9d..eac8ed91a6d 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml @@ -63,6 +63,4 @@ spec: capabilities: drop: - ALL - add: - - NET_BIND_SERVICE serviceAccountName: seaweedfs From e5b5bc79601a7501af34717b18c6dc6f7a7053bd Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Mon, 16 Jun 2025 08:44:15 +0200 Subject: [PATCH 05/16] Update seaweedfs-deployment.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- .../seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml index 99cee8b4c4f..9a466bf8c57 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml @@ -56,8 +56,6 @@ spec: capabilities: drop: - ALL - add: - - NET_BIND_SERVICE volumeMounts: - mountPath: /data name: data From b347e58a84befed11853e385a50bdd203023eba0 Mon Sep 17 00:00:00 2001 From: Harshvir Potpose Date: Thu, 3 Jul 2025 19:18:21 +0530 Subject: [PATCH 06/16] chore: Replace minio with seaweedfs (#11987) * replace minio with seaweedfs Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * add tests Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * Handel Completed Kubernetes 
Jobs Signed-off-by: Harshvir Potpose * update test Signed-off-by: Harshvir Potpose * rm unused files Signed-off-by: Harshvir Potpose * update deployment Signed-off-by: Harshvir Potpose * update test Signed-off-by: Harshvir Potpose * rm env/azure Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * add TTL configuration for SeaweedFS Signed-off-by: Harshvir Potpose * fix sdk execution tests Signed-off-by: Harshvir Potpose * fix sample-test Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * update client Signed-off-by: Harshvir Potpose * add env vars Signed-off-by: Harshvir Potpose * fix package installation Signed-off-by: Harshvir Potpose * add lifecycle poilicy Signed-off-by: Harshvir Potpose * fix chache test Signed-off-by: Harshvir Potpose * revert client changes Signed-off-by: Harshvir Potpose * add all necessary internal services to NO_PROXY Signed-off-by: Harshvir Potpose * fix cache test Signed-off-by: Harshvir Potpose * update workflow configmap Signed-off-by: Harshvir Potpose * revert Signed-off-by: Harshvir Potpose * increase timeout Signed-off-by: Harshvir Potpose * fix no space left issue Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * add ci space cleanup Signed-off-by: Harshvir Potpose * update seaweedfs test Signed-off-by: Harshvir Potpose * Update e2e-seaweedfs-test.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update e2e-test.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update sdk-execution.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update hello-world.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update env-var.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update e2e-seaweedfs-test.yml Signed-off-by: Julius von Kohout 
<45896133+juliusvonkohout@users.noreply.github.com> * fix Signed-off-by: Harshvir Potpose * revert Signed-off-by: Harshvir Potpose * Update e2e-test.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * revert tests changes Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * Update workflow-disable-logs-patch.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update workflow-controller-configmap-patch.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * remove env/aws Signed-off-by: Harshvir Potpose * Update workflow-disable-logs-patch.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update proxy-env.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update e2e-seaweedfs-test.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update kfp-samples.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update sdk-execution.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update sdk-execution.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update sdk-execution.yml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update ml-pipeline-apiserver-deployment.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update seaweedfs-deployment.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update seaweedfs-deployment.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * Update seaweedfs-pvc.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> * fix Signed-off-by: Harshvir Potpose * fix seaweedfs test Signed-off-by: 
Harshvir Potpose * fix seaweedfs test Signed-off-by: Harshvir Potpose * update busybox image Signed-off-by: Harshvir Potpose * add kubeflwo-edit cluster-role Signed-off-by: Harshvir Potpose * Fix e2e proxy test Signed-off-by: Harshvir Potpose * add debug Signed-off-by: Harshvir Potpose * fix cluster role Signed-off-by: Harshvir Potpose * DRY disk cleanup Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose --------- Signed-off-by: Harshvir Potpose Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Co-authored-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- .../no-proxy/workflow-disable-logs-patch.yaml | 2 +- .../argo/overlays/proxy/proxy-env.yaml | 2 +- .github/resources/scripts/free-disk-space.sh | 49 +++ .../scripts/kfp-readiness/wait_for_pods.py | 17 +- .github/workflows/e2e-seaweedfs-test.yml | 144 +++++++ .github/workflows/e2e-test.yml | 15 + .github/workflows/kfp-samples.yml | 60 +-- .github/workflows/sdk-execution.yml | 15 +- backend/Dockerfile | 2 +- .../deployment.yaml | 32 +- .../pipelines-profile-controller/sync.py | 365 ++++++------------ .../pipeline/ml-pipeline-ui-deployment.yaml | 2 + manifests/kustomize/env/aws/README.md | 56 --- .../aws/aws-configuration-pipeline-patch.yaml | 32 -- .../aws-configuration-pipeline-ui-patch.yaml | 27 -- manifests/kustomize/env/aws/config | 20 - .../kustomize/env/aws/kustomization.yaml | 39 -- .../env/aws/minio-artifact-secret-patch.env | 2 - manifests/kustomize/env/aws/params.env | 5 - manifests/kustomize/env/aws/secret.env | 2 - .../env/aws/viewer-pod-template.json | 37 -- .../kustomize/env/azure/kustomization.yaml | 29 -- .../minio-azure-gateway/kustomization.yaml | 14 - .../minio-artifact-secret.env | 2 - .../minio-azure-gateway-deployment.yaml | 40 -- .../minio-azure-gateway-service.yaml | 11 - .../kustomize/env/azure/mysql-secret.env | 2 - manifests/kustomize/env/azure/params.env | 1 - 
manifests/kustomize/env/azure/readme.md | 15 - .../env/plain-multi-user/kustomization.yaml | 3 +- .../kustomize/env/plain/kustomization.yaml | 2 +- .../kustomization.yaml | 3 +- .../kustomization.yaml | 3 +- .../kustomization.yaml | 2 +- .../env/platform-agnostic/kustomization.yaml | 2 +- manifests/kustomize/hack/test.sh | 2 - .../workflow-controller-configmap-patch.yaml | 3 +- .../third-party/minio/base/kustomization.yaml | 9 - .../minio/base/minio-deployment.yaml | 63 --- .../third-party/minio/base/minio-pvc.yaml | 10 - .../third-party/minio/base/minio-service.yaml | 12 - .../istio/istio-authorization-policy.yaml | 30 -- .../minio/options/istio/kustomization.yaml | 5 - .../workflow-controller-configmap-patch.yaml | 24 -- .../seaweedfs/base/kustomization.yaml | 24 -- .../deployment.yaml | 43 --- .../base/pipeline-profile-controller/sync.py | 243 ------------ .../base/seaweedfs/kustomization.yaml | 2 + .../minio-service.yaml} | 3 + .../mlpipeline-minio-artifact-secret.yaml | 0 .../base/seaweedfs/seaweedfs-deployment.yaml | 5 +- .../seaweedfs/pipeline_swfs_test.yaml | 144 ------- .../allow-user-namespace-access.yaml | 18 + test/seaweedfs/kubeflow-edit-clusterrole.yaml | 46 +++ .../seaweedfs/namespace_isolation_test.sh | 6 +- test/seaweedfs/s3_helper.py | 95 +++++ test/seaweedfs/test-profiles.yaml | 8 + .../seaweedfs/test_pipeline_v1_seaweedfs.py | 12 +- .../seaweedfs/test_pipeline_v2_seaweedfs.py | 6 +- 59 files changed, 601 insertions(+), 1266 deletions(-) create mode 100755 .github/resources/scripts/free-disk-space.sh create mode 100644 .github/workflows/e2e-seaweedfs-test.yml delete mode 100644 manifests/kustomize/env/aws/README.md delete mode 100644 manifests/kustomize/env/aws/aws-configuration-pipeline-patch.yaml delete mode 100644 manifests/kustomize/env/aws/aws-configuration-pipeline-ui-patch.yaml delete mode 100644 manifests/kustomize/env/aws/config delete mode 100644 manifests/kustomize/env/aws/kustomization.yaml delete mode 100644 
manifests/kustomize/env/aws/minio-artifact-secret-patch.env delete mode 100644 manifests/kustomize/env/aws/params.env delete mode 100644 manifests/kustomize/env/aws/secret.env delete mode 100644 manifests/kustomize/env/aws/viewer-pod-template.json delete mode 100644 manifests/kustomize/env/azure/kustomization.yaml delete mode 100644 manifests/kustomize/env/azure/minio-azure-gateway/kustomization.yaml delete mode 100644 manifests/kustomize/env/azure/minio-azure-gateway/minio-artifact-secret.env delete mode 100644 manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-deployment.yaml delete mode 100644 manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-service.yaml delete mode 100644 manifests/kustomize/env/azure/mysql-secret.env delete mode 100644 manifests/kustomize/env/azure/params.env delete mode 100644 manifests/kustomize/env/azure/readme.md delete mode 100644 manifests/kustomize/third-party/minio/base/kustomization.yaml delete mode 100644 manifests/kustomize/third-party/minio/base/minio-deployment.yaml delete mode 100644 manifests/kustomize/third-party/minio/base/minio-pvc.yaml delete mode 100644 manifests/kustomize/third-party/minio/base/minio-service.yaml delete mode 100644 manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml delete mode 100644 manifests/kustomize/third-party/minio/options/istio/kustomization.yaml delete mode 100644 manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml delete mode 100644 manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml delete mode 100644 manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py rename manifests/kustomize/third-party/seaweedfs/base/{minio-service-patch.yaml => seaweedfs/minio-service.yaml} (75%) rename manifests/kustomize/third-party/{minio/base => seaweedfs/base/seaweedfs}/mlpipeline-minio-artifact-secret.yaml (100%) 
delete mode 100644 manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml create mode 100644 test/seaweedfs/allow-user-namespace-access.yaml create mode 100644 test/seaweedfs/kubeflow-edit-clusterrole.yaml rename manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh => test/seaweedfs/namespace_isolation_test.sh (97%) mode change 100644 => 100755 create mode 100755 test/seaweedfs/s3_helper.py create mode 100644 test/seaweedfs/test-profiles.yaml rename manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py => test/seaweedfs/test_pipeline_v1_seaweedfs.py (66%) mode change 100644 => 100755 rename manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py => test/seaweedfs/test_pipeline_v2_seaweedfs.py (93%) mode change 100644 => 100755 diff --git a/.github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml b/.github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml index 623bbe9621f..109296e3d10 100644 --- a/.github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml +++ b/.github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml @@ -8,7 +8,7 @@ data: s3: endpoint: "minio-service.$(kfp-namespace):9000" bucket: "$(kfp-artifact-bucket-name)" - keyFormat: "artifacts/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}}" + keyFormat: "private-artifacts/{{workflow.namespace}}/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}}" insecure: true accessKeySecret: name: mlpipeline-minio-artifact diff --git a/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml b/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml index 70e03d05e27..234122b6954 100644 --- a/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml +++ 
b/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml @@ -13,4 +13,4 @@ spec: - name: HTTPS_PROXY value: "http://squid.squid.svc.cluster.local:3128" - name: NO_PROXY - value: "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc,metadata-grpc-service,0,1,2,3,4,5,6,7,8,9" + value: "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc,metadata-grpc-service,10.0.0.0/8,0,1,2,3,4,5,6,7,8,9" diff --git a/.github/resources/scripts/free-disk-space.sh b/.github/resources/scripts/free-disk-space.sh new file mode 100755 index 00000000000..e546bf14547 --- /dev/null +++ b/.github/resources/scripts/free-disk-space.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -euo pipefail + +# This script frees up disk space on GitHub Actions runners. +# Several GHA workflows were failing with "no space left on device" errors. +# This script is only meant to run in GitHub Actions CI environment. + +# Safety check: Only run on GitHub Actions +if [[ "${GITHUB_ACTIONS:-false}" != "true" ]]; then + echo "ERROR: This script is for GitHub Actions runners only!" 
+ exit 1 +fi + +echo "=== Initial disk usage ===" +df -h + +echo "=== Freeing up disk space ===" + +# Remove large directories not needed for KFP tests +sudo rm -rf /usr/share/dotnet +sudo rm -rf /opt/ghc +sudo rm -rf /usr/local/share/boost +sudo rm -rf /usr/local/lib/android +sudo rm -rf /usr/local/.ghcup +sudo rm -rf /usr/share/swift + +# Selectively remove large tools from hostedtoolcache while preserving Go, Node, Python +# Remove these specific large tools that aren't needed for KFP tests +sudo rm -rf /opt/hostedtoolcache/CodeQL || true +sudo rm -rf /opt/hostedtoolcache/Java_* || true +sudo rm -rf /opt/hostedtoolcache/Ruby || true +sudo rm -rf /opt/hostedtoolcache/PyPy || true +sudo rm -rf /opt/hostedtoolcache/boost || true + +# Clean package manager +sudo apt-get autoremove -y +sudo apt-get autoclean + +# Clean Docker +docker system prune -af --volumes +docker image prune -af + +# Clean containerd +sudo systemctl stop containerd || true +sudo rm -rf /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/* || true +sudo systemctl start containerd || true + +echo "=== Final disk usage ===" +df -h \ No newline at end of file diff --git a/.github/resources/scripts/kfp-readiness/wait_for_pods.py b/.github/resources/scripts/kfp-readiness/wait_for_pods.py index 3519d7337f5..198d5c67530 100644 --- a/.github/resources/scripts/kfp-readiness/wait_for_pods.py +++ b/.github/resources/scripts/kfp-readiness/wait_for_pods.py @@ -49,8 +49,17 @@ def get_pod_statuses(): def all_pods_ready(statuses): - return all(pod_status == 'Running' and ready == total - for pod_status, ready, total, _ in statuses.values()) + def is_pod_ready(pod_name, pod_status, ready, total, waiting_messages): + # Jobs/CronJobs are ready when they succeed + if pod_status == 'Succeeded': + return True + # Regular pods are ready when running and all containers are ready + if pod_status == 'Running' and ready == total: + return True + return False + + return all(is_pod_ready(pod_name, 
pod_status, ready, total, waiting_messages) + for pod_name, (pod_status, ready, total, waiting_messages) in statuses.items()) def print_get_pods(): @@ -107,7 +116,9 @@ def check_pods(calm_time=10, timeout=600, retries_after_ready=5): logging.info("Final pod statuses:") for pod_name, (pod_status, ready, total, _) in previous_statuses.items(): - if pod_status == 'Running' and ready == total: + if pod_status == 'Succeeded': + logging.info(f"Pod {pod_name} completed successfully (Job/CronJob)") + elif pod_status == 'Running' and ready == total: logging.info(f"Pod {pod_name} is fully ready ({ready}/{total})") else: logging.info(f"Pod {pod_name} is not ready (Status: {pod_status}, Ready: {ready}/{total})") diff --git a/.github/workflows/e2e-seaweedfs-test.yml b/.github/workflows/e2e-seaweedfs-test.yml new file mode 100644 index 00000000000..d438965a569 --- /dev/null +++ b/.github/workflows/e2e-seaweedfs-test.yml @@ -0,0 +1,144 @@ +name: KFP SeaweedFS Integration Tests + +on: + workflow_dispatch: + pull_request: + paths: + - '.github/workflows/e2e-seaweedfs-test.yml' + - 'manifests/kustomize/third-party/seaweedfs/**' + - 'test/seaweedfs/**' + - 'manifests/kustomize/base/installs/multi-user/pipelines-profile-controller' + - '!**/*.md' + +jobs: + seaweedfs-integration-tests: + runs-on: ubuntu-latest + strategy: + matrix: + k8s_version: [ "v1.29.2", "v1.31.0" ] + name: SeaweedFS Integration Tests - K8s ${{ matrix.k8s_version }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Create KFP cluster with SeaweedFS + id: create-kfp-cluster + uses: ./.github/actions/kfp-cluster + with: + k8s_version: ${{ matrix.k8s_version }} + + - name: Free up disk space for CI + run: ./.github/resources/scripts/free-disk-space.sh + + - name: Install istio + id: install-istio + if: ${{ steps.create-kfp-cluster.outcome == 'success' }} + run: | + kubectl apply -k 
https://github.com/kubeflow/manifests//common/istio/istio-crds/base?ref=master + kubectl apply -k https://github.com/kubeflow/manifests//common/istio/istio-namespace/base?ref=master + kubectl apply -k https://github.com/kubeflow/manifests//common/istio/istio-install/base?ref=master + echo "Waiting for all Istio Pods to become ready..." + kubectl wait --for=condition=Ready pods --all -n istio-system --timeout 300s + + - name: Deploy Platform Agnostic Multi-User + id: deploy-platform-agnostic-multi-user + if: ${{ steps.install-istio.outcome == 'success' }} + run: | + kubectl apply -f manifests/kustomize/third-party/metacontroller/base/crd.yaml + kubectl apply --force -k manifests/kustomize/env/platform-agnostic-multi-user + echo "Waiting for Pods to be ready..." + kubectl wait --for=condition=Ready pods --all --namespace kubeflow --timeout=300s --field-selector=status.phase!=Succeeded || true + + - name: Install Profile Controller Resources + id: install-profile-resources + if: ${{ steps.deploy-platform-agnostic-multi-user.outcome == 'success' }} + run: | + echo "Installing Profile Controller resources..." + kubectl apply -k https://github.com/kubeflow/manifests/applications/profiles/upstream/overlays/kubeflow?ref=master + # Wait for profile controller to be ready + kubectl -n kubeflow wait --for=condition=Ready pods -l kustomize.component=profiles --timeout 180s || true + echo "Profile Controller resources installed successfully" + + - name: ClusterRole for User Permissions + id: fix-clusterrole + if: ${{ steps.install-profile-resources.outcome == 'success' }} + run: | + echo "Applying kubeflow-edit ClusterRole with proper aggregation..." + kubectl apply -f test/seaweedfs/kubeflow-edit-clusterrole.yaml + + - name: Create KF Profile + id: create-kf-profile + if: ${{ steps.fix-clusterrole.outcome == 'success' }} + run: | + echo "Creating KF Profile..." 
+ kubectl apply -f test/seaweedfs/test-profiles.yaml + echo "KF Profile created successfully" + + - name: ApplyNetwork Policy for Cross-Namespace Access + id: fix-network-policy + if: ${{ steps.create-kf-profile.outcome == 'success' }} + run: | + echo "Applying network policy to allow user namespace access to kubeflow services..." + kubectl apply -f test/seaweedfs/allow-user-namespace-access.yaml + + - name: Verify Pipeline Integration + id: verify-pipeline-integration + if: ${{ steps.fix-network-policy.outcome == 'success' }} + run: | + KF_PROFILE=kubeflow-user-example-com + if ! kubectl get secret mlpipeline-minio-artifact -n $KF_PROFILE > /dev/null 2>&1; then + echo "Error: Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE" + fi + kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" -o json | jq -r '.data | keys[] as $k | "\($k): \(. | .[$k] | @base64d)"' | tr '\n' ' ' + + - name: Forward API port + id: forward-api-port + if: ${{ steps.verify-pipeline-integration.outcome == 'success' }} + run: | + echo "Starting port-forward to ml-pipeline service..." 
+ kubectl port-forward svc/ml-pipeline -n kubeflow 8080:8888 & + + - name: Test Pipeline V1 API with SeaweedFS + id: test-v1-api + if: ${{ steps.forward-api-port.outcome == 'success' }} + run: | + pip3 install "kfp>=1.8.22,<2.0.0" + KF_PROFILE=kubeflow-user-example-com + TOKEN="$(kubectl -n $KF_PROFILE create token default-editor --audience=pipelines.kubeflow.org)" + python3 test/seaweedfs/test_pipeline_v1_seaweedfs.py "$TOKEN" "$KF_PROFILE" + continue-on-error: true + + - name: Test Pipeline V2 API with SeaweedFS + id: test-v2-api + if: ${{ steps.test-v1-api.outcome == 'success' }} + run: | + pip3 install kfp==2.13.0 + KF_PROFILE=kubeflow-user-example-com + TOKEN="$(kubectl -n $KF_PROFILE create token default-editor --audience=pipelines.kubeflow.org)" + python3 test/seaweedfs/test_pipeline_v2_seaweedfs.py run_pipeline "$TOKEN" "$KF_PROFILE" + continue-on-error: true + + - name: Test SeaweedFS Namespace Isolation + id: test-namespace-isolation + if: ${{ steps.test-v2-api.outcome == 'success' }} + run: ./test/seaweedfs/namespace_isolation_test.sh + continue-on-error: true + + - name: Collect failed logs + if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.install-istio.outcome != 'success' || steps.install-profile-resources.outcome != 'success' || steps.fix-clusterrole.outcome != 'success' || steps.fix-network-policy.outcome != 'success' || steps.verify-pipeline-integration.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.test-v1-api.outcome != 'success' || steps.test-v2-api.outcome != 'success' || steps.test-namespace-isolation.outcome != 'success' }} + run: | + ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_kubeflow_pod_log.txt + ./.github/resources/scripts/collect-logs.sh --ns kubeflow-user-example-com --output /tmp/tmp_user_pod_log.txt + exit 1 + + - name: Collect test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: kfp-seaweedfs-tests-artifacts-k8s-${{ 
matrix.k8s_version }} + path: /tmp/tmp*/* diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 54c3f2c3b66..ee9b41d1334 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -202,6 +202,9 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v5 + + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh - name: Set up Python uses: actions/setup-python@v4 @@ -219,6 +222,9 @@ jobs: image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }} continue-on-error: true + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh + - name: Forward API port id: forward-api-port if: ${{ steps.create-kfp-cluster.outcome == 'success' }} @@ -265,6 +271,9 @@ jobs: - name: Checkout code uses: actions/checkout@v5 + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh + - name: Set up Python uses: actions/setup-python@v4 with: @@ -327,6 +336,9 @@ jobs: - name: Checkout code uses: actions/checkout@v5 + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh + - name: Set up Python uses: actions/setup-python@v4 with: @@ -449,6 +461,9 @@ jobs: - name: Checkout code uses: actions/checkout@v5 + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh + - name: Set up Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/kfp-samples.yml b/.github/workflows/kfp-samples.yml index 6b72cc38893..8de79c148fc 100644 --- a/.github/workflows/kfp-samples.yml +++ b/.github/workflows/kfp-samples.yml @@ -32,32 +32,42 @@ jobs: - name: Checkout code uses: actions/checkout@v5 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh - - name: Free up space in /dev/root - run: | - echo "Disk usage before clean up:" - df -h - sudo rm -rf /usr/share/dotnet - sudo rm -rf 
"$AGENT_TOOLSDIRECTORY" - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/share/boost - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/local/.ghcup - sudo rm -rf /usr/share/swift - sudo rm -rf /opt/hostedtoolcache/CodeQL || true - sudo rm -rf /opt/hostedtoolcache/Java_* || true - sudo rm -rf /opt/hostedtoolcache/Ruby || true - sudo rm -rf /opt/hostedtoolcache/PyPy || true - sudo rm -rf /opt/hostedtoolcache/boost || true - docker system prune -af --volumes - docker image prune -af - echo "Disk usage after clean up:" - df -h + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: apt-get update + run: sudo apt-get update + + - name: Install protobuf-compiler + run: sudo apt-get install protobuf-compiler -y + + - name: Install setuptools + run: | + pip3 install setuptools + pip3 freeze + + - name: Install Wheel + run: pip3 install wheel==0.42.0 + + - name: Install protobuf + run: pip3 install protobuf==4.25.3 + + - name: Generate API proto files + working-directory: ./api + run: make python + + - name: Install kfp-pipeline-spec from source + run: | + python3 -m pip install api/v2alpha1/python + + - name: Generate, Build, and Install Kubernetes API proto files & packages + working-directory: ./kubernetes_platform + run: make python && pip install python/dist/*.whl - name: Create KFP cluster id: create-kfp-cluster diff --git a/.github/workflows/sdk-execution.yml b/.github/workflows/sdk-execution.yml index 2e13bae7208..54d192da6bb 100644 --- a/.github/workflows/sdk-execution.yml +++ b/.github/workflows/sdk-execution.yml @@ -29,18 +29,9 @@ jobs: - name: Checkout code uses: actions/checkout@v5 - # This is intended to address disk space issues that have surfaced - # intermittently during CI - - # https://github.com/actions/runner-images/issues/2840#issuecomment-1284059930 - - name: Free up space in /dev/root - run: | - echo "Disk usage before clean up:" - df -h - sudo rm -rf /usr/share/dotnet - 
sudo rm -rf "$AGENT_TOOLSDIRECTORY" - echo "Disk usage after clean up:" - df -h - + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh + # This must occur after "Free up space" step # otherwise python version will be overridden - name: Set up Python diff --git a/backend/Dockerfile b/backend/Dockerfile index f93fd74a34f..b7f277ff197 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -51,7 +51,7 @@ RUN set -e; \ < /samples/sample_config.json jq ".pipelines[].file" --raw-output | while read pipeline_yaml; do \ pipeline_py="${pipeline_yaml%.yaml}"; \ echo "Compiling: \"$pipeline_py\"" && python3 "$pipeline_py" && echo -n "Output: " && ls "$pipeline_py.yaml"; \ - done +done # 3. Start api web server FROM debian:stable diff --git a/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/deployment.yaml b/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/deployment.yaml index faa73ccb336..2ff27235839 100644 --- a/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/deployment.yaml +++ b/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/deployment.yaml @@ -9,19 +9,21 @@ spec: labels: sidecar.istio.io/inject: "false" spec: + securityContext: + seccompProfile: + type: RuntimeDefault containers: - name: profile-controller securityContext: allowPrivilegeEscalation: false - seccompProfile: - type: RuntimeDefault runAsNonRoot: true runAsUser: 1000 runAsGroup: 0 capabilities: drop: - ALL - image: public.ecr.aws/docker/library/python:3.12 + # We just need an image with the python botocore library installed + image: docker.io/alpine/k8s:1.32.3 command: ["python", "/hooks/sync.py"] envFrom: - configMapRef: @@ -32,22 +34,26 @@ spec: configMapKeyRef: name: pipeline-install-config key: appVersion - - name: KFP_DEFAULT_PIPELINE_ROOT - valueFrom: - configMapKeyRef: - optional: true - name: pipeline-install-config - key: defaultPipelineRoot - - name: 
MINIO_ACCESS_KEY + - name: AWS_ENDPOINT_URL + value: http://seaweedfs:8111 + - name: AWS_REGION + value: us-east-1 + - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: - name: mlpipeline-minio-artifact key: accesskey - - name: MINIO_SECRET_KEY + name: mlpipeline-minio-artifact + - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: - name: mlpipeline-minio-artifact key: secretkey + name: mlpipeline-minio-artifact + - name: KFP_DEFAULT_PIPELINE_ROOT + valueFrom: + configMapKeyRef: + key: defaultPipelineRoot + name: pipeline-install-config + optional: true volumeMounts: - name: hooks mountPath: /hooks diff --git a/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/sync.py b/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/sync.py index 3d39e3187f9..554554a0254 100644 --- a/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/sync.py +++ b/manifests/kustomize/base/installs/multi-user/pipelines-profile-controller/sync.py @@ -17,6 +17,17 @@ import os import base64 +# From awscli installed in alpine/k8s image +import botocore.session + +S3_BUCKET_NAME = 'mlpipeline' + +session = botocore.session.get_session() +# To interact with seaweedfs user management. Region does not matter. 
+iam = session.create_client('iam', region_name='foobar') +# S3 client for lifecycle policy management +s3 = session.create_client('s3', region_name='foobar') + def main(): settings = get_settings_from_env() @@ -26,8 +37,7 @@ def main(): def get_settings_from_env(controller_port=None, visualization_server_image=None, frontend_image=None, - visualization_server_tag=None, frontend_tag=None, disable_istio_sidecar=None, - minio_access_key=None, minio_secret_key=None, kfp_default_pipeline_root=None): + visualization_server_tag=None, frontend_tag=None, disable_istio_sidecar=None): """ Returns a dict of settings from environment variables relevant to the controller @@ -74,62 +84,53 @@ def get_settings_from_env(controller_port=None, disable_istio_sidecar if disable_istio_sidecar is not None \ else os.environ.get("DISABLE_ISTIO_SIDECAR") == "true" - settings["minio_access_key"] = \ - minio_access_key or \ - base64.b64encode(bytes(os.environ.get("MINIO_ACCESS_KEY"), 'utf-8')).decode('utf-8') - - settings["minio_secret_key"] = \ - minio_secret_key or \ - base64.b64encode(bytes(os.environ.get("MINIO_SECRET_KEY"), 'utf-8')).decode('utf-8') - - # KFP_DEFAULT_PIPELINE_ROOT is optional - settings["kfp_default_pipeline_root"] = \ - kfp_default_pipeline_root or \ - os.environ.get("KFP_DEFAULT_PIPELINE_ROOT") - return settings def server_factory(visualization_server_image, visualization_server_tag, frontend_image, frontend_tag, - disable_istio_sidecar, minio_access_key, - minio_secret_key, kfp_default_pipeline_root=None, - url="", controller_port=8080): + disable_istio_sidecar, url="", controller_port=8080): """ Returns an HTTPServer populated with Handler with customized settings """ class Controller(BaseHTTPRequestHandler): + def upsert_lifecycle_policy(self, bucket_name): + """Configure TTL lifecycle policy for SeaweedFS using S3 API""" + lfc = { + "Rules": [ + { + "Status": "Enabled", + "Filter": {"Prefix": "private-artifacts/"}, + "Expiration": {"Days": 183}, + "ID": 
"private-artifacts", + }, + ] + } + print('upsert_lifecycle_policy:', lfc) + try: + api_response = s3.put_bucket_lifecycle_configuration( + Bucket=bucket_name, + LifecycleConfiguration=lfc + ) + print('Lifecycle policy configured successfully:', api_response) + except Exception as e: + print(f'Warning: Failed to configure lifecycle policy: {e}') + def sync(self, parent, attachments): # parent is a namespace namespace = parent.get("metadata", {}).get("name") + pipeline_enabled = parent.get("metadata", {}).get( "labels", {}).get("pipelines.kubeflow.org/enabled") if pipeline_enabled != "true": return {"status": {}, "attachments": []} - desired_configmap_count = 1 - desired_resources = [] - if kfp_default_pipeline_root: - desired_configmap_count = 2 - desired_resources += [{ - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": { - "name": "kfp-launcher", - "namespace": namespace, - }, - "data": { - "defaultPipelineRoot": kfp_default_pipeline_root, - }, - }] - - # Compute status based on observed state. desired_status = { "kubeflow-pipelines-ready": len(attachments["Secret.v1"]) == 1 and - len(attachments["ConfigMap.v1"]) == desired_configmap_count and + len(attachments["ConfigMap.v1"]) == 3 and len(attachments["Deployment.apps/v1"]) == 2 and len(attachments["Service.v1"]) == 2 and len(attachments["DestinationRule.networking.istio.io/v1alpha3"]) == 1 and @@ -138,242 +139,116 @@ def sync(self, parent, attachments): } # Generate the desired attachment object(s). 
- desired_resources += [ + desired_resources = [ { "apiVersion": "v1", "kind": "ConfigMap", "metadata": { - "name": "metadata-grpc-configmap", + "name": "kfp-launcher", "namespace": namespace, }, "data": { - "METADATA_GRPC_SERVICE_HOST": - "metadata-grpc-service.kubeflow", - "METADATA_GRPC_SERVICE_PORT": "8080", + "defaultPipelineRoot": f"minio://{S3_BUCKET_NAME}/private-artifacts/{namespace}/v2/artifacts", }, }, - # Visualization server related manifests below - { - "apiVersion": "apps/v1", - "kind": "Deployment", - "metadata": { - "labels": { - "app": "ml-pipeline-visualizationserver" - }, - "name": "ml-pipeline-visualizationserver", - "namespace": namespace, - }, - "spec": { - "selector": { - "matchLabels": { - "app": "ml-pipeline-visualizationserver" - }, - }, - "template": { - "metadata": { - "labels": { - "app": "ml-pipeline-visualizationserver" - }, - "annotations": disable_istio_sidecar and { - "sidecar.istio.io/inject": "false" - } or {}, - }, - "spec": { - "containers": [{ - "image": f"{visualization_server_image}:{visualization_server_tag}", - "imagePullPolicy": - "IfNotPresent", - "name": - "ml-pipeline-visualizationserver", - "ports": [{ - "containerPort": 8888 - }], - "resources": { - "requests": { - "cpu": "50m", - "memory": "200Mi" - }, - "limits": { - "cpu": "500m", - "memory": "1Gi" - }, - } - }], - "serviceAccountName": - "default-editor", - }, - }, - }, - }, - { - "apiVersion": "networking.istio.io/v1alpha3", - "kind": "DestinationRule", - "metadata": { - "name": "ml-pipeline-visualizationserver", - "namespace": namespace, - }, - "spec": { - "host": "ml-pipeline-visualizationserver", - "trafficPolicy": { - "tls": { - "mode": "ISTIO_MUTUAL" - } - } - } - }, - { - "apiVersion": "security.istio.io/v1beta1", - "kind": "AuthorizationPolicy", - "metadata": { - "name": "ml-pipeline-visualizationserver", - "namespace": namespace, - }, - "spec": { - "selector": { - "matchLabels": { - "app": "ml-pipeline-visualizationserver" - } - }, - "rules": [{ - 
"from": [{ - "source": { - "principals": ["cluster.local/ns/kubeflow/sa/ml-pipeline"] - } - }] - }] - } - }, { "apiVersion": "v1", - "kind": "Service", + "kind": "ConfigMap", "metadata": { - "name": "ml-pipeline-visualizationserver", + "name": "metadata-grpc-configmap", "namespace": namespace, }, - "spec": { - "ports": [{ - "name": "http", - "port": 8888, - "protocol": "TCP", - "targetPort": 8888, - }], - "selector": { - "app": "ml-pipeline-visualizationserver", - }, - }, - }, - # Artifact fetcher related resources below. - { - "apiVersion": "apps/v1", - "kind": "Deployment", - "metadata": { - "labels": { - "app": "ml-pipeline-ui-artifact" - }, - "name": "ml-pipeline-ui-artifact", - "namespace": namespace, + "data": { + "METADATA_GRPC_SERVICE_HOST": + "metadata-grpc-service.kubeflow", + "METADATA_GRPC_SERVICE_PORT": "8080", }, - "spec": { - "selector": { - "matchLabels": { - "app": "ml-pipeline-ui-artifact" - } - }, - "template": { - "metadata": { - "labels": { - "app": "ml-pipeline-ui-artifact" - }, - "annotations": disable_istio_sidecar and { - "sidecar.istio.io/inject": "false" - } or {}, - }, - "spec": { - "containers": [{ - "name": - "ml-pipeline-ui-artifact", - "image": f"{frontend_image}:{frontend_tag}", - "imagePullPolicy": - "IfNotPresent", - "ports": [{ - "containerPort": 3000 - }], - "env": [ - { - "name": "MINIO_ACCESS_KEY", - "valueFrom": { - "secretKeyRef": { - "key": "accesskey", - "name": "mlpipeline-minio-artifact" - } - } - }, - { - "name": "MINIO_SECRET_KEY", - "valueFrom": { - "secretKeyRef": { - "key": "secretkey", - "name": "mlpipeline-minio-artifact" - } - } - } - ], - "resources": { - "requests": { - "cpu": "10m", - "memory": "70Mi" - }, - "limits": { - "cpu": "100m", - "memory": "500Mi" - }, - } - }], - "serviceAccountName": - "default-editor" - } - } - } }, { "apiVersion": "v1", - "kind": "Service", + "kind": "ConfigMap", "metadata": { - "name": "ml-pipeline-ui-artifact", + "name": "artifact-repositories", "namespace": namespace, - 
"labels": { - "app": "ml-pipeline-ui-artifact" + "annotations": { + "workflows.argoproj.io/default-artifact-repository": "default-namespaced" } }, - "spec": { - "ports": [{ - "name": - "http", # name is required to let istio understand request protocol - "port": 80, - "protocol": "TCP", - "targetPort": 3000 - }], - "selector": { - "app": "ml-pipeline-ui-artifact" - } + "data": { + "default-namespaced": json.dumps({ + "archiveLogs": True, + "s3": { + "endpoint": "minio-service.kubeflow:9000", + "bucket": S3_BUCKET_NAME, + "keyFormat": f"private-artifacts/{namespace}/{{{{workflow.name}}}}/{{{{workflow.creationTimestamp.Y}}}}/{{{{workflow.creationTimestamp.m}}}}/{{{{workflow.creationTimestamp.d}}}}/{{{{pod.name}}}}", + "insecure": True, + "accessKeySecret": { + "name": "mlpipeline-minio-artifact", + "key": "accesskey", + }, + "secretKeySecret": { + "name": "mlpipeline-minio-artifact", + "key": "secretkey", + } + } + }) } }, ] print('Received request:\n', json.dumps(parent, sort_keys=True)) print('Desired resources except secrets:\n', json.dumps(desired_resources, sort_keys=True)) + # Moved after the print argument because this is sensitive data. - desired_resources.append({ - "apiVersion": "v1", - "kind": "Secret", - "metadata": { - "name": "mlpipeline-minio-artifact", - "namespace": namespace, - }, - "data": { - "accesskey": minio_access_key, - "secretkey": minio_secret_key, - }, - }) + + # Check if secret is already there when the controller made the request. If yes, then + # use it. Else create a new credentials on seaweedfs for the namespace. + if s3_secret := attachments["Secret.v1"].get(f"{namespace}/mlpipeline-minio-artifact"): + desired_resources.append(s3_secret) + print('Using existing secret') + else: + print('Creating new access key.') + s3_access_key = iam.create_access_key(UserName=namespace) + # Use the AWS IAM API of seaweedfs to manage access policies to bucket. + # This policy ensures that a user can only access artifacts from his own profile. 
+ iam.put_user_policy( + UserName=namespace, + PolicyName=f"KubeflowProject{namespace}", + PolicyDocument=json.dumps( + { + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Action": [ + "s3:Put*", + "s3:Get*", + "s3:List*" + ], + "Resource": [ + f"arn:aws:s3:::{S3_BUCKET_NAME}/artifacts/*", + f"arn:aws:s3:::{S3_BUCKET_NAME}/private-artifacts/{namespace}/*", + f"arn:aws:s3:::{S3_BUCKET_NAME}/private/{namespace}/*", + f"arn:aws:s3:::{S3_BUCKET_NAME}/shared/*", + ] + }] + }) + ) + + self.upsert_lifecycle_policy(S3_BUCKET_NAME) + + desired_resources.insert( + 0, + { + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": "mlpipeline-minio-artifact", + "namespace": namespace, + }, + "data": { + "accesskey": base64.b64encode(s3_access_key["AccessKey"]["AccessKeyId"].encode('utf-8')).decode("utf-8"), + "secretkey": base64.b64encode(s3_access_key["AccessKey"]["SecretAccessKey"].encode('utf-8')).decode("utf-8"), + }, + }) return {"status": desired_status, "attachments": desired_resources} @@ -392,4 +267,4 @@ def do_POST(self): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-ui-deployment.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-ui-deployment.yaml index 1b0b4af9547..94bd550cac0 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-ui-deployment.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-ui-deployment.yaml @@ -67,6 +67,8 @@ spec: value: "true" - name: DISABLE_GKE_METADATA value: "true" + - name: ARTIFACTS_SERVICE_PROXY_ENABLED + value: 'false' readinessProbe: httpGet: path: /apis/v1beta1/healthz diff --git a/manifests/kustomize/env/aws/README.md b/manifests/kustomize/env/aws/README.md deleted file mode 100644 index 978adf1f745..00000000000 --- a/manifests/kustomize/env/aws/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Sample installation - -1. 
Create an EKS cluster - -Run this command to create EKS cluster -``` -eksctl create cluster \ ---name AWS-KFP \ ---version 1.17 \ ---region us-west-2 \ ---nodegroup-name linux-nodes \ ---node-type m5.xlarge \ ---nodes 2 \ ---nodes-min 1 \ ---nodes-max 4 \ ---managed -``` - -2. Prepare S3 - -Create S3 bucket. [Console](https://console.aws.amazon.com/s3/home). - -Run this command to create S3 bucket by changing `` to your prefer s3 bucket name. - -``` -export S3_BUCKET= -export AWS_REGION=us-west-2 -aws s3 mb s3://$S3_BUCKET --region $AWS_REGION -``` - -3. Prepare RDS - -Follow this [doc](https://awslabs.github.io/kubeflow-manifests/docs/deployment/rds-s3/guide/) to set up AWS RDS instance. - -4. Customize your values -- Edit [params.env](params.env), [secret.env](secret.env) and [minio-artifact-secret-patch.env](minio-artifact-secret-patch.env) - -5. Install - -``` -kustomize build ../../cluster-scoped-resources | kubectl apply -f - -# If upper one action got failed, e.x. you used wrong value, try delete, fix and apply again -# kubectl delete -k ../../cluster-scoped-resources - -kubectl wait crd/applications.app.k8s.io --for condition=established --timeout=60s - -kustomize build ./ | kubectl apply -f - -# If upper one action got failed, e.x. 
you used wrong value, try delete, fix and apply again -# kubectl delete -k ./ - -kubectl wait applications/pipeline -n kubeflow --for condition=Ready --timeout=1800s - -kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80 -``` - -Now you can access via `localhost:8080` diff --git a/manifests/kustomize/env/aws/aws-configuration-pipeline-patch.yaml b/manifests/kustomize/env/aws/aws-configuration-pipeline-patch.yaml deleted file mode 100644 index 598d8c4d213..00000000000 --- a/manifests/kustomize/env/aws/aws-configuration-pipeline-patch.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ml-pipeline -spec: - template: - metadata: - labels: - app: ml-pipeline - spec: - containers: - - env: - - name: OBJECTSTORECONFIG_SECURE - value: "true" - - name: OBJECTSTORECONFIG_BUCKETNAME - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: bucketName - - name: OBJECTSTORECONFIG_HOST - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: minioServiceHost - - name: OBJECTSTORECONFIG_REGION - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: minioServiceRegion - - name: OBJECTSTORECONFIG_PORT - value: "" - name: ml-pipeline-api-server diff --git a/manifests/kustomize/env/aws/aws-configuration-pipeline-ui-patch.yaml b/manifests/kustomize/env/aws/aws-configuration-pipeline-ui-patch.yaml deleted file mode 100644 index 2a4de3838e1..00000000000 --- a/manifests/kustomize/env/aws/aws-configuration-pipeline-ui-patch.yaml +++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ml-pipeline-ui -spec: - template: - metadata: - labels: - app: ml-pipeline-ui - spec: - volumes: - - name: config-volume - configMap: - name: ml-pipeline-ui-configmap - containers: - - name: ml-pipeline-ui - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: accesskey - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - 
name: mlpipeline-minio-artifact - key: secretkey diff --git a/manifests/kustomize/env/aws/config b/manifests/kustomize/env/aws/config deleted file mode 100644 index ebf05538dc9..00000000000 --- a/manifests/kustomize/env/aws/config +++ /dev/null @@ -1,20 +0,0 @@ -{ -artifactRepository: -{ - s3: { - bucket: $(kfp-artifact-bucket-name), - keyPrefix: artifacts, - endpoint: s3.amazonaws.com, - insecure: true, - accessKeySecret: { - name: mlpipeline-minio-artifact, - key: accesskey - }, - secretKeySecret: { - name: mlpipeline-minio-artifact, - key: secretkey - } - }, - archiveLogs: true -} -} diff --git a/manifests/kustomize/env/aws/kustomization.yaml b/manifests/kustomize/env/aws/kustomization.yaml deleted file mode 100644 index 93a5bc5e8c1..00000000000 --- a/manifests/kustomize/env/aws/kustomization.yaml +++ /dev/null @@ -1,39 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: kubeflow -resources: -- ../../env/platform-agnostic -configMapGenerator: -- behavior: merge - envs: - - params.env - name: pipeline-install-config -- behavior: replace - files: - - config - name: workflow-controller-configmap -- behavior: replace - files: - - viewer-pod-template.json - name: ml-pipeline-ui-configmap -secretGenerator: -- behavior: merge - envs: - - secret.env - name: mysql-secret -- behavior: merge - envs: - - minio-artifact-secret-patch.env - name: mlpipeline-minio-artifact -generatorOptions: - disableNameSuffixHash: true -patches: -- path: aws-configuration-pipeline-patch.yaml -- path: aws-configuration-pipeline-ui-patch.yaml -# Identifier for application manager to apply ownerReference. -# The ownerReference ensures the resources get garbage collected -# when application is deleted. 
-labels: -- includeSelectors: true - pairs: - application-crd-id: kubeflow-pipelines diff --git a/manifests/kustomize/env/aws/minio-artifact-secret-patch.env b/manifests/kustomize/env/aws/minio-artifact-secret-patch.env deleted file mode 100644 index 3f11b74138c..00000000000 --- a/manifests/kustomize/env/aws/minio-artifact-secret-patch.env +++ /dev/null @@ -1,2 +0,0 @@ -accesskey=YOUR_AWS_ACCESS_ID -secretkey=YOUR_AWS_SECRET_KEY diff --git a/manifests/kustomize/env/aws/params.env b/manifests/kustomize/env/aws/params.env deleted file mode 100644 index 30e966592ca..00000000000 --- a/manifests/kustomize/env/aws/params.env +++ /dev/null @@ -1,5 +0,0 @@ -dbHost=YOUR_RDS_ENDPOINT - -bucketName=YOUR_S3_BUCKET_NAME -minioServiceHost=s3.amazonaws.com -minioServiceRegion=YOUR_AWS_REGION diff --git a/manifests/kustomize/env/aws/secret.env b/manifests/kustomize/env/aws/secret.env deleted file mode 100644 index cdd7b0a5fc4..00000000000 --- a/manifests/kustomize/env/aws/secret.env +++ /dev/null @@ -1,2 +0,0 @@ -username=YOUR_RDS_USERNAME -password=YOUR_RDS_PASSWORD diff --git a/manifests/kustomize/env/aws/viewer-pod-template.json b/manifests/kustomize/env/aws/viewer-pod-template.json deleted file mode 100644 index 5cce566794e..00000000000 --- a/manifests/kustomize/env/aws/viewer-pod-template.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "spec": { - "containers": [ - { - "env": [ - { - "name": "AWS_ACCESS_KEY_ID", - "valueFrom": { - "secretKeyRef": { - "name": "mlpipeline-minio-artifact", - "key": "accesskey" - } - } - }, - { - "name": "AWS_SECRET_ACCESS_KEY", - "valueFrom": { - "secretKeyRef": { - "name": "mlpipeline-minio-artifact", - "key": "secretkey" - } - } - }, - { - "name": "AWS_REGION", - "valueFrom": { - "configMapKeyRef": { - "name": "pipeline-install-config", - "key": "minioServiceRegion" - } - } - } - ] - } - ] - } -} \ No newline at end of file diff --git a/manifests/kustomize/env/azure/kustomization.yaml b/manifests/kustomize/env/azure/kustomization.yaml deleted file 
mode 100644 index 75efe953bd0..00000000000 --- a/manifests/kustomize/env/azure/kustomization.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: kubeflow - -resources: -- ../../base/installs/generic -- ../../base/metadata/base -- ../../third-party/argo/installs/namespace -- minio-azure-gateway - -configMapGenerator: -- behavior: merge - envs: - - params.env - name: pipeline-install-config - -secretGenerator: -- behavior: merge - envs: - - mysql-secret.env - name: mysql-secret - -# Identifier for application manager to apply ownerReference. -# The ownerReference ensures the resources get garbage collected -# when application is deleted. -labels: -- includeSelectors: true - pairs: - application-crd-id: kubeflow-pipelines diff --git a/manifests/kustomize/env/azure/minio-azure-gateway/kustomization.yaml b/manifests/kustomize/env/azure/minio-azure-gateway/kustomization.yaml deleted file mode 100644 index 65a0957e278..00000000000 --- a/manifests/kustomize/env/azure/minio-azure-gateway/kustomization.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: -- minio-azure-gateway-deployment.yaml -- minio-azure-gateway-service.yaml - -secretGenerator: -- envs: - - minio-artifact-secret.env - name: mlpipeline-minio-artifact -generatorOptions: - # mlpipeline-minio-artifact needs to be referred by exact name - disableNameSuffixHash: true diff --git a/manifests/kustomize/env/azure/minio-azure-gateway/minio-artifact-secret.env b/manifests/kustomize/env/azure/minio-azure-gateway/minio-artifact-secret.env deleted file mode 100644 index 7d9d25d6f05..00000000000 --- a/manifests/kustomize/env/azure/minio-azure-gateway/minio-artifact-secret.env +++ /dev/null @@ -1,2 +0,0 @@ -accesskey=[STORAGEACCOUNTNAME] -secretkey=[STORAGEACCOUNTKEY] \ No newline at end of file diff --git a/manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-deployment.yaml 
b/manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-deployment.yaml deleted file mode 100644 index 56979c42e94..00000000000 --- a/manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-deployment.yaml +++ /dev/null @@ -1,40 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: minio - labels: - app: minio -spec: - selector: - matchLabels: - app: minio - strategy: - type: Recreate - template: - metadata: - labels: - app: minio - spec: - containers: - - name: minio - image: gcr.io/ml-pipeline/minio:RELEASE.2019-08-14T20-37-41Z-license-compliance - args: - - gateway - - azure - env: - - name: MINIO_ACCESS_KEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: accesskey - - name: MINIO_SECRET_KEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: secretkey - ports: - - containerPort: 9000 - resources: - requests: - cpu: 20m - memory: 25Mi diff --git a/manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-service.yaml b/manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-service.yaml deleted file mode 100644 index 7dd18174965..00000000000 --- a/manifests/kustomize/env/azure/minio-azure-gateway/minio-azure-gateway-service.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: minio-service -spec: - ports: - - port: 9000 - targetPort: 9000 - protocol: TCP - selector: - app: minio \ No newline at end of file diff --git a/manifests/kustomize/env/azure/mysql-secret.env b/manifests/kustomize/env/azure/mysql-secret.env deleted file mode 100644 index 93f6075227d..00000000000 --- a/manifests/kustomize/env/azure/mysql-secret.env +++ /dev/null @@ -1,2 +0,0 @@ -username=[SQLUSER]@[SQLSERVERNAME] -password=[SQLPASS] \ No newline at end of file diff --git a/manifests/kustomize/env/azure/params.env b/manifests/kustomize/env/azure/params.env deleted file mode 100644 index 44ed71784f1..00000000000 --- 
a/manifests/kustomize/env/azure/params.env +++ /dev/null @@ -1 +0,0 @@ -dbHost=[SQLSERVERNAME].mysql.database.azure.com diff --git a/manifests/kustomize/env/azure/readme.md b/manifests/kustomize/env/azure/readme.md deleted file mode 100644 index 5487d6aa02f..00000000000 --- a/manifests/kustomize/env/azure/readme.md +++ /dev/null @@ -1,15 +0,0 @@ -# KFP customizations for Azure - -This template provides a starting point to configure KFP to use an Azure hosted MySQL database, as well as an Azure Blob backed MinIO service. - -## MySQL - -1. [Create an Azure Database for MySQL](https://docs.microsoft.com/azure/mysql/quickstart-create-mysql-server-database-using-azure-portal). Ensure that it will allow connections from the Kubernetes cluster. - -2. Substitute the server name into [params.env](./params.env), and the username and password into [mysql-secret.env](./mysql-secret.env) - -## MinIO Gateway for Azure Blobstore - -1. [Create an Azure Storage account](https://docs.microsoft.com/azure/storage/common/storage-account-create). Ensure that it will allow connections from the Kubernetes cluster. - -2. Substitute the storage name and access key into [minio-artifact-secret.env](./minio-azure-gateway/minio-artifact-secret.env). 
diff --git a/manifests/kustomize/env/plain-multi-user/kustomization.yaml b/manifests/kustomize/env/plain-multi-user/kustomization.yaml index 8b6d13f5142..5f1016d0288 100644 --- a/manifests/kustomize/env/plain-multi-user/kustomization.yaml +++ b/manifests/kustomize/env/plain-multi-user/kustomization.yaml @@ -7,8 +7,7 @@ resources: - ../../base/metadata/options/istio - ../../third-party/mysql/base - ../../third-party/mysql/options/istio -- ../../third-party/minio/base -- ../../third-party/minio/options/istio +- ../../third-party/seaweedfs/istio - ../../third-party/metacontroller/base diff --git a/manifests/kustomize/env/plain/kustomization.yaml b/manifests/kustomize/env/plain/kustomization.yaml index a60ce3a8e7e..dcbc92514b3 100644 --- a/manifests/kustomize/env/plain/kustomization.yaml +++ b/manifests/kustomize/env/plain/kustomization.yaml @@ -4,7 +4,7 @@ kind: Kustomization resources: - ../../base/installs/generic - ../../base/metadata/base -- ../../third-party/minio/base +- ../../third-party/seaweedfs/base - ../../third-party/mysql/base # Identifier for application manager to apply ownerReference. diff --git a/manifests/kustomize/env/platform-agnostic-multi-user-legacy/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-multi-user-legacy/kustomization.yaml index e600b13e732..b8deb06d39c 100644 --- a/manifests/kustomize/env/platform-agnostic-multi-user-legacy/kustomization.yaml +++ b/manifests/kustomize/env/platform-agnostic-multi-user-legacy/kustomization.yaml @@ -9,8 +9,7 @@ resources: - ../../third-party/argo/installs/cluster - ../../third-party/mysql/base - ../../third-party/mysql/options/istio -- ../../third-party/minio/base -- ../../third-party/minio/options/istio +- ../../third-party/seaweedfs/istio # !!! 
If you want to customize the namespace, diff --git a/manifests/kustomize/env/platform-agnostic-multi-user/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-multi-user/kustomization.yaml index aa4d6c6670a..014f327cc2a 100644 --- a/manifests/kustomize/env/platform-agnostic-multi-user/kustomization.yaml +++ b/manifests/kustomize/env/platform-agnostic-multi-user/kustomization.yaml @@ -9,8 +9,7 @@ resources: - ../../third-party/argo/installs/cluster - ../../third-party/mysql/base - ../../third-party/mysql/options/istio -- ../../third-party/minio/base -- ../../third-party/minio/options/istio +- ../../third-party/seaweedfs/istio # !!! If you want to customize the namespace, diff --git a/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml index e6e4af38686..a456d1cd881 100644 --- a/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml +++ b/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml @@ -5,7 +5,7 @@ resources: - ../../base/installs/generic/postgres - ../../base/metadata/overlays/postgres - ../../third-party/argo/installs/namespace -- ../../third-party/minio/base +- ../../third-party/seaweedfs/base - ../../third-party/postgresql/base diff --git a/manifests/kustomize/env/platform-agnostic/kustomization.yaml b/manifests/kustomize/env/platform-agnostic/kustomization.yaml index 0a9d90b016e..e1e4cdd4779 100644 --- a/manifests/kustomize/env/platform-agnostic/kustomization.yaml +++ b/manifests/kustomize/env/platform-agnostic/kustomization.yaml @@ -5,7 +5,7 @@ resources: - ../../base/installs/generic - ../../base/metadata/base - ../../third-party/argo/installs/namespace -- ../../third-party/minio/base +- ../../third-party/seaweedfs/base - ../../third-party/mysql/base # Identifier for application manager to apply ownerReference. 
diff --git a/manifests/kustomize/hack/test.sh b/manifests/kustomize/hack/test.sh index 6df4293c585..532b907506e 100755 --- a/manifests/kustomize/hack/test.sh +++ b/manifests/kustomize/hack/test.sh @@ -35,8 +35,6 @@ kustomization_yamls=( "env/gcp" "env/platform-agnostic" "env/platform-agnostic-emissary" - "env/aws" - "env/azure" ) for path in "${kustomization_yamls[@]}" do diff --git a/manifests/kustomize/third-party/argo/base/workflow-controller-configmap-patch.yaml b/manifests/kustomize/third-party/argo/base/workflow-controller-configmap-patch.yaml index 47b52aa04a3..4cd84f880e4 100644 --- a/manifests/kustomize/third-party/argo/base/workflow-controller-configmap-patch.yaml +++ b/manifests/kustomize/third-party/argo/base/workflow-controller-configmap-patch.yaml @@ -25,8 +25,7 @@ data: # The following format looks like: # artifacts/my-workflow-abc123/2018/08/23/my-workflow-abc123-1234567890 # Adding date into the path greatly reduces the chance of {{pod.name}} collision. - keyFormat: "artifacts/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}}" - # insecure will disable TLS. 
Primarily used for minio installs not configured with TLS + keyFormat: "private-artifacts/{{workflow.namespace}}/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}}" insecure: true accessKeySecret: name: mlpipeline-minio-artifact diff --git a/manifests/kustomize/third-party/minio/base/kustomization.yaml b/manifests/kustomize/third-party/minio/base/kustomization.yaml deleted file mode 100644 index 5a9d5f37517..00000000000 --- a/manifests/kustomize/third-party/minio/base/kustomization.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: kubeflow - -resources: -- minio-deployment.yaml -- minio-pvc.yaml -- minio-service.yaml -- mlpipeline-minio-artifact-secret.yaml diff --git a/manifests/kustomize/third-party/minio/base/minio-deployment.yaml b/manifests/kustomize/third-party/minio/base/minio-deployment.yaml deleted file mode 100644 index e42f4903d0a..00000000000 --- a/manifests/kustomize/third-party/minio/base/minio-deployment.yaml +++ /dev/null @@ -1,63 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: minio - labels: - app: minio -spec: - selector: - matchLabels: - app: minio - strategy: - type: Recreate - template: - metadata: - labels: - app: minio - spec: - securityContext: - fsGroup: 1000 - fsGroupChangePolicy: "OnRootMismatch" - seccompProfile: - type: RuntimeDefault - containers: - - args: - - server - - /data - env: - - name: MINIO_ACCESS_KEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: accesskey - - name: MINIO_SECRET_KEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: secretkey - image: gcr.io/ml-pipeline/minio:RELEASE.2019-08-14T20-37-41Z-license-compliance - name: minio - ports: - - containerPort: 9000 - securityContext: - allowPrivilegeEscalation: false - seccompProfile: - type: RuntimeDefault - runAsNonRoot: true - runAsUser: 1000 - runAsGroup: 0 - 
capabilities: - drop: - - ALL - volumeMounts: - - mountPath: /data - name: data - subPath: minio - resources: - requests: - cpu: 20m - memory: 100Mi - volumes: - - name: data - persistentVolumeClaim: - claimName: minio-pvc diff --git a/manifests/kustomize/third-party/minio/base/minio-pvc.yaml b/manifests/kustomize/third-party/minio/base/minio-pvc.yaml deleted file mode 100644 index ecfa32bbe8a..00000000000 --- a/manifests/kustomize/third-party/minio/base/minio-pvc.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: minio-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi diff --git a/manifests/kustomize/third-party/minio/base/minio-service.yaml b/manifests/kustomize/third-party/minio/base/minio-service.yaml deleted file mode 100644 index 3ab42043017..00000000000 --- a/manifests/kustomize/third-party/minio/base/minio-service.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: minio-service -spec: - ports: - - name: http - port: 9000 - protocol: TCP - targetPort: 9000 - selector: - app: minio diff --git a/manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml b/manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml deleted file mode 100644 index 7247cd34f96..00000000000 --- a/manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: security.istio.io/v1beta1 -kind: AuthorizationPolicy -metadata: - name: minio-service -spec: - action: ALLOW - selector: - matchLabels: - app: minio - rules: - - from: - - source: - principals: - - cluster.local/ns/kubeflow/sa/ml-pipeline - - from: - - source: - principals: - - cluster.local/ns/kubeflow/sa/ml-pipeline-ui - # Allow traffic from User Pipeline Pods, which don't have a sidecar. 
- - {} ---- -apiVersion: "networking.istio.io/v1alpha3" -kind: DestinationRule -metadata: - name: ml-pipeline-minio -spec: - host: minio-service.kubeflow.svc.cluster.local - trafficPolicy: - tls: - mode: ISTIO_MUTUAL diff --git a/manifests/kustomize/third-party/minio/options/istio/kustomization.yaml b/manifests/kustomize/third-party/minio/options/istio/kustomization.yaml deleted file mode 100644 index 611e399e57c..00000000000 --- a/manifests/kustomize/third-party/minio/options/istio/kustomization.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: -- istio-authorization-policy.yaml diff --git a/manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml b/manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml deleted file mode 100644 index c96a9785234..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/base/argo-workflow-controller/workflow-controller-configmap-patch.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# based on https://github.com/kubeflow/manifests/blob/master/apps/pipeline/upstream/third-party/argo/base/workflow-controller-configmap-patch.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: workflow-controller-configmap - namespace: kubeflow -data: - # SeaweedFS configuration for V1 pipelines with namespace isolation - # SeaweedFS is exposed through minio-service for backward compatibility - artifactRepository: | - archiveLogs: true - s3: - endpoint: minio-service.kubeflow:9000 - bucket: mlpipeline - keyFormat: private-artifacts/{{workflow.namespace}}/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}} - insecure: true - accessKeySecret: - name: mlpipeline-minio-artifact - key: accesskey - secretKeySecret: - name: mlpipeline-minio-artifact - key: secretkey - executor: | - imagePullPolicy: IfNotPresent 
diff --git a/manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml b/manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml index 8afc5a523e0..9138ac90bb1 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/kustomization.yaml @@ -4,27 +4,3 @@ namespace: kubeflow resources: - seaweedfs/ -- ../../../apps/pipeline/upstream/env/cert-manager/platform-agnostic-multi-user -configMapGenerator: -- name: kubeflow-pipelines-profile-controller-code - behavior: replace - files: - - pipeline-profile-controller/sync.py -patches: -- path: minio-service-patch.yaml -- path: pipeline-profile-controller/deployment.yaml -- path: argo-workflow-controller/workflow-controller-configmap-patch.yaml -- patch: |- - apiVersion: apps/v1 - kind: Deployment - metadata: - name: ml-pipeline-ui - spec: - template: - spec: - containers: - - name: ml-pipeline-ui - env: - - name: ARTIFACTS_SERVICE_PROXY_ENABLED - value: 'false' - $patch: merge diff --git a/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml b/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml deleted file mode 100644 index b7c7ad08cff..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/deployment.yaml +++ /dev/null @@ -1,43 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: kubeflow-pipelines-profile-controller -spec: - template: - spec: - securityContext: - seccompProfile: - type: RuntimeDefault - containers: - - name: profile-controller - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - runAsGroup: 0 - capabilities: - drop: - - ALL - # We just need an image with the python botocore library installed - image: docker.io/alpine/k8s:1.32.3 - command: ["python", "/hooks/sync.py"] - env: - - name: KFP_VERSION - valueFrom: - configMapKeyRef: - name: 
pipeline-install-config - key: appVersion - - name: AWS_ENDPOINT_URL - value: http://seaweedfs:8111 - - name: AWS_REGION - value: us-east-1 - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: accesskey - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: secretkey diff --git a/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py b/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py deleted file mode 100644 index bf8c7618fa8..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/base/pipeline-profile-controller/sync.py +++ /dev/null @@ -1,243 +0,0 @@ -# Copyright 2020-2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from http.server import BaseHTTPRequestHandler, HTTPServer -import json -import os -import base64 - -# From awscli installed in alpine/k8s image -import botocore.session - -S3_BUCKET_NAME = 'mlpipeline' - -session = botocore.session.get_session() -# To interact with seaweedfs user management. Region does not matter. 
-iam = session.create_client('iam', region_name='foobar') - - -def main(): - settings = get_settings_from_env() - server = server_factory(**settings) - server.serve_forever() - - -def get_settings_from_env(controller_port=None, - visualization_server_image=None, frontend_image=None, - visualization_server_tag=None, frontend_tag=None, disable_istio_sidecar=None): - """ - Returns a dict of settings from environment variables relevant to the controller - - Environment settings can be overridden by passing them here as arguments. - - Settings are pulled from the all-caps version of the setting name. The - following defaults are used if those environment variables are not set - to enable backwards compatibility with previous versions of this script: - visualization_server_image: ghcr.io/kubeflow/kfp-visualization-server - visualization_server_tag: value of KFP_VERSION environment variable - frontend_image: ghcr.io/kubeflow/kfp-frontend - frontend_tag: value of KFP_VERSION environment variable - disable_istio_sidecar: Required (no default) - minio_access_key: Required (no default) - minio_secret_key: Required (no default) - """ - settings = dict() - settings["controller_port"] = \ - controller_port or \ - os.environ.get("CONTROLLER_PORT", "8080") - - settings["visualization_server_image"] = \ - visualization_server_image or \ - os.environ.get("VISUALIZATION_SERVER_IMAGE", "ghcr.io/kubeflow/kfp-visualization-server") - - settings["frontend_image"] = \ - frontend_image or \ - os.environ.get("FRONTEND_IMAGE", "ghcr.io/kubeflow/kfp-frontend") - - # Look for specific tags for each image first, falling back to - # previously used KFP_VERSION environment variable for backwards - # compatibility - settings["visualization_server_tag"] = \ - visualization_server_tag or \ - os.environ.get("VISUALIZATION_SERVER_TAG") or \ - os.environ["KFP_VERSION"] - - settings["frontend_tag"] = \ - frontend_tag or \ - os.environ.get("FRONTEND_TAG") or \ - os.environ["KFP_VERSION"] - - 
settings["disable_istio_sidecar"] = \ - disable_istio_sidecar if disable_istio_sidecar is not None \ - else os.environ.get("DISABLE_ISTIO_SIDECAR") == "true" - - return settings - - -def server_factory(visualization_server_image, - visualization_server_tag, frontend_image, frontend_tag, - disable_istio_sidecar, url="", controller_port=8080): - """ - Returns an HTTPServer populated with Handler with customized settings - """ - class Controller(BaseHTTPRequestHandler): - def sync(self, parent, attachments): - # parent is a namespace - namespace = parent.get("metadata", {}).get("name") - - pipeline_enabled = parent.get("metadata", {}).get( - "labels", {}).get("pipelines.kubeflow.org/enabled") - - if pipeline_enabled != "true": - return {"status": {}, "attachments": []} - - # Compute status based on observed state. - desired_status = { - "kubeflow-pipelines-ready": - len(attachments["Secret.v1"]) == 1 and - len(attachments["ConfigMap.v1"]) == 3 and - len(attachments["Deployment.apps/v1"]) == 2 and - len(attachments["Service.v1"]) == 2 and - len(attachments["DestinationRule.networking.istio.io/v1alpha3"]) == 1 and - len(attachments["AuthorizationPolicy.security.istio.io/v1beta1"]) == 1 and - "True" or "False" - } - - # Generate the desired attachment object(s). 
- desired_resources = [ - { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": { - "name": "kfp-launcher", - "namespace": namespace, - }, - "data": { - "defaultPipelineRoot": f"minio://{S3_BUCKET_NAME}/private-artifacts/{namespace}/v2/artifacts", - }, - }, - { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": { - "name": "metadata-grpc-configmap", - "namespace": namespace, - }, - "data": { - "METADATA_GRPC_SERVICE_HOST": - "metadata-grpc-service.kubeflow", - "METADATA_GRPC_SERVICE_PORT": "8080", - }, - }, - { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": { - "name": "artifact-repositories", - "namespace": namespace, - "annotations": { - "workflows.argoproj.io/default-artifact-repository": "default-namespaced" - } - }, - "data": { - "default-namespaced": json.dumps({ - "archiveLogs": True, - "s3": { - "endpoint": "minio-service.kubeflow:9000", - "bucket": S3_BUCKET_NAME, - "keyFormat": f"private-artifacts/{namespace}/{{{{workflow.name}}}}/{{{{workflow.creationTimestamp.Y}}}}/{{{{workflow.creationTimestamp.m}}}}/{{{{workflow.creationTimestamp.d}}}}/{{{{pod.name}}}}", - "insecure": True, - "accessKeySecret": { - "name": "mlpipeline-minio-artifact", - "key": "accesskey", - }, - "secretKeySecret": { - "name": "mlpipeline-minio-artifact", - "key": "secretkey", - } - } - }) - } - }, - ] - print('Received request:\n', json.dumps(parent, sort_keys=True)) - print('Desired resources except secrets:\n', json.dumps(desired_resources, sort_keys=True)) - - # Moved after the print argument because this is sensitive data. - - # Check if secret is already there when the controller made the request. If yes, then - # use it. Else create a new credentials on seaweedfs for the namespace. 
- if s3_secret := attachments["Secret.v1"].get(f"{namespace}/mlpipeline-minio-artifact"): - desired_resources.append(s3_secret) - print('Using existing secret') - else: - print('Creating new access key.') - s3_access_key = iam.create_access_key(UserName=namespace) - # Use the AWS IAM API of seaweedfs to manage access policies to bucket. - # This policy ensures that a user can only access artifacts from his own profile. - iam.put_user_policy( - UserName=namespace, - PolicyName=f"KubeflowProject{namespace}", - PolicyDocument=json.dumps( - { - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Action": [ - "s3:Put*", - "s3:Get*", - "s3:List*" - ], - "Resource": [ - f"arn:aws:s3:::{S3_BUCKET_NAME}/artifacts/*", - f"arn:aws:s3:::{S3_BUCKET_NAME}/private-artifacts/{namespace}/*", - f"arn:aws:s3:::{S3_BUCKET_NAME}/private/{namespace}/*", - f"arn:aws:s3:::{S3_BUCKET_NAME}/shared/*", - ] - }] - }) - ) - desired_resources.insert( - 0, - { - "apiVersion": "v1", - "kind": "Secret", - "metadata": { - "name": "mlpipeline-minio-artifact", - "namespace": namespace, - }, - "data": { - "accesskey": base64.b64encode(s3_access_key["AccessKey"]["AccessKeyId"].encode('utf-8')).decode("utf-8"), - "secretkey": base64.b64encode(s3_access_key["AccessKey"]["SecretAccessKey"].encode('utf-8')).decode("utf-8"), - }, - }) - - return {"status": desired_status, "attachments": desired_resources} - - def do_POST(self): - # Serve the sync() function as a JSON webhook. 
- observed = json.loads( - self.rfile.read(int(self.headers.get("content-length")))) - desired = self.sync(observed["object"], observed["attachments"]) - - self.send_response(200) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(bytes(json.dumps(desired), 'utf-8')) - - return HTTPServer((url, int(controller_port)), Controller) - - -if __name__ == "__main__": - main() diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml index 774997572d7..9dbf9e742b3 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/kustomization.yaml @@ -9,3 +9,5 @@ resources: - seaweedfs-create-admin-user-job.yaml - seaweedfs-service.yaml - seaweedfs-service-account.yaml +- minio-service.yaml +- mlpipeline-minio-artifact-secret.yaml diff --git a/manifests/kustomize/third-party/seaweedfs/base/minio-service-patch.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/minio-service.yaml similarity index 75% rename from manifests/kustomize/third-party/seaweedfs/base/minio-service-patch.yaml rename to manifests/kustomize/third-party/seaweedfs/base/seaweedfs/minio-service.yaml index d44ba3e6144..245153420cd 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/minio-service-patch.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/minio-service.yaml @@ -3,6 +3,9 @@ kind: Service metadata: name: minio-service namespace: kubeflow + labels: + app: seaweedfs + component: minio-compatibility spec: ports: - name: http diff --git a/manifests/kustomize/third-party/minio/base/mlpipeline-minio-artifact-secret.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/mlpipeline-minio-artifact-secret.yaml similarity index 100% rename from manifests/kustomize/third-party/minio/base/mlpipeline-minio-artifact-secret.yaml 
rename to manifests/kustomize/third-party/seaweedfs/base/seaweedfs/mlpipeline-minio-artifact-secret.yaml diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml index 9a466bf8c57..55f01672541 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml @@ -60,10 +60,9 @@ spec: - mountPath: /data name: data resources: - # Benchmark this, just taken from minio requests: - cpu: 20m - memory: 100Mi + cpu: 32m + memory: 128Mi volumes: - name: data persistentVolumeClaim: diff --git a/manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml b/manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml deleted file mode 100644 index bda6a51f366..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/pipeline_swfs_test.yaml +++ /dev/null @@ -1,144 +0,0 @@ -name: Test pipelines with seaweedfs -on: - workflow_dispatch: - pull_request: - paths: - - tests/install_KinD_create_KinD_cluster_install_kustomize.sh - - .github/workflows/pipeline_swfs_test.yaml - - apps/pipeline/upstream/** - - tests/istio* - - tests/oauth2-proxy_install.sh - - common/cert-manager/** - - common/oauth2-proxy/** - - common/istio*/** - - experimental/seaweedfs/** - - tests/swfs_namespace_isolation_test.sh - - tests/s3_helper_test.py - -jobs: - build: - timeout-minutes: 15 - runs-on: - labels: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Remove unused software - run: | - df -h # Check disk space before removal - sudo rm -rf /usr/share/dotnet # Example: Remove .NET SDK - sudo rm -rf /usr/local/lib/android # Example: Remove Android SDK - sudo rm -rf /opt/ghc # Example: Remove Haskell - df -h # Check disk space after removal - - - name: Proactively prune OCI system on GHA runner - run: docker system prune -a 
--volumes --force - - - name: Install KinD, Create KinD cluster and Install kustomize - run: ./tests/install_KinD_create_KinD_cluster_install_kustomize.sh - - - name: Install kubectl - run: ./tests/kubectl_install.sh - - - name: Install Istio - run: ./tests/istio-cni_install.sh - - - name: Install oauth2-proxy - run: ./tests/oauth2-proxy_install.sh - - - name: Install cert-manager - run: ./tests/cert_manager_install.sh - - - name: Create kubeflow namespace - run: kustomize build common/kubeflow-namespace/base | kubectl apply -f - - - - name: Install KF Pipelines - run: ./tests/pipelines_swfs_install.sh - - - name: Install KF Multi Tenancy - run: ./tests/multi_tenancy_install.sh - - - name: Install kubeflow-istio-resources - run: kustomize build common/istio-cni-1-24/kubeflow-istio-resources/base | kubectl apply -f - - - - name: Create KF Profile - run: ./tests/kubeflow_profile_install.sh - - - name: Verify Pipeline Integration - run: | - KF_PROFILE=kubeflow-user-example-com - if ! kubectl get secret mlpipeline-minio-artifact -n $KF_PROFILE > /dev/null 2>&1; then - echo "Error: Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE" - exit 1 - fi - kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" -o json | jq -r '.data | keys[] as $k | "\($k): \(. 
| .[$k] | @base64d)"' | tr '\n' ' ' - - - - name: Port forward - run: ./tests/port_forward_gateway.sh - - - name: List and deploy test pipeline with V1 API - run: | - pip3 install "kfp>=1.8.22,<2.0.0" - KF_PROFILE=kubeflow-user-example-com - TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)" - python3 tests/pipeline_v1_test.py "${TOKEN}" "${KF_PROFILE}" - - - name: Prune images inside Kind cluster - run: docker exec kind-control-plane bash -c "crictl images prune" - - - name: List and deploy test pipeline with V2 API - run: | - pip3 install kfp==2.13.0 - KF_PROFILE=kubeflow-user-example-com - TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)" - python3 tests/pipeline_v2_test.py run_pipeline "${TOKEN}" "${KF_PROFILE}" - - - name: Fail to list pipelines with unauthorized ServiceAccount Token (V2 API) - run: | - pip3 install kfp==2.13.0 - KF_PROFILE=kubeflow-user-example-com - TOKEN="$(kubectl -n default create token default)" - python3 tests/pipeline_v2_test.py test_unauthorized_access "${TOKEN}" "${KF_PROFILE}" - echo "Test succeeded. Token from unauthorized ServiceAccount cannot list pipelines in $KF_PROFILE namespace." 
- - - name: Test SeaweedFS Namespace Isolation - run: ./tests/swfs_namespace_isolation_test.sh - - - name: Apply Pod Security Standards baseline levels for static namespaces - run: ./tests/PSS_baseline_enable.sh - - - name: Unapply applied baseline labels - run: | - NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow") - for NAMESPACE in "${NAMESPACES[@]}"; do - if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then - kubectl label namespace $NAMESPACE pod-security.kubernetes.io/enforce- - fi - done - - - name: Applying Pod Security Standards restricted levels for static namespaces - run: ./tests/PSS_restricted_enable.sh - - - name: Collect Logs on Failure - if: failure() - run: | - mkdir -p logs - kubectl get all --all-namespaces > logs/resources.txt - kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp > logs/events.txt - for namespace in kubeflow istio-system cert-manager auth kubeflow-user-example-com test-profile-1 test-profile-2; do - if kubectl get namespace $namespace >/dev/null 2>&1; then - kubectl describe pods -n $namespace > logs/$namespace-pods.txt - for pod in $(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}'); do - kubectl logs -n $namespace $pod --tail=100 > logs/$namespace-$pod.txt 2>&1 || true - done - fi - done - - - name: Upload Diagnostic Logs - if: always() - uses: actions/upload-artifact@v4 - with: - name: kubeflow-test-logs - path: logs/ diff --git a/test/seaweedfs/allow-user-namespace-access.yaml b/test/seaweedfs/allow-user-namespace-access.yaml new file mode 100644 index 00000000000..3b99091c81d --- /dev/null +++ b/test/seaweedfs/allow-user-namespace-access.yaml @@ -0,0 +1,18 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-user-namespace-access + namespace: kubeflow +spec: + podSelector: {} + policyTypes: + - Ingress + ingress: + - from: + - namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: In + 
values: ["kubeflow-user-example-com"] + - from: + - podSelector: {} diff --git a/test/seaweedfs/kubeflow-edit-clusterrole.yaml b/test/seaweedfs/kubeflow-edit-clusterrole.yaml new file mode 100644 index 00000000000..752855e15d7 --- /dev/null +++ b/test/seaweedfs/kubeflow-edit-clusterrole.yaml @@ -0,0 +1,46 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kubeflow-edit +aggregationRule: + clusterRoleSelectors: + - matchLabels: + rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" +rules: [] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kubeflow-edit-basic-permissions + labels: + rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" +rules: +- apiGroups: + - "" + resources: + - configmaps + - pods + - pods/log + - secrets + - events + - persistentvolumeclaims + - services + verbs: + - get + - list + - create + - update + - patch + - delete +- apiGroups: + - "apps" + resources: + - deployments + - replicasets + verbs: + - get + - list + - create + - update + - patch + - delete \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh b/test/seaweedfs/namespace_isolation_test.sh old mode 100644 new mode 100755 similarity index 97% rename from manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh rename to test/seaweedfs/namespace_isolation_test.sh index 03b0af0d285..1ac5b53bf34 --- a/manifests/kustomize/third-party/seaweedfs/swfs_namespace_isolation_test.sh +++ b/test/seaweedfs/namespace_isolation_test.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -euxo pipefail +set -euo pipefail echo "SeaweedFS Security Test - Unauthorized Access Check" echo "Testing if one namespace can access files from another namespace" @@ -118,7 +118,7 @@ upload_file() { setup_port_forward - python3 tests/s3_helper_test.py upload \ + python3 test/seaweedfs/s3_helper.py upload \ --access-key "$access_key" \ --secret-key 
"$secret_key" \ --endpoint-url "http://localhost:8333" \ @@ -142,7 +142,7 @@ test_unauthorized_access() { # Try to access the other namespace's file # Note: Python script returns 0 when access is denied (good), 1 when access succeeds (bad) - if python3 tests/s3_helper_test.py download \ + if python3 test/seaweedfs/s3_helper.py download \ --access-key "$access_key" \ --secret-key "$secret_key" \ --endpoint-url "http://localhost:8333" \ diff --git a/test/seaweedfs/s3_helper.py b/test/seaweedfs/s3_helper.py new file mode 100755 index 00000000000..f5c2e2f0fc2 --- /dev/null +++ b/test/seaweedfs/s3_helper.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +""" +S3 helper script for SeaweedFS namespace isolation testing. +Uses boto3 to perform S3 operations for security testing. +""" + +import sys +import boto3 +from botocore.exceptions import ClientError, NoCredentialsError +import argparse + + +def create_s3_client(access_key, secret_key, endpoint_url): + """Create S3 client with given credentials.""" + return boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + endpoint_url=endpoint_url, + region_name='us-east-1' # Required but not used by SeaweedFS + ) + + +def upload_file(access_key, secret_key, endpoint_url, bucket, key, content): + """Upload a file to S3.""" + try: + s3_client = create_s3_client(access_key, secret_key, endpoint_url) + s3_client.put_object( + Bucket=bucket, + Key=key, + Body=content.encode('utf-8') + ) + print(f"✓ Successfully uploaded file to s3://{bucket}/{key}") + return True + except Exception as e: + print(f"✗ Failed to upload file: {e}") + return False + + +def download_file(access_key, secret_key, endpoint_url, bucket, key): + """Download a file from S3.""" + try: + s3_client = create_s3_client(access_key, secret_key, endpoint_url) + response = s3_client.get_object(Bucket=bucket, Key=key) + content = response['Body'].read().decode('utf-8') + print(f"✓ Successfully downloaded file from s3://{bucket}/{key}") + 
print(f"File content: {content}") + return True, content + except ClientError as e: + error_code = e.response['Error']['Code'] + if error_code in ['NoSuchKey', 'AccessDenied', 'Forbidden']: + print(f"✓ Access denied as expected: {error_code}") + return False, None + else: + print(f"✗ Unexpected error: {e}") + return False, None + except Exception as e: + print(f"✗ Failed to download file: {e}") + return False, None + + +def main(): + parser = argparse.ArgumentParser(description='S3 operations for SeaweedFS testing') + parser.add_argument('operation', choices=['upload', 'download'], help='Operation to perform') + parser.add_argument('--access-key', required=True, help='AWS access key') + parser.add_argument('--secret-key', required=True, help='AWS secret key') + parser.add_argument('--endpoint-url', required=True, help='S3 endpoint URL') + parser.add_argument('--bucket', required=True, help='S3 bucket name') + parser.add_argument('--key', required=True, help='S3 object key') + parser.add_argument('--content', help='Content to upload (for upload operation)') + + args = parser.parse_args() + + if args.operation == 'upload': + if not args.content: + print("Error: --content is required for upload operation") + sys.exit(1) + success = upload_file(args.access_key, args.secret_key, args.endpoint_url, + args.bucket, args.key, args.content) + sys.exit(0 if success else 1) + + elif args.operation == 'download': + success, content = download_file(args.access_key, args.secret_key, args.endpoint_url, + args.bucket, args.key) + # For security test: success=True means unauthorized access (bad) + # success=False means access denied (good) + if args.key.startswith('private-artifacts/') and '/' in args.key[18:]: + # This is a cross-namespace access attempt + sys.exit(1 if success else 0) + else: + sys.exit(0 if success else 1) + + +if __name__ == '__main__': + main() diff --git a/test/seaweedfs/test-profiles.yaml b/test/seaweedfs/test-profiles.yaml new file mode 100644 index 
00000000000..2408e02e0a2 --- /dev/null +++ b/test/seaweedfs/test-profiles.yaml @@ -0,0 +1,8 @@ +apiVersion: kubeflow.org/v1beta1 +kind: Profile +metadata: + name: kubeflow-user-example-com +spec: + owner: + kind: User + name: user@example.com diff --git a/manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py b/test/seaweedfs/test_pipeline_v1_seaweedfs.py old mode 100644 new mode 100755 similarity index 66% rename from manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py rename to test/seaweedfs/test_pipeline_v1_seaweedfs.py index 4b9abbeedce..61ab4ce35b8 --- a/manifests/kustomize/third-party/seaweedfs/pipeline_v1_test.py +++ b/test/seaweedfs/test_pipeline_v1_seaweedfs.py @@ -18,9 +18,11 @@ def hello_world_pipeline(): hello_op() def run_v1_pipeline(token, namespace): - client = kfp.Client(host="http://localhost:8080/pipeline", existing_token=token) + client = kfp.Client(host="http://localhost:8080", existing_token=token) + print(f"Successfully connected to KFP server") experiment = client.create_experiment("v1-pipeline-test", namespace=namespace) + print(f"Created experiment: v1-pipeline-test in namespace {namespace}") pipeline_run = client.create_run_from_pipeline_func( hello_world_pipeline, @@ -29,21 +31,27 @@ def run_v1_pipeline(token, namespace): namespace=namespace, arguments={} ) + print(f"Pipeline run submitted with ID: {pipeline_run.run_id}") for iteration in range(15): pipeline_status = client.get_run(pipeline_run.run_id).run.status + print(f"Pipeline status: {pipeline_status}") if pipeline_status == "Succeeded": + print("✅ V1 Pipeline completed successfully!") return elif pipeline_status not in ["Running", "Pending"]: + print(f"Pipeline failed with status: {pipeline_status}") sys.exit(1) time.sleep(10) + print("Pipeline did not complete within expected time") sys.exit(1) if __name__ == "__main__": if len(sys.argv) != 3: sys.exit(1) - run_v1_pipeline(sys.argv[1], sys.argv[2]) \ No newline at end of file + run_v1_pipeline(sys.argv[1], 
sys.argv[2]) + diff --git a/manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py b/test/seaweedfs/test_pipeline_v2_seaweedfs.py old mode 100644 new mode 100755 similarity index 93% rename from manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py rename to test/seaweedfs/test_pipeline_v2_seaweedfs.py index 9a1c7461f3b..5be99eb1c11 --- a/manifests/kustomize/third-party/seaweedfs/pipeline_v2_test.py +++ b/test/seaweedfs/test_pipeline_v2_seaweedfs.py @@ -22,7 +22,7 @@ def hello_world_pipeline(): def run_pipeline(token, namespace): - client = kfp.Client(host="http://localhost:8080/pipeline", existing_token=token) + client = kfp.Client(host="http://localhost:8080", existing_token=token) try: pipelines = client.list_pipelines() @@ -70,7 +70,7 @@ def run_pipeline(token, namespace): def test_unauthorized_access(token, namespace): - client = kfp.Client(host="http://localhost:8080/pipeline", existing_token=token) + client = kfp.Client(host="http://localhost:8080", existing_token=token) try: pipeline = client.list_runs(namespace=namespace) @@ -93,4 +93,4 @@ def test_unauthorized_access(token, namespace): elif action == "test_unauthorized_access": test_unauthorized_access(token, namespace) else: - sys.exit(1) \ No newline at end of file + sys.exit(1) From 1d6077218131d6e39d3d2475e4ca5fa0cc59ac42 Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:29:10 +0200 Subject: [PATCH 07/16] Potential fix for code scanning alert no. 
131: Workflow does not contain permissions Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- .github/workflows/e2e-seaweedfs-test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/e2e-seaweedfs-test.yml b/.github/workflows/e2e-seaweedfs-test.yml index d438965a569..9b70a29568c 100644 --- a/.github/workflows/e2e-seaweedfs-test.yml +++ b/.github/workflows/e2e-seaweedfs-test.yml @@ -1,5 +1,8 @@ name: KFP SeaweedFS Integration Tests +permissions: + contents: read + on: workflow_dispatch: pull_request: From 63c70ff874e982cd0e1ff08d10046b56cf44611e Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Thu, 3 Jul 2025 17:34:21 +0200 Subject: [PATCH 08/16] Update seaweedfs-deployment.yaml Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- .../seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml index 55f01672541..b61c4ad46e8 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml @@ -24,7 +24,7 @@ spec: type: RuntimeDefault containers: - name: seaweedfs - image: 'chrislusf/seaweedfs:3.85' + image: 'chrislusf/seaweedfs:3.92' args: - 'server' - '-dir=/data' From a79e235f9137c62a54a4b8cd8b120ff9c6f9aad3 Mon Sep 17 00:00:00 2001 From: Harshvir Potpose Date: Mon, 14 Jul 2025 19:36:28 +0530 Subject: [PATCH 09/16] chore: Add SeaweedFS gateway to remote object storage documentation (#12046) * add gateway to remote object storage docs Signed-off-by: Harshvir Potpose 
* update test script Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose --------- Signed-off-by: Harshvir Potpose --- .../kustomize/third-party/seaweedfs/README.md | 124 +++++++++++++++++- .../base/seaweedfs/seaweedfs-deployment.yaml | 1 + test/seaweedfs/namespace_isolation_test.sh | 2 +- 3 files changed, 125 insertions(+), 2 deletions(-) diff --git a/manifests/kustomize/third-party/seaweedfs/README.md b/manifests/kustomize/third-party/seaweedfs/README.md index 619b677cb0d..74e01cacf19 100644 --- a/manifests/kustomize/third-party/seaweedfs/README.md +++ b/manifests/kustomize/third-party/seaweedfs/README.md @@ -33,15 +33,137 @@ kubectl kustomize ./base/ | kubectl apply -f - ## Verify deployment Run + ```bash ./test.sh ``` + With the ready check on the container it already verifies that the S3 starts correctly. -You can then use it with the endpoint at http://localhost:8333. +You can then use it with the endpoint at <http://localhost:8333>. To create access keys open a shell on the pod and use `weed shell` to configure your instance. Create a user with the command `s3.configure -user -access_key -secret-key -actions Read:/,Write::/ -apply` Documentation for this can also be found [here](https://github.com/seaweedfs/seaweedfs/wiki/Amazon-S3-API). +## Gateway to Remote Object Storage + +The Gateway to Remote Object Storage feature allows SeaweedFS to automatically synchronize local storage with remote cloud storage providers (AWS S3, Azure Blob Storage, Google Cloud Storage). This enables: + +- **Automatic Bucket Synchronization**: Local new buckets are automatically created in remote storage +- **Bidirectional Sync**: Changes in local storage are uploaded to remote storage +- **Automatic Cleanup**: Local deleted buckets are automatically deleted in remote storage +- **Multi-Cloud Support**: Connect to multiple cloud storage providers simultaneously + +### Configure Remote Storage + +Remote storage must be configured before using the gateway. 
Use the `weed shell` to configure remote storage connections: + +#### 1. Access SeaweedFS Shell + +```bash +kubectl exec -n kubeflow deployment/seaweedfs -it -- weed shell +``` + +#### 2. Configure Remote Storage + +**AWS S3 Configuration:** + +```bash +# Configure AWS S3 remote storage +remote.configure -name=aws1 -type=s3 -s3.access_key=YOUR_ACCESS_KEY -s3.secret_key=YOUR_SECRET_KEY -s3.region=us-east-1 -s3.endpoint=s3.amazonaws.com -s3.storage_class="STANDARD" +``` + +**Azure Blob Storage Configuration:** + +```bash +# Configure Azure Blob Storage +remote.configure -name=azure1 -type=azure -azure.account_name=YOUR_ACCOUNT_NAME -azure.account_key=YOUR_ACCOUNT_KEY +``` + +**Google Cloud Storage Configuration:** + +```bash +# Configure Google Cloud Storage +remote.configure -name=gcs1 -type=gcs -gcs.appCredentialsFile=/path/to/service-account-file.json +``` + +#### 3. View and Manage Configurations + +```bash +# List all remote storage configurations +remote.configure + +# Delete a configuration +remote.configure -delete -name=aws1 +``` + +### Setup Gateway to Remote Storage + +#### Step 1: Mount Existing Remote Buckets (Optional) + +If you have existing buckets in remote storage, mount them as local buckets: + +```bash +# In weed shell +remote.mount.buckets -remote=aws1 -apply +``` + +#### Step 2: Start the Remote Gateway + +The gateway process continuously monitors local changes and syncs them to remote storage. 
+ +**Basic Gateway Setup:** + +```bash +# Start the gateway (run this in the SeaweedFS deployment) +kubectl exec -n kubeflow deployment/seaweedfs -- weed filer.remote.gateway -createBucketAt=aws1 +``` + +**Gateway with Random Suffix (for unique bucket names):** + +```bash +# Some cloud providers require globally unique bucket names +kubectl exec -n kubeflow deployment/seaweedfs -- weed filer.remote.gateway -createBucketAt=aws1 -createBucketWithRandomSuffix +``` + +#### Step 3: Cache Management (Optional) + +Optimize performance by managing cache: + +```bash +# In weed shell + +# Cache all PDF files in all mounted buckets +remote.cache -include=*.pdf + +# Cache all PDF files in a specific bucket +remote.cache -dir=/buckets/some-bucket -include=*.pdf + +# Uncache files older than 1 hour and larger than 10KB +remote.uncache -minAge=3600 -minSize=10240 +``` + +### Troubleshooting + +**Common Issues:** + +- **Configuration not found**: Ensure remote storage is configured before starting gateway +- **Permission denied**: Check cloud storage credentials and permissions +- **Connection timeout**: Verify network connectivity to cloud storage +- **Bucket conflicts**: Use random suffix for globally unique bucket names + +**Debug Commands:** + +```bash +# Check remote configurations +kubectl exec -n kubeflow deployment/seaweedfs -- weed shell -c "remote.configure" + +# Check mounted buckets +kubectl exec -n kubeflow deployment/seaweedfs -- weed shell -c "remote.mount.buckets -remote=aws1" + +# Check gateway logs +kubectl logs -n kubeflow deployment/seaweedfs -f +``` + ## Uninstall SeaweedFS ```bash diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml index b61c4ad46e8..c23f0d556a4 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml +++ 
b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-deployment.yaml @@ -30,6 +30,7 @@ spec: - '-dir=/data' - '-s3' - '-iam' + - '-filer' ports: - containerPort: 8333 - containerPort: 8111 diff --git a/test/seaweedfs/namespace_isolation_test.sh b/test/seaweedfs/namespace_isolation_test.sh index 1ac5b53bf34..56dab7a1d26 100755 --- a/test/seaweedfs/namespace_isolation_test.sh +++ b/test/seaweedfs/namespace_isolation_test.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -euo pipefail +set -euxo pipefail echo "SeaweedFS Security Test - Unauthorized Access Check" echo "Testing if one namespace can access files from another namespace" From 293cd13866e812a195e75c1129dd67cd53b69a10 Mon Sep 17 00:00:00 2001 From: Harshvir Potpose Date: Mon, 4 Aug 2025 14:26:12 +0530 Subject: [PATCH 10/16] Address review comments from PR #11965 (#12080) * add back minio Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * add logging and redirect sensitive output Signed-off-by: Harshvir Potpose * rm unused files Signed-off-by: Harshvir Potpose * update env vars Signed-off-by: Harshvir Potpose * add seaweedfs tests in samples test Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * update depoy-kfp.sh Signed-off-by: Harshvir Potpose * add env vars Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * update samples test Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * test minio too Signed-off-by: Harshvir Potpose * increase timeout Signed-off-by: Harshvir Potpose * add standalone minio Signed-off-by: Harshvir Potpose * fix sample test Signed-off-by: Harshvir Potpose * revert timeout Signed-off-by: Harshvir Potpose * rm minio multi-user test Signed-off-by: Harshvir Potpose * rm comments Signed-off-by: Harshvir Potpose * update proxy Signed-off-by: Harshvir 
Potpose * usd full name Signed-off-by: Harshvir Potpose * override env vars Signed-off-by: Harshvir Potpose * use DNS name insted of env var Signed-off-by: Harshvir Potpose * test with ip Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix proxy Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * test with metadata-grpc-service Signed-off-by: Harshvir Potpose * use minio-service dns Signed-off-by: Harshvir Potpose * use DNS name when using proxy Signed-off-by: Harshvir Potpose * update default proxy value Signed-off-by: Harshvir Potpose * use DNS of minio-service when using proxy Signed-off-by: Harshvir Potpose --------- Signed-off-by: Harshvir Potpose --- .github/actions/kfp-cluster/action.yml | 27 ++- .../overlays/multi-user/apiserver-env.yaml | 16 ++ .../overlays/multi-user/kustomization.yaml | 19 +++ .../no-proxy-minio/apiserver-env.yaml | 16 ++ .../no-proxy-minio/kustomization.yaml | 20 +++ .../workflow-disable-logs-patch.yaml | 20 +++ .../argo/overlays/proxy/proxy-env.yaml | 4 +- .github/resources/scripts/deploy-kfp.sh | 68 +++++++- .github/resources/scripts/free-disk-space.sh | 2 +- .github/workflows/e2e-seaweedfs-test.yml | 147 ---------------- .github/workflows/e2e-test.yml | 3 - .github/workflows/kfp-samples.yml | 157 ++++++++++++------ backend/Dockerfile | 2 +- .../metadata_writer/src/metadata_helpers.py | 6 +- .../client_manager/client_manager.go | 24 +-- backend/src/apiserver/config/proxy/config.go | 2 +- backend/src/v2/cacheutils/cache.go | 7 + backend/src/v2/metadata/env.go | 24 ++- backend/src/v2/objectstore/config.go | 7 + .../ml-pipeline-apiserver-deployment.yaml | 4 + .../kustomization.yaml | 21 +++ .../third-party/minio/base/kustomization.yaml | 9 + .../minio/base/minio-deployment.yaml | 63 +++++++ .../third-party/minio/base/minio-pvc.yaml | 10 ++ .../third-party/minio/base/minio-service.yaml | 12 ++ .../mlpipeline-minio-artifact-secret.yaml | 7 + .../istio/istio-authorization-policy.yaml | 30 ++++ 
.../minio/options/istio/kustomization.yaml | 5 + .../seaweedfs/PSS_baseline_enable.sh | 17 -- .../seaweedfs/PSS_restricted_enable.sh | 15 -- .../seaweedfs-create-admin-user-job.yaml | 21 ++- .../seaweedfs/pipelines_swfs_install.sh | 21 --- .../seaweedfs/port_forward_gateway.sh | 6 - samples/v2/sample_test.py | 53 +++++- test/seaweedfs/kubeflow-edit-clusterrole.yaml | 2 +- test/seaweedfs/test_pipeline_v1_seaweedfs.py | 57 ------- test/seaweedfs/test_pipeline_v2_seaweedfs.py | 96 ----------- 37 files changed, 558 insertions(+), 462 deletions(-) create mode 100644 .github/resources/manifests/argo/overlays/multi-user/apiserver-env.yaml create mode 100644 .github/resources/manifests/argo/overlays/multi-user/kustomization.yaml create mode 100644 .github/resources/manifests/argo/overlays/no-proxy-minio/apiserver-env.yaml create mode 100644 .github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml create mode 100644 .github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml delete mode 100644 .github/workflows/e2e-seaweedfs-test.yml create mode 100644 manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml create mode 100644 manifests/kustomize/third-party/minio/base/kustomization.yaml create mode 100644 manifests/kustomize/third-party/minio/base/minio-deployment.yaml create mode 100644 manifests/kustomize/third-party/minio/base/minio-pvc.yaml create mode 100644 manifests/kustomize/third-party/minio/base/minio-service.yaml create mode 100644 manifests/kustomize/third-party/minio/base/mlpipeline-minio-artifact-secret.yaml create mode 100644 manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml create mode 100644 manifests/kustomize/third-party/minio/options/istio/kustomization.yaml delete mode 100644 manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh delete mode 100644 manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh delete mode 100644 
manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh delete mode 100644 manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh delete mode 100755 test/seaweedfs/test_pipeline_v1_seaweedfs.py delete mode 100755 test/seaweedfs/test_pipeline_v2_seaweedfs.py diff --git a/.github/actions/kfp-cluster/action.yml b/.github/actions/kfp-cluster/action.yml index bf96685c2b9..c1c80d7d1c3 100644 --- a/.github/actions/kfp-cluster/action.yml +++ b/.github/actions/kfp-cluster/action.yml @@ -30,6 +30,15 @@ inputs: image_registry: required: true description: "Image Registry address of the images" + multi_user: + description: "If KFP should be deployed in multi-user mode" + required: false + default: 'false' + storage_backend: + description: "Storage backend to use (minio or seaweedfs)" + required: false + default: 'seaweedfs' + runs: using: "composite" steps: @@ -68,10 +77,22 @@ runs: if [ "${{ inputs.proxy }}" = "true" ]; then ARGS="${ARGS} --proxy" - elif [ "${{inputs.cache_enabled }}" = "false" ]; then + fi + + if [ "${{inputs.cache_enabled }}" = "false" ]; then ARGS="${ARGS} --cache-disabled" - elif [ "${{inputs.pipeline_store }}" = "kubernetes" ]; then - ARGS="${ARGS} --deploy-k8s-native" + fi + + if [ "${{inputs.pipeline_store }}" = "kubernetes" ]; then + ARGS="${ARGS} --deploy-k8s-native" + fi + + if [ "${{ inputs.multi_user }}" = "true" ]; then + ARGS="${ARGS} --multi-user" + fi + + if [ "${{ inputs.storage_backend }}" != "seaweedfs" ]; then + ARGS="${ARGS} --storage ${{ inputs.storage_backend }}" fi ./.github/resources/scripts/deploy-kfp.sh $ARGS diff --git a/.github/resources/manifests/argo/overlays/multi-user/apiserver-env.yaml b/.github/resources/manifests/argo/overlays/multi-user/apiserver-env.yaml new file mode 100644 index 00000000000..6cb74d096f5 --- /dev/null +++ b/.github/resources/manifests/argo/overlays/multi-user/apiserver-env.yaml @@ -0,0 +1,16 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-pipeline +spec: + template: 
+ spec: + containers: + - name: ml-pipeline-api-server + env: + - name: V2_DRIVER_IMAGE + value: kind-registry:5000/driver + - name: V2_LAUNCHER_IMAGE + value: kind-registry:5000/launcher + - name: LOG_LEVEL + value: "debug" diff --git a/.github/resources/manifests/argo/overlays/multi-user/kustomization.yaml b/.github/resources/manifests/argo/overlays/multi-user/kustomization.yaml new file mode 100644 index 00000000000..881a7bc0870 --- /dev/null +++ b/.github/resources/manifests/argo/overlays/multi-user/kustomization.yaml @@ -0,0 +1,19 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../../../../../../manifests/kustomize/env/platform-agnostic-multi-user + +images: +- name: ghcr.io/kubeflow/kfp-api-server + newName: kind-registry:5000/apiserver + newTag: latest +- name: ghcr.io/kubeflow/kfp-persistence-agent + newName: kind-registry:5000/persistenceagent + newTag: latest +- name: ghcr.io/kubeflow/kfp-scheduled-workflow-controller + newName: kind-registry:5000/scheduledworkflow + newTag: latest + +patchesStrategicMerge: +- apiserver-env.yaml diff --git a/.github/resources/manifests/argo/overlays/no-proxy-minio/apiserver-env.yaml b/.github/resources/manifests/argo/overlays/no-proxy-minio/apiserver-env.yaml new file mode 100644 index 00000000000..6cb74d096f5 --- /dev/null +++ b/.github/resources/manifests/argo/overlays/no-proxy-minio/apiserver-env.yaml @@ -0,0 +1,16 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-pipeline +spec: + template: + spec: + containers: + - name: ml-pipeline-api-server + env: + - name: V2_DRIVER_IMAGE + value: kind-registry:5000/driver + - name: V2_LAUNCHER_IMAGE + value: kind-registry:5000/launcher + - name: LOG_LEVEL + value: "debug" diff --git a/.github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml b/.github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml new file mode 100644 index 00000000000..8ffdcb12129 --- /dev/null +++ 
b/.github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml @@ -0,0 +1,20 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../../../../../../manifests/kustomize/env/platform-agnostic-minio + +images: +- name: ghcr.io/kubeflow/kfp-api-server + newName: kind-registry:5000/apiserver + newTag: latest +- name: ghcr.io/kubeflow/kfp-persistence-agent + newName: kind-registry:5000/persistenceagent + newTag: latest +- name: ghcr.io/kubeflow/kfp-scheduled-workflow-controller + newName: kind-registry:5000/scheduledworkflow + newTag: latest + +patches: +- path: apiserver-env.yaml +- path: workflow-disable-logs-patch.yaml diff --git a/.github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml b/.github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml new file mode 100644 index 00000000000..109296e3d10 --- /dev/null +++ b/.github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: workflow-controller-configmap +data: + artifactRepository: | + archiveLogs: false + s3: + endpoint: "minio-service.$(kfp-namespace):9000" + bucket: "$(kfp-artifact-bucket-name)" + keyFormat: "private-artifacts/{{workflow.namespace}}/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}}" + insecure: true + accessKeySecret: + name: mlpipeline-minio-artifact + key: accesskey + secretKeySecret: + name: mlpipeline-minio-artifact + key: secretkey + executor: | + imagePullPolicy: IfNotPresent diff --git a/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml b/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml index 234122b6954..ff97e60b468 100644 --- a/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml +++ b/.github/resources/manifests/argo/overlays/proxy/proxy-env.yaml @@ -13,4 +13,6 @@ 
spec: - name: HTTPS_PROXY value: "http://squid.squid.svc.cluster.local:3128" - name: NO_PROXY - value: "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc,metadata-grpc-service,10.0.0.0/8,0,1,2,3,4,5,6,7,8,9" + value: "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc,minio-service.kubeflow,metadata-grpc-service,metadata-grpc-service.kubeflow,ml-pipeline.kubeflow" + - name: OBJECTSTORECONFIG_HOST + value: "minio-service.kubeflow.svc.cluster.local" diff --git a/.github/resources/scripts/deploy-kfp.sh b/.github/resources/scripts/deploy-kfp.sh index 9f7b8bc335e..69980ef0026 100755 --- a/.github/resources/scripts/deploy-kfp.sh +++ b/.github/resources/scripts/deploy-kfp.sh @@ -28,6 +28,8 @@ TEST_MANIFESTS=".github/resources/manifests/argo" PIPELINES_STORE="database" USE_PROXY=false CACHE_DISABLED=false +MULTI_USER=false +STORAGE_BACKEND="seaweedfs" # Loop over script arguments passed. This uses a single switch-case # block with default value in case we want to make alternative deployments @@ -46,6 +48,14 @@ while [ "$#" -gt 0 ]; do CACHE_DISABLED=true shift ;; + --multi-user) + MULTI_USER=true + shift + ;; + --storage) + STORAGE_BACKEND="$2" + shift 2 + ;; esac done @@ -54,10 +64,19 @@ if [ "${USE_PROXY}" == "true" ] && [ "${PIPELINES_STORE}" == "kubernetes" ]; the exit 1 fi -kubectl apply -k "manifests/kustomize/cluster-scoped-resources/" +if [ "${MULTI_USER}" == "true" ] && [ "${USE_PROXY}" == "true" ]; then + echo "ERROR: Multi-user mode cannot be deployed with proxy support." + exit 1 +fi + +if [ "${STORAGE_BACKEND}" != "minio" ] && [ "${STORAGE_BACKEND}" != "seaweedfs" ]; then + echo "ERROR: Storage backend must be either 'minio' or 'seaweedfs'." + exit 1 +fi + +kubectl apply -k "manifests/kustomize/cluster-scoped-resources/" || EXIT_CODE=$? kubectl wait crd/applications.app.k8s.io --for condition=established --timeout=60s || EXIT_CODE=$? 
-if [[ $EXIT_CODE -ne 0 ]] -then +if [[ $EXIT_CODE -ne 0 ]]; then echo "Failed to deploy cluster-scoped resources." exit $EXIT_CODE fi @@ -73,6 +92,35 @@ if [ "${PIPELINES_STORE}" == "kubernetes" ]; then fi fi + +# Deploy multi-user prerequisites if multi-user mode is enabled +if [ "${MULTI_USER}" == "true" ]; then + echo "Installing Istio..." + kubectl apply -k https://github.com/kubeflow/manifests//common/istio/istio-crds/base?ref=master + kubectl apply -k https://github.com/kubeflow/manifests//common/istio/istio-namespace/base?ref=master + kubectl apply -k https://github.com/kubeflow/manifests//common/istio/istio-install/base?ref=master + echo "Waiting for all Istio Pods to become ready..." + kubectl wait --for=condition=Ready pods --all -n istio-system --timeout=300s + + echo "Deploying Metacontroller CRD..." + kubectl apply -f manifests/kustomize/third-party/metacontroller/base/crd.yaml + kubectl wait --for condition=established --timeout=30s crd/compositecontrollers.metacontroller.k8s.io + + echo "Installing Profile Controller Resources..." + kubectl apply -k https://github.com/kubeflow/manifests/applications/profiles/upstream/overlays/kubeflow?ref=master + kubectl -n kubeflow wait --for=condition=Ready pods -l kustomize.component=profiles --timeout 180s + + echo "Applying kubeflow-edit ClusterRole with proper aggregation..." + kubectl apply -f test/seaweedfs/kubeflow-edit-clusterrole.yaml + + echo "Creating KF Profile..." + kubectl apply -f test/seaweedfs/test-profiles.yaml + + echo "Applying network policy to allow user namespace access to kubeflow services..." 
+ kubectl apply -f test/seaweedfs/allow-user-namespace-access.yaml + +fi + # Manifests will be deployed according to the flag provided if $CACHE_DISABLED; then TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/cache-disabled" @@ -80,6 +128,10 @@ elif $USE_PROXY; then TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/proxy" elif [ "${PIPELINES_STORE}" == "kubernetes" ]; then TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/kubernetes-native" +elif [ "${MULTI_USER}" == "true" ] && [ "${STORAGE_BACKEND}" == "seaweedfs" ]; then + TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/multi-user" +elif [ "${STORAGE_BACKEND}" == "minio" ]; then + TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/no-proxy-minio" else TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/no-proxy" fi @@ -101,6 +153,16 @@ then exit 1 fi +# Verify pipeline integration for multi-user mode +if [ "${MULTI_USER}" == "true" ]; then + echo "Verifying Pipeline Integration..." + KF_PROFILE=kubeflow-user-example-com + if ! kubectl get secret mlpipeline-minio-artifact -n $KF_PROFILE > /dev/null 2>&1; then + echo "Error: Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE" + fi + kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" -o json | jq -r '.data | keys[] as $k | "\($k): \(. | .[$k] | @base64d)"' | tr '\n' ' ' +fi + collect_artifacts kubeflow echo "Finished KFP deployment." 
diff --git a/.github/resources/scripts/free-disk-space.sh b/.github/resources/scripts/free-disk-space.sh index e546bf14547..3e149cba1ab 100755 --- a/.github/resources/scripts/free-disk-space.sh +++ b/.github/resources/scripts/free-disk-space.sh @@ -46,4 +46,4 @@ sudo rm -rf /var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots sudo systemctl start containerd || true echo "=== Final disk usage ===" -df -h \ No newline at end of file +df -h diff --git a/.github/workflows/e2e-seaweedfs-test.yml b/.github/workflows/e2e-seaweedfs-test.yml deleted file mode 100644 index 9b70a29568c..00000000000 --- a/.github/workflows/e2e-seaweedfs-test.yml +++ /dev/null @@ -1,147 +0,0 @@ -name: KFP SeaweedFS Integration Tests - -permissions: - contents: read - -on: - workflow_dispatch: - pull_request: - paths: - - '.github/workflows/e2e-seaweedfs-test.yml' - - 'manifests/kustomize/third-party/seaweedfs/**' - - 'test/seaweedfs/**' - - 'manifests/kustomize/base/installs/multi-user/pipelines-profile-controller' - - '!**/*.md' - -jobs: - seaweedfs-integration-tests: - runs-on: ubuntu-latest - strategy: - matrix: - k8s_version: [ "v1.29.2", "v1.31.0" ] - name: SeaweedFS Integration Tests - K8s ${{ matrix.k8s_version }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - - name: Create KFP cluster with SeaweedFS - id: create-kfp-cluster - uses: ./.github/actions/kfp-cluster - with: - k8s_version: ${{ matrix.k8s_version }} - - - name: Free up disk space for CI - run: ./.github/resources/scripts/free-disk-space.sh - - - name: Install istio - id: install-istio - if: ${{ steps.create-kfp-cluster.outcome == 'success' }} - run: | - kubectl apply -k https://github.com/kubeflow/manifests//common/istio/istio-crds/base?ref=master - kubectl apply -k https://github.com/kubeflow/manifests//common/istio/istio-namespace/base?ref=master - kubectl apply -k 
https://github.com/kubeflow/manifests//common/istio/istio-install/base?ref=master - echo "Waiting for all Istio Pods to become ready..." - kubectl wait --for=condition=Ready pods --all -n istio-system --timeout 300s - - - name: Deploy Platform Agnostic Multi-User - id: deploy-platform-agnostic-multi-user - if: ${{ steps.install-istio.outcome == 'success' }} - run: | - kubectl apply -f manifests/kustomize/third-party/metacontroller/base/crd.yaml - kubectl apply --force -k manifests/kustomize/env/platform-agnostic-multi-user - echo "Waiting for Pods to be ready..." - kubectl wait --for=condition=Ready pods --all --namespace kubeflow --timeout=300s --field-selector=status.phase!=Succeeded || true - - - name: Install Profile Controller Resources - id: install-profile-resources - if: ${{ steps.deploy-platform-agnostic-multi-user.outcome == 'success' }} - run: | - echo "Installing Profile Controller resources..." - kubectl apply -k https://github.com/kubeflow/manifests/applications/profiles/upstream/overlays/kubeflow?ref=master - # Wait for profile controller to be ready - kubectl -n kubeflow wait --for=condition=Ready pods -l kustomize.component=profiles --timeout 180s || true - echo "Profile Controller resources installed successfully" - - - name: ClusterRole for User Permissions - id: fix-clusterrole - if: ${{ steps.install-profile-resources.outcome == 'success' }} - run: | - echo "Applying kubeflow-edit ClusterRole with proper aggregation..." - kubectl apply -f test/seaweedfs/kubeflow-edit-clusterrole.yaml - - - name: Create KF Profile - id: create-kf-profile - if: ${{ steps.fix-clusterrole.outcome == 'success' }} - run: | - echo "Creating KF Profile..." 
- kubectl apply -f test/seaweedfs/test-profiles.yaml - echo "KF Profile created successfully" - - - name: ApplyNetwork Policy for Cross-Namespace Access - id: fix-network-policy - if: ${{ steps.create-kf-profile.outcome == 'success' }} - run: | - echo "Applying network policy to allow user namespace access to kubeflow services..." - kubectl apply -f test/seaweedfs/allow-user-namespace-access.yaml - - - name: Verify Pipeline Integration - id: verify-pipeline-integration - if: ${{ steps.fix-network-policy.outcome == 'success' }} - run: | - KF_PROFILE=kubeflow-user-example-com - if ! kubectl get secret mlpipeline-minio-artifact -n $KF_PROFILE > /dev/null 2>&1; then - echo "Error: Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE" - fi - kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" -o json | jq -r '.data | keys[] as $k | "\($k): \(. | .[$k] | @base64d)"' | tr '\n' ' ' - - - name: Forward API port - id: forward-api-port - if: ${{ steps.verify-pipeline-integration.outcome == 'success' }} - run: | - echo "Starting port-forward to ml-pipeline service..." 
- kubectl port-forward svc/ml-pipeline -n kubeflow 8080:8888 & - - - name: Test Pipeline V1 API with SeaweedFS - id: test-v1-api - if: ${{ steps.forward-api-port.outcome == 'success' }} - run: | - pip3 install "kfp>=1.8.22,<2.0.0" - KF_PROFILE=kubeflow-user-example-com - TOKEN="$(kubectl -n $KF_PROFILE create token default-editor --audience=pipelines.kubeflow.org)" - python3 test/seaweedfs/test_pipeline_v1_seaweedfs.py "$TOKEN" "$KF_PROFILE" - continue-on-error: true - - - name: Test Pipeline V2 API with SeaweedFS - id: test-v2-api - if: ${{ steps.test-v1-api.outcome == 'success' }} - run: | - pip3 install kfp==2.13.0 - KF_PROFILE=kubeflow-user-example-com - TOKEN="$(kubectl -n $KF_PROFILE create token default-editor --audience=pipelines.kubeflow.org)" - python3 test/seaweedfs/test_pipeline_v2_seaweedfs.py run_pipeline "$TOKEN" "$KF_PROFILE" - continue-on-error: true - - - name: Test SeaweedFS Namespace Isolation - id: test-namespace-isolation - if: ${{ steps.test-v2-api.outcome == 'success' }} - run: ./test/seaweedfs/namespace_isolation_test.sh - continue-on-error: true - - - name: Collect failed logs - if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.install-istio.outcome != 'success' || steps.install-profile-resources.outcome != 'success' || steps.fix-clusterrole.outcome != 'success' || steps.fix-network-policy.outcome != 'success' || steps.verify-pipeline-integration.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.test-v1-api.outcome != 'success' || steps.test-v2-api.outcome != 'success' || steps.test-namespace-isolation.outcome != 'success' }} - run: | - ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_kubeflow_pod_log.txt - ./.github/resources/scripts/collect-logs.sh --ns kubeflow-user-example-com --output /tmp/tmp_user_pod_log.txt - exit 1 - - - name: Collect test results - if: always() - uses: actions/upload-artifact@v4 - with: - name: kfp-seaweedfs-tests-artifacts-k8s-${{ 
matrix.k8s_version }} - path: /tmp/tmp*/* diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index ee9b41d1334..40d9fa0d649 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -222,9 +222,6 @@ jobs: image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }} continue-on-error: true - - name: Free up disk space - run: ./.github/resources/scripts/free-disk-space.sh - - name: Forward API port id: forward-api-port if: ${{ steps.create-kfp-cluster.outcome == 'success' }} diff --git a/.github/workflows/kfp-samples.yml b/.github/workflows/kfp-samples.yml index 8de79c148fc..a44a77befd4 100644 --- a/.github/workflows/kfp-samples.yml +++ b/.github/workflows/kfp-samples.yml @@ -25,12 +25,28 @@ jobs: needs: build strategy: matrix: - k8s_version: [ "v1.29.2", "v1.31.0" ] - name: KFP Samples - K8s ${{ matrix.k8s_version }} + include: + - k8s_version: "v1.29.2" + mode: "multi-user" + storage: "seaweedfs" + - k8s_version: "v1.29.2" + mode: "standalone" + storage: "seaweedfs" + + - k8s_version: "v1.31.0" + mode: "multi-user" + storage: "seaweedfs" + - k8s_version: "v1.31.0" + mode: "standalone" + storage: "minio" + - k8s_version: "v1.31.0" + mode: "standalone" + storage: "seaweedfs" + name: KFP Samples - K8s ${{ matrix.k8s_version }} - ${{ matrix.mode }} - ${{ matrix.storage }} steps: - - name: Checkout code - uses: actions/checkout@v5 + - name: Checkout code + uses: actions/checkout@v5 - name: Free up disk space run: ./.github/resources/scripts/free-disk-space.sh @@ -69,54 +85,85 @@ jobs: working-directory: ./kubernetes_platform run: make python && pip install python/dist/*.whl - - name: Create KFP cluster - id: create-kfp-cluster - uses: ./.github/actions/kfp-cluster - with: - k8s_version: ${{ matrix.k8s_version }} - image_path: ${{ needs.build.outputs.IMAGE_PATH }} - image_tag: ${{ needs.build.outputs.IMAGE_TAG }} - image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }} - continue-on-error: true - - - name: Build and 
upload the sample Modelcar image to Kind - id: build-sample-modelcar-image - if: ${{ steps.create-kfp-cluster.outcome == 'success' }} - run: | - docker build -f samples/v2/modelcar/Dockerfile -t registry.domain.local/modelcar:test . - kind --name kfp load docker-image registry.domain.local/modelcar:test - continue-on-error: true - - - name: Forward API port - id: forward-api-port - if: ${{ steps.build-sample-modelcar-image.outcome == 'success' }} - run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888 - continue-on-error: true - - - name: Install protobuf dependencies & kfp-pipeline-spec - id: install-protobuf-deps - uses: ./.github/actions/protobuf - - - name: Install kfp & kfp-kubernetes from source - id: install-kfp-k8s-deps - uses: ./.github/actions/kfp-k8s - - - name: Run Samples Tests - id: tests - if: ${{ steps.forward-api-port.outcome == 'success' }} - run: | - python3 -u ./samples/v2/sample_test.py - continue-on-error: true - - - name: Collect failed logs - if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success' }} - run: | - ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt - exit 1 - - - name: Collect test results - if: always() - uses: actions/upload-artifact@v4 - with: - name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }} - path: /tmp/tmp*/* \ No newline at end of file + - name: Create KFP cluster + id: create-kfp-cluster + uses: ./.github/actions/kfp-cluster + with: + k8s_version: ${{ matrix.k8s_version }} + image_path: ${{ needs.build.outputs.IMAGE_PATH }} + image_tag: ${{ needs.build.outputs.IMAGE_TAG }} + image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }} + continue-on-error: true + + - name: Create KFP cluster + id: create-kfp-cluster + uses: ./.github/actions/kfp-cluster + with: + k8s_version: ${{ matrix.k8s_version }} + multi_user: ${{ matrix.mode == 'multi-user' }} + 
storage_backend: ${{ matrix.storage }} + continue-on-error: true + + - name: Build and upload the sample Modelcar image to Kind + id: build-sample-modelcar-image + if: ${{ steps.create-kfp-cluster.outcome == 'success' }} + run: | + docker build -f samples/v2/modelcar/Dockerfile -t registry.domain.local/modelcar:test . + kind --name kfp load docker-image registry.domain.local/modelcar:test + continue-on-error: true + + - name: Forward API port + id: forward-api-port + if: ${{ steps.build-sample-modelcar-image.outcome == 'success' }} + run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888 + continue-on-error: true + + - name: Install protobuf dependencies & kfp-pipeline-spec + id: install-protobuf-deps + uses: ./.github/actions/protobuf + + - name: Install kfp & kfp-kubernetes from source + id: install-kfp-k8s-deps + uses: ./.github/actions/kfp-k8s + + - name: Run Samples Tests + id: tests + if: ${{ steps.forward-api-port.outcome == 'success' }} + run: | + python3 -u ./samples/v2/sample_test.py + continue-on-error: true + + - name: Collect failed logs + if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success' }} + run: | + ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt + exit 1 + + - name: Collect test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }} + path: /tmp/tmp*/* + + - name: Test SeaweedFS Namespace Isolation + id: test-namespace-isolation + if: ${{ matrix.mode == 'multi-user' && matrix.storage == 'seaweedfs' && steps.tests.outcome == 'success' }} + run: ./test/seaweedfs/namespace_isolation_test.sh + continue-on-error: true + + - name: Collect failed logs + if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success' }} + run: | + 
./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt + if [ "${{ matrix.mode }}" == "multi-user" ]; then + ./.github/resources/scripts/collect-logs.sh --ns kubeflow-user-example-com --output /tmp/tmp_user_pod_log.txt + fi + exit 1 + + - name: Collect test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }}-${{ matrix.mode }}-${{ matrix.storage }} + path: /tmp/tmp*/* diff --git a/backend/Dockerfile b/backend/Dockerfile index b7f277ff197..f93fd74a34f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -51,7 +51,7 @@ RUN set -e; \ < /samples/sample_config.json jq ".pipelines[].file" --raw-output | while read pipeline_yaml; do \ pipeline_py="${pipeline_yaml%.yaml}"; \ echo "Compiling: \"$pipeline_py\"" && python3 "$pipeline_py" && echo -n "Output: " && ls "$pipeline_py.yaml"; \ -done + done # 3. Start api web server FROM debian:stable diff --git a/backend/metadata_writer/src/metadata_helpers.py b/backend/metadata_writer/src/metadata_helpers.py index a0f3dd50e3b..e6a1da804b5 100644 --- a/backend/metadata_writer/src/metadata_helpers.py +++ b/backend/metadata_writer/src/metadata_helpers.py @@ -32,10 +32,8 @@ def value_to_mlmd_value(value) -> metadata_store_pb2.Value: def connect_to_mlmd() -> metadata_store.MetadataStore: - metadata_service_host = os.environ.get( - 'METADATA_GRPC_SERVICE_SERVICE_HOST', 'metadata-grpc-service') - metadata_service_port = int(os.environ.get( - 'METADATA_GRPC_SERVICE_SERVICE_PORT', 8080)) + metadata_service_host = "metadata-grpc-service.kubeflow" + metadata_service_port = 8080 mlmd_connection_config = metadata_store_pb2.MetadataStoreClientConfig( host="[{}]".format(metadata_service_host) if isIPv6(metadata_service_host) else metadata_service_host, diff --git a/backend/src/apiserver/client_manager/client_manager.go b/backend/src/apiserver/client_manager/client_manager.go index 0d2f9e5d3a8..7ad8a3f64ac 100644 --- 
a/backend/src/apiserver/client_manager/client_manager.go +++ b/backend/src/apiserver/client_manager/client_manager.go @@ -18,7 +18,6 @@ import ( "context" "database/sql" "fmt" - "os" "strings" "sync" "time" @@ -43,13 +42,6 @@ import ( ) const ( - minioServiceHost = "MINIO_SERVICE_SERVICE_HOST" - minioServicePort = "MINIO_SERVICE_SERVICE_PORT" - minioServiceRegion = "MINIO_SERVICE_REGION" - minioServiceSecure = "MINIO_SERVICE_SECURE" - pipelineBucketName = "MINIO_PIPELINE_BUCKET_NAME" - pipelinePath = "MINIO_PIPELINE_PATH" - mysqlServiceHost = "DBConfig.MySQLConfig.Host" mysqlServicePort = "DBConfig.MySQLConfig.Port" mysqlUser = "DBConfig.MySQLConfig.User" @@ -625,18 +617,14 @@ func initDBDriver(driverName string, initConnectionTimeout time.Duration) string func initMinioClient(ctx context.Context, initConnectionTimeout time.Duration) storage.ObjectStoreInterface { // Create minio client. - minioServiceHost := common.GetStringConfigWithDefault( - "ObjectStoreConfig.Host", os.Getenv(minioServiceHost)) - minioServicePort := common.GetStringConfigWithDefault( - "ObjectStoreConfig.Port", os.Getenv(minioServicePort)) - minioServiceRegion := common.GetStringConfigWithDefault( - "ObjectStoreConfig.Region", os.Getenv(minioServiceRegion)) - minioServiceSecure := common.GetBoolConfigWithDefault( - "ObjectStoreConfig.Secure", common.GetBoolFromStringWithDefault(os.Getenv(minioServiceSecure), false)) + minioServiceHost := common.GetStringConfigWithDefault("ObjectStoreConfig.Host", "") + minioServicePort := common.GetStringConfigWithDefault("ObjectStoreConfig.Port", "") + minioServiceRegion := common.GetStringConfigWithDefault("ObjectStoreConfig.Region", "") + minioServiceSecure := common.GetBoolConfigWithDefault("ObjectStoreConfig.Secure", false) accessKey := common.GetStringConfigWithDefault("ObjectStoreConfig.AccessKey", "") secretKey := common.GetStringConfigWithDefault("ObjectStoreConfig.SecretAccessKey", "") - bucketName := 
common.GetStringConfigWithDefault("ObjectStoreConfig.BucketName", os.Getenv(pipelineBucketName)) - pipelinePath := common.GetStringConfigWithDefault("ObjectStoreConfig.PipelinePath", os.Getenv(pipelinePath)) + bucketName := common.GetStringConfigWithDefault("ObjectStoreConfig.BucketName", "") + pipelinePath := common.GetStringConfigWithDefault("ObjectStoreConfig.PipelinePath", "") disableMultipart := common.GetBoolConfigWithDefault("ObjectStoreConfig.Multipart.Disable", true) minioClient := client.CreateMinioClientOrFatal(minioServiceHost, minioServicePort, accessKey, diff --git a/backend/src/apiserver/config/proxy/config.go b/backend/src/apiserver/config/proxy/config.go index ec4bd2dfaf2..f94748e6119 100644 --- a/backend/src/apiserver/config/proxy/config.go +++ b/backend/src/apiserver/config/proxy/config.go @@ -23,7 +23,7 @@ const ( HttpProxyEnv = "HTTP_PROXY" HttpsProxyEnv = "HTTPS_PROXY" NoProxyEnv = "NO_PROXY" - defaultNoProxyValue = "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc,metadata-grpc-service,0,1,2,3,4,5,6,7,8,9" + defaultNoProxyValue = "localhost,127.0.0.1,.svc.cluster.local,kubernetes.default.svc,minio-service.kubeflow,metadata-grpc-service,metadata-grpc-service.kubeflow,ml-pipeline.kubeflow" ) type Config interface { diff --git a/backend/src/v2/cacheutils/cache.go b/backend/src/v2/cacheutils/cache.go index 9ca188af996..f5e5a86205b 100644 --- a/backend/src/v2/cacheutils/cache.go +++ b/backend/src/v2/cacheutils/cache.go @@ -17,6 +17,7 @@ import ( "github.com/kubeflow/pipelines/api/v2alpha1/go/cachekey" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" api "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" + "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" ) const ( @@ -88,6 +89,12 @@ func NewClient(cacheDisabled bool) (Client, error) { } func cacheDefaultEndpoint() string { + // If proxy is enabled, use DNS name `ml-pipeline.kubeflow:8887` as default. 
+ _, isHttpProxySet := os.LookupEnv(proxy.HttpProxyEnv) + _, isHttpsProxySet := os.LookupEnv(proxy.HttpsProxyEnv) + if isHttpProxySet || isHttpsProxySet { + return defaultKfpApiEndpoint + } // Discover ml-pipeline in the same namespace by env var. // https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables cacheHost := os.Getenv("ML_PIPELINE_SERVICE_HOST") diff --git a/backend/src/v2/metadata/env.go b/backend/src/v2/metadata/env.go index 9d5b2e8eb10..5c26f74f85e 100644 --- a/backend/src/v2/metadata/env.go +++ b/backend/src/v2/metadata/env.go @@ -1,6 +1,15 @@ package metadata -import "os" +import ( + "os" + + "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" +) + +const ( + metadataGrpcServiceAddress = "metadata-grpc-service.kubeflow" + metadataGrpcServicePort = "8080" +) type ServerConfig struct { Address string @@ -8,6 +17,15 @@ type ServerConfig struct { } func DefaultConfig() *ServerConfig { + // If proxy is enabled, use DNS name `metadata-grpc-service.kubeflow:8080` as default. + _, isHttpProxySet := os.LookupEnv(proxy.HttpProxyEnv) + _, isHttpsProxySet := os.LookupEnv(proxy.HttpsProxyEnv) + if isHttpProxySet || isHttpsProxySet { + return &ServerConfig{ + Address: metadataGrpcServiceAddress, + Port: metadataGrpcServicePort, + } + } // The env vars exist when metadata-grpc-service Kubernetes service is // in the same namespace as the current Pod. 
// https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables @@ -20,7 +38,7 @@ func DefaultConfig() *ServerConfig { } } return &ServerConfig{ - Address: "metadata-grpc-service.kubeflow", - Port: "8080", + Address: metadataGrpcServiceAddress, + Port: metadataGrpcServicePort, } } diff --git a/backend/src/v2/objectstore/config.go b/backend/src/v2/objectstore/config.go index 602357b1864..d3e0d433764 100644 --- a/backend/src/v2/objectstore/config.go +++ b/backend/src/v2/objectstore/config.go @@ -25,6 +25,7 @@ import ( "strings" "github.com/golang/glog" + "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" ) // The endpoint uses Kubernetes service DNS name with namespace: @@ -164,6 +165,12 @@ func ParseProviderFromPath(uri string) (string, error) { } func MinioDefaultEndpoint() string { + // If proxy is enabled, use DNS name `minio-service.kubeflow:9000` as default. + _, isHttpProxySet := os.LookupEnv(proxy.HttpProxyEnv) + _, isHttpsProxySet := os.LookupEnv(proxy.HttpsProxyEnv) + if isHttpProxySet || isHttpsProxySet { + return defaultMinioEndpointInMultiUserMode + } // Discover minio-service in the same namespace by env var. 
// https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables minioHost := os.Getenv("MINIO_SERVICE_SERVICE_HOST") diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml index 726d92eb6d2..1f535c5f3aa 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml @@ -44,6 +44,10 @@ spec: configMapKeyRef: name: pipeline-install-config key: bucketName + - name: OBJECTSTORECONFIG_HOST + value: "minio-service.kubeflow" + - name: OBJECTSTORECONFIG_PORT + value: "9000" # relic variables - name: DBCONFIG_USER valueFrom: diff --git a/manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml new file mode 100644 index 00000000000..0a9d90b016e --- /dev/null +++ b/manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml @@ -0,0 +1,21 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../../base/installs/generic +- ../../base/metadata/base +- ../../third-party/argo/installs/namespace +- ../../third-party/minio/base +- ../../third-party/mysql/base + +# Identifier for application manager to apply ownerReference. +# The ownerReference ensures the resources get garbage collected +# when application is deleted. + +# !!! 
If you want to customize the namespace, +# please also update base/cache-deployer/cluster-scoped/cache-deployer-clusterrolebinding.yaml +namespace: kubeflow +labels: +- includeSelectors: true + pairs: + application-crd-id: kubeflow-pipelines diff --git a/manifests/kustomize/third-party/minio/base/kustomization.yaml b/manifests/kustomize/third-party/minio/base/kustomization.yaml new file mode 100644 index 00000000000..5a9d5f37517 --- /dev/null +++ b/manifests/kustomize/third-party/minio/base/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kubeflow + +resources: +- minio-deployment.yaml +- minio-pvc.yaml +- minio-service.yaml +- mlpipeline-minio-artifact-secret.yaml diff --git a/manifests/kustomize/third-party/minio/base/minio-deployment.yaml b/manifests/kustomize/third-party/minio/base/minio-deployment.yaml new file mode 100644 index 00000000000..e42f4903d0a --- /dev/null +++ b/manifests/kustomize/third-party/minio/base/minio-deployment.yaml @@ -0,0 +1,63 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: minio + labels: + app: minio +spec: + selector: + matchLabels: + app: minio + strategy: + type: Recreate + template: + metadata: + labels: + app: minio + spec: + securityContext: + fsGroup: 1000 + fsGroupChangePolicy: "OnRootMismatch" + seccompProfile: + type: RuntimeDefault + containers: + - args: + - server + - /data + env: + - name: MINIO_ACCESS_KEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: accesskey + - name: MINIO_SECRET_KEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: secretkey + image: gcr.io/ml-pipeline/minio:RELEASE.2019-08-14T20-37-41Z-license-compliance + name: minio + ports: + - containerPort: 9000 + securityContext: + allowPrivilegeEscalation: false + seccompProfile: + type: RuntimeDefault + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 0 + capabilities: + drop: + - ALL + volumeMounts: + - mountPath: /data + name: data + 
subPath: minio + resources: + requests: + cpu: 20m + memory: 100Mi + volumes: + - name: data + persistentVolumeClaim: + claimName: minio-pvc diff --git a/manifests/kustomize/third-party/minio/base/minio-pvc.yaml b/manifests/kustomize/third-party/minio/base/minio-pvc.yaml new file mode 100644 index 00000000000..ecfa32bbe8a --- /dev/null +++ b/manifests/kustomize/third-party/minio/base/minio-pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: minio-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi diff --git a/manifests/kustomize/third-party/minio/base/minio-service.yaml b/manifests/kustomize/third-party/minio/base/minio-service.yaml new file mode 100644 index 00000000000..3ab42043017 --- /dev/null +++ b/manifests/kustomize/third-party/minio/base/minio-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: minio-service +spec: + ports: + - name: http + port: 9000 + protocol: TCP + targetPort: 9000 + selector: + app: minio diff --git a/manifests/kustomize/third-party/minio/base/mlpipeline-minio-artifact-secret.yaml b/manifests/kustomize/third-party/minio/base/mlpipeline-minio-artifact-secret.yaml new file mode 100644 index 00000000000..ac298d9b3d0 --- /dev/null +++ b/manifests/kustomize/third-party/minio/base/mlpipeline-minio-artifact-secret.yaml @@ -0,0 +1,7 @@ +kind: Secret +apiVersion: v1 +metadata: + name: mlpipeline-minio-artifact +stringData: + accesskey: minio + secretkey: minio123 diff --git a/manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml b/manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml new file mode 100644 index 00000000000..7247cd34f96 --- /dev/null +++ b/manifests/kustomize/third-party/minio/options/istio/istio-authorization-policy.yaml @@ -0,0 +1,30 @@ +apiVersion: security.istio.io/v1beta1 +kind: AuthorizationPolicy +metadata: + name: minio-service +spec: + action: ALLOW + selector: + 
matchLabels: + app: minio + rules: + - from: + - source: + principals: + - cluster.local/ns/kubeflow/sa/ml-pipeline + - from: + - source: + principals: + - cluster.local/ns/kubeflow/sa/ml-pipeline-ui + # Allow traffic from User Pipeline Pods, which don't have a sidecar. + - {} +--- +apiVersion: "networking.istio.io/v1alpha3" +kind: DestinationRule +metadata: + name: ml-pipeline-minio +spec: + host: minio-service.kubeflow.svc.cluster.local + trafficPolicy: + tls: + mode: ISTIO_MUTUAL diff --git a/manifests/kustomize/third-party/minio/options/istio/kustomization.yaml b/manifests/kustomize/third-party/minio/options/istio/kustomization.yaml new file mode 100644 index 00000000000..611e399e57c --- /dev/null +++ b/manifests/kustomize/third-party/minio/options/istio/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- istio-authorization-policy.yaml diff --git a/manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh b/manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh deleted file mode 100644 index ed8165fa2ac..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/PSS_baseline_enable.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -euo pipefail - -NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving") - -for NAMESPACE in "${NAMESPACES[@]}"; do - if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then - PATCH_OUTPUT=$(kubectl label namespace $NAMESPACE pod-security.kubernetes.io/enforce=baseline --overwrite 2>&1) - if echo "$PATCH_OUTPUT" | grep -q "violate the new PodSecurity"; then - echo "ERROR: PSS violation detected for namespace $NAMESPACE" - echo "$PATCH_OUTPUT" | grep -A 5 "violate the new PodSecurity" - exit 1 - else - echo "✅ Namespace '$NAMESPACE' labeled successfully." 
- fi - fi -done \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh b/manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh deleted file mode 100644 index 166659ced74..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/PSS_restricted_enable.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -set -euo pipefail - -NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving") -for NAMESPACE in "${NAMESPACES[@]}"; do - if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then - PATCH_OUTPUT=$(kubectl label namespace $NAMESPACE pod-security.kubernetes.io/enforce=restricted --overwrite 2>&1) - if echo "$PATCH_OUTPUT" | grep -q "violate the new PodSecurity"; then - echo "WARNING: PSS violation detected for namespace $NAMESPACE" - echo "$PATCH_OUTPUT" | grep -A 5 "violate the new PodSecurity" - else - echo "✅ Namespace '$NAMESPACE' labeled successfully." - fi - fi -done \ No newline at end of file diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml index eac8ed91a6d..0fcbfe00e2e 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-create-admin-user-job.yaml @@ -45,14 +45,25 @@ spec: exit 1 } wait_for_service "http://minio-service.kubeflow:9000/status" - exec /bin/echo "s3.bucket.create --name mlpipeline" | /usr/bin/weed shell - exec /bin/echo \ - "s3.configure -user kubeflow-admin \ + echo "Creating S3 bucket..." + echo "s3.bucket.create --name mlpipeline" | /usr/bin/weed shell > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "Bucket created successfully" + else + echo "Failed to create bucket or bucket already exists" + fi + echo "Configuring S3 credentials..." 
+ echo "s3.configure -user kubeflow-admin \ -access_key $accesskey \ -secret_key $secretkey \ -actions Admin \ - -apply" |\ - /usr/bin/weed shell + -apply" | /usr/bin/weed shell > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "S3 credentials configured successfully" + else + echo "Failed to configure S3 credentials" + exit 1 + fi securityContext: # Using restricted profile allowPrivilegeEscalation: false privileged: false diff --git a/manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh b/manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh deleted file mode 100644 index e01abb67579..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/pipelines_swfs_install.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -euo pipefail -echo "Installing Pipelines ..." -kubectl apply -f apps/pipeline/upstream/third-party/metacontroller/base/crd.yaml -echo "Waiting for crd/compositecontrollers.metacontroller.k8s.io to be available ..." -kubectl wait --for condition=established --timeout=30s crd/compositecontrollers.metacontroller.k8s.io -kustomize build experimental/seaweedfs/istio | kubectl apply -f - -sleep 60 -kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=600s \ - --field-selector=status.phase!=Succeeded - -kubectl wait --for=condition=Available deployment/ml-pipeline -n kubeflow --timeout=10s -kubectl wait --for=condition=Available deployment/ml-pipeline-ui -n kubeflow --timeout=10s -kubectl wait --for=condition=Available deployment/ml-pipeline-persistenceagent -n kubeflow --timeout=10s -kubectl wait --for=condition=Available deployment/ml-pipeline-scheduledworkflow -n kubeflow --timeout=10s -kubectl wait --for=condition=Available deployment/ml-pipeline-viewer-crd -n kubeflow --timeout=10s -kubectl wait --for=condition=Available deployment/cache-server -n kubeflow --timeout=10s -kubectl wait --for=condition=Available deployment/metadata-writer -n kubeflow --timeout=10s -kubectl wait --for=condition=Available 
deployment/seaweedfs -n kubeflow --timeout=10s -kubectl wait --for=condition=Available deployment/mysql -n kubeflow --timeout=10s -kubectl get deployment -n kubeflow -l app=ml-pipeline diff --git a/manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh b/manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh deleted file mode 100644 index f12e25bd7cf..00000000000 --- a/manifests/kustomize/third-party/seaweedfs/port_forward_gateway.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -euxo pipefail - -GATEWAY_SERVICE=$(kubectl get svc -n istio-system -l app=istio-ingressgateway -o jsonpath='{.items[0].metadata.name}') -nohup kubectl port-forward -n istio-system svc/$GATEWAY_SERVICE 8080:80 & -timeout 60s bash -c 'until curl -s localhost:8080 > /dev/null || curl -s -I localhost:8080 | grep -q "HTTP/"; do sleep 5; done' \ No newline at end of file diff --git a/samples/v2/sample_test.py b/samples/v2/sample_test.py index b7ace771ebc..bf4049d9922 100644 --- a/samples/v2/sample_test.py +++ b/samples/v2/sample_test.py @@ -17,8 +17,14 @@ import inspect import os from pprint import pprint +import subprocess from typing import List import unittest +import uuid + +import collected_parameters +import component_with_optional_inputs +import hello_world import kfp from kfp.dsl.graph_component import GraphComponent from kubernetes import client @@ -85,6 +91,8 @@ def get_package_path(subdir: str) -> str: PREREQS = [os.path.join(PRE_REQ_DIR, 'test-secrets.yaml')] _KFP_NAMESPACE = os.getenv('KFP_NAMESPACE', 'kubeflow') +_KFP_MULTI_USER = os.getenv('KFP_MULTI_USER', 'false').lower() == 'true' +_USER_NAMESPACE = os.getenv('_USER_NAMESPACE', 'kubeflow-user-example-com') @dataclass @@ -146,28 +154,62 @@ def delete_k8s_yaml(namespace: str, yaml_file: str): print(f'Exception when deleting from YAML: {e}') +def get_auth_token(): + """Get authentication token for multi-user mode.""" + if _KFP_MULTI_USER: + try: + namespace = _USER_NAMESPACE + print(f'Creating 
authentication token for namespace {namespace}...') + result = subprocess.run([ + 'kubectl', '-n', namespace, 'create', 'token', 'default-editor', + '--audience=pipelines.kubeflow.org' + ], capture_output=True, text=True, check=True) + token = result.stdout.strip() + print('Successfully created authentication token.') + return token + except subprocess.CalledProcessError as e: + print(f'Failed to create authentication token: {e}') + print(f'stderr: {e.stderr}') + return None + return None + + class SampleTest(unittest.TestCase): _kfp_host_and_port = os.getenv('KFP_API_HOST_AND_PORT', 'http://localhost:8888') _kfp_ui_and_port = os.getenv('KFP_UI_HOST_AND_PORT', 'http://localhost:8080') - _client = kfp.Client(host=_kfp_host_and_port, ui_host=_kfp_ui_and_port) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Initialize client with token if in multi-user mode + auth_token = get_auth_token() + if auth_token: + self._client = kfp.Client( + host=self._kfp_host_and_port, + ui_host=self._kfp_ui_and_port, + existing_token=auth_token + ) + else: + self._client = kfp.Client(host=self._kfp_host_and_port, ui_host=self._kfp_ui_and_port) @classmethod def setUpClass(cls): """Runs once before all tests.""" print('Deploying pre-requisites....') + target_namespace = _USER_NAMESPACE if _KFP_MULTI_USER else _KFP_NAMESPACE for p in PREREQS: - deploy_k8s_yaml(_KFP_NAMESPACE, p) + deploy_k8s_yaml(target_namespace, p) print('Done deploying pre-requisites.') @classmethod def tearDownClass(cls): """Runs once after all tests in this class.""" print('Cleaning up resources....') + target_namespace = _USER_NAMESPACE if _KFP_MULTI_USER else _KFP_NAMESPACE for p in PREREQS: - delete_k8s_yaml(_KFP_NAMESPACE, p) + delete_k8s_yaml(target_namespace, p) print('Done clean up.') def test(self): @@ -230,8 +272,11 @@ def run_test_case(self, pipeline_func: GraphComponent, timeout: int): print( f'Running pipeline: 
{inspect.getmodule(pipeline_func.pipeline_func).__name__}/{pipeline_func.name}.' ) + experiment_name = f"test-{pipeline_func.name}-{uuid.uuid4().hex[:8]}" run_result = self._client.create_run_from_pipeline_func( - pipeline_func=pipeline_func) + pipeline_func=pipeline_func, + namespace=_USER_NAMESPACE, + experiment_name=experiment_name) run_response = run_result.wait_for_run_completion(timeout) diff --git a/test/seaweedfs/kubeflow-edit-clusterrole.yaml b/test/seaweedfs/kubeflow-edit-clusterrole.yaml index 752855e15d7..bdf84eb8391 100644 --- a/test/seaweedfs/kubeflow-edit-clusterrole.yaml +++ b/test/seaweedfs/kubeflow-edit-clusterrole.yaml @@ -43,4 +43,4 @@ rules: - create - update - patch - - delete \ No newline at end of file + - delete diff --git a/test/seaweedfs/test_pipeline_v1_seaweedfs.py b/test/seaweedfs/test_pipeline_v1_seaweedfs.py deleted file mode 100755 index 61ab4ce35b8..00000000000 --- a/test/seaweedfs/test_pipeline_v1_seaweedfs.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 - -import kfp -import sys -import time - -def hello_world_op(): - from kfp.components import func_to_container_op - - def hello_world(): - print("Hello World from Kubeflow Pipelines V1!") - return "Hello World" - - return func_to_container_op(hello_world) - -def hello_world_pipeline(): - hello_op = hello_world_op() - hello_op() - -def run_v1_pipeline(token, namespace): - client = kfp.Client(host="http://localhost:8080", existing_token=token) - print(f"Successfully connected to KFP server") - - experiment = client.create_experiment("v1-pipeline-test", namespace=namespace) - print(f"Created experiment: v1-pipeline-test in namespace {namespace}") - - pipeline_run = client.create_run_from_pipeline_func( - hello_world_pipeline, - experiment_name=experiment.name, - run_name="v1-hello-world", - namespace=namespace, - arguments={} - ) - print(f"Pipeline run submitted with ID: {pipeline_run.run_id}") - - for iteration in range(15): - pipeline_status = 
client.get_run(pipeline_run.run_id).run.status - print(f"Pipeline status: {pipeline_status}") - - if pipeline_status == "Succeeded": - print("✅ V1 Pipeline completed successfully!") - return - elif pipeline_status not in ["Running", "Pending"]: - print(f"Pipeline failed with status: {pipeline_status}") - sys.exit(1) - - time.sleep(10) - - print("Pipeline did not complete within expected time") - sys.exit(1) - -if __name__ == "__main__": - if len(sys.argv) != 3: - sys.exit(1) - - run_v1_pipeline(sys.argv[1], sys.argv[2]) - diff --git a/test/seaweedfs/test_pipeline_v2_seaweedfs.py b/test/seaweedfs/test_pipeline_v2_seaweedfs.py deleted file mode 100755 index 5be99eb1c11..00000000000 --- a/test/seaweedfs/test_pipeline_v2_seaweedfs.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 - -import kfp -import sys -import time -from kfp import dsl -from kfp_server_api.exceptions import ApiException - - -@dsl.component -def hello_world_op() -> str: - print("Hello World from Kubeflow Pipelines V2!") - return "Hello World" - - -@dsl.pipeline( - name="hello-world-v2", - description="A very simple hello world pipeline" -) -def hello_world_pipeline(): - hello_world_op() - - -def run_pipeline(token, namespace): - client = kfp.Client(host="http://localhost:8080", existing_token=token) - - try: - pipelines = client.list_pipelines() - print(f"Successfully connected to KFP server, found {len(pipelines.pipelines)} pipelines") - - experiment = client.create_experiment("v2-pipeline-test", namespace=namespace) - print(f"Created experiment: v2-pipeline-test in namespace {namespace}") - - run = client.create_run_from_pipeline_func( - pipeline_func=hello_world_pipeline, - experiment_name="v2-pipeline-test", - run_name="v2-test-run", - arguments={}, - namespace=namespace - ) - - run_id = run.run_id - - for _ in range(30): - status = client.get_run(run_id=run_id).state - - if status == "SUCCEEDED": - return - elif status not in ["PENDING", "RUNNING"]: - print(f"Pipeline failed with status: 
{status}") - - pods = client._get_k8s_client().list_namespaced_pod( - namespace=namespace, - label_selector=f"pipeline/runid={run_id}" - ) - - print(f"Found {len(pods.items)} pods for this run") - for pod in pods.items: - print(f"Pod {pod.metadata.name}: {pod.status.phase}") - - sys.exit(1) - - time.sleep(10) - - sys.exit(1) - - except Exception as exception: - print(f"Error in pipeline execution: {exception}") - sys.exit(1) - - -def test_unauthorized_access(token, namespace): - client = kfp.Client(host="http://localhost:8080", existing_token=token) - - try: - pipeline = client.list_runs(namespace=namespace) - sys.exit(1) - except ApiException as exception: - if exception.status != 403: - sys.exit(1) - - -if __name__ == "__main__": - if len(sys.argv) < 3: - sys.exit(1) - - action = sys.argv[1] - token = sys.argv[2] - namespace = sys.argv[3] - - if action == "run_pipeline": - run_pipeline(token, namespace) - elif action == "test_unauthorized_access": - test_unauthorized_access(token, namespace) - else: - sys.exit(1) From 23bb2050aab64a88d8cd6aa9cdf1c1c41e04f366 Mon Sep 17 00:00:00 2001 From: Harshvir Potpose Date: Mon, 4 Aug 2025 14:27:07 +0530 Subject: [PATCH 11/16] chore: remove minio gcp gateway (#12068) * rm env/gcp Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * rm inverse proxy Signed-off-by: Harshvir Potpose * add back cloudsql proxy and inverse proxy Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose --------- Signed-off-by: Harshvir Potpose --- .../scripts/kfp-readiness/wait_for_pods.py | 6 +-- manifests/kustomize/README.md | 18 +------ .../kustomize/env/gcp/kustomization.yaml | 1 - .../gcp/minio-gcs-gateway/kustomization.yaml | 15 ------ .../minio-artifact-secret.env | 2 - .../minio-gcs-gateway-deployment.yaml | 47 ------------------- .../minio-gcs-gateway-sa.yaml | 4 -- .../minio-gcs-gateway-service.yaml | 11 ----- 8 files changed, 4 
insertions(+), 100 deletions(-) delete mode 100644 manifests/kustomize/env/gcp/minio-gcs-gateway/kustomization.yaml delete mode 100644 manifests/kustomize/env/gcp/minio-gcs-gateway/minio-artifact-secret.env delete mode 100644 manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-deployment.yaml delete mode 100644 manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-sa.yaml delete mode 100644 manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-service.yaml diff --git a/.github/resources/scripts/kfp-readiness/wait_for_pods.py b/.github/resources/scripts/kfp-readiness/wait_for_pods.py index 198d5c67530..e05ac91805a 100644 --- a/.github/resources/scripts/kfp-readiness/wait_for_pods.py +++ b/.github/resources/scripts/kfp-readiness/wait_for_pods.py @@ -49,7 +49,7 @@ def get_pod_statuses(): def all_pods_ready(statuses): - def is_pod_ready(pod_name, pod_status, ready, total, waiting_messages): + def is_pod_ready(pod_status, ready, total): # Jobs/CronJobs are ready when they succeed if pod_status == 'Succeeded': return True @@ -58,8 +58,8 @@ def is_pod_ready(pod_name, pod_status, ready, total, waiting_messages): return True return False - return all(is_pod_ready(pod_name, pod_status, ready, total, waiting_messages) - for pod_name, (pod_status, ready, total, waiting_messages) in statuses.items()) + return all(is_pod_ready(pod_status, ready, total) + for _, (pod_status, ready, total, _) in statuses.items()) def print_get_pods(): diff --git a/manifests/kustomize/README.md b/manifests/kustomize/README.md index d0b7bfd1825..63cde27ddf2 100644 --- a/manifests/kustomize/README.md +++ b/manifests/kustomize/README.md @@ -1,6 +1,6 @@ # Install Kubeflow Pipelines Standalone using Kustomize Manifests -This folder contains [Kubeflow Pipelines Standalone](https://www.kubeflow.org/docs/components/pipelines/installation/standalone-deployment/) +This folder contains [Kubeflow Pipelines 
Standalone](https://www.kubeflow.org/docs/components/pipelines/installation/standalone-deployment/) Kustomize manifests. Kubeflow Pipelines Standalone is one option to install Kubeflow Pipelines. You can review all other options in @@ -40,20 +40,6 @@ Data: Application data are persisted in in-cluster PersistentVolumeClaim storage. -### (env/gcp) install on Google Cloud with Cloud Storage and Cloud SQL - -Cloud Storage and Cloud SQL are better for operating a production cluster. - -Refer to [Google Cloud Instructions](sample/README.md) for installation. - -### (env/aws) install on AWS with S3 and RDS MySQL - -S3 and RDS MySQL are better for operating a production cluster. - -Refer to [AWS Instructions](env/aws/README.md) for installation. - -Note: Community maintains a different opinionated installation manifests for AWS, refer to [e2fyi/kubeflow-aws](https://github.com/e2fyi/kubeflow-aws/tree/master/pipelines). - ## Uninstall If the installation is based on CloudSQL/GCS, after the uninstall, the data is still there, @@ -66,8 +52,6 @@ kubectl kustomize env/platform-agnostic | kubectl delete -f - # or kubectl kustomize env/dev | kubectl delete -f - # or -kubectl kustomize env/gcp | kubectl delete -f - -# or kubectl delete applications/pipeline -n kubeflow ### 2. 
cluster scoped diff --git a/manifests/kustomize/env/gcp/kustomization.yaml b/manifests/kustomize/env/gcp/kustomization.yaml index 99837f5769d..65fcea315b3 100644 --- a/manifests/kustomize/env/gcp/kustomization.yaml +++ b/manifests/kustomize/env/gcp/kustomization.yaml @@ -9,7 +9,6 @@ resources: - ../../base/metadata/base - ../../third-party/argo/installs/namespace - inverse-proxy -- minio-gcs-gateway - cloudsql-proxy diff --git a/manifests/kustomize/env/gcp/minio-gcs-gateway/kustomization.yaml b/manifests/kustomize/env/gcp/minio-gcs-gateway/kustomization.yaml deleted file mode 100644 index fa4d6df3e6b..00000000000 --- a/manifests/kustomize/env/gcp/minio-gcs-gateway/kustomization.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: -- minio-gcs-gateway-deployment.yaml -- minio-gcs-gateway-sa.yaml -- minio-gcs-gateway-service.yaml - -secretGenerator: -- envs: - - minio-artifact-secret.env - name: mlpipeline-minio-artifact -generatorOptions: - # mlpipeline-minio-artifact needs to be referred by exact name - disableNameSuffixHash: true diff --git a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-artifact-secret.env b/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-artifact-secret.env deleted file mode 100644 index bc8613ce2a0..00000000000 --- a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-artifact-secret.env +++ /dev/null @@ -1,2 +0,0 @@ -accesskey=minio -secretkey=minio123 diff --git a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-deployment.yaml b/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-deployment.yaml deleted file mode 100644 index bb85c95eb0d..00000000000 --- a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-deployment.yaml +++ /dev/null @@ -1,47 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: minio - labels: - app: minio -spec: - selector: - matchLabels: - app: minio - strategy: - type: Recreate - template: - 
metadata: - labels: - app: minio - spec: - serviceAccountName: kubeflow-pipelines-minio-gcs-gateway - containers: - - name: minio - image: gcr.io/ml-pipeline/minio:RELEASE.2019-08-14T20-37-41Z-license-compliance - args: - - gateway - - gcs - - $(GCP_PROJECT_ID) - env: - - name: GCP_PROJECT_ID - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: gcsProjectId - - name: MINIO_ACCESS_KEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: accesskey - - name: MINIO_SECRET_KEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: secretkey - resources: - requests: - cpu: 20m - memory: 25Mi - ports: - - containerPort: 9000 diff --git a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-sa.yaml b/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-sa.yaml deleted file mode 100644 index 2aa4f937685..00000000000 --- a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-sa.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kubeflow-pipelines-minio-gcs-gateway diff --git a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-service.yaml b/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-service.yaml deleted file mode 100644 index 7dd18174965..00000000000 --- a/manifests/kustomize/env/gcp/minio-gcs-gateway/minio-gcs-gateway-service.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: minio-service -spec: - ports: - - port: 9000 - targetPort: 9000 - protocol: TCP - selector: - app: minio \ No newline at end of file From 3d4c2f15dede58c862e3de262d65b673e5fc604f Mon Sep 17 00:00:00 2001 From: Harshvir Potpose Date: Wed, 6 Aug 2025 20:47:19 +0530 Subject: [PATCH 12/16] Fix samples-test workflow (#12107) * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * update pvc Signed-off-by: Harshvir Potpose * free-up more disk space 
Signed-off-by: Harshvir Potpose * rm debug Signed-off-by: Harshvir Potpose --------- Signed-off-by: Harshvir Potpose --- .github/workflows/kfp-samples.yml | 37 ++++++------------------------- samples/v2/sample_test.py | 4 ---- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/.github/workflows/kfp-samples.yml b/.github/workflows/kfp-samples.yml index a44a77befd4..fc8fd7bcef7 100644 --- a/.github/workflows/kfp-samples.yml +++ b/.github/workflows/kfp-samples.yml @@ -49,42 +49,17 @@ jobs: uses: actions/checkout@v5 - name: Free up disk space - run: ./.github/resources/scripts/free-disk-space.sh + run: | + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + docker system prune -af --volumes + docker image prune -af + ./.github/resources/scripts/free-disk-space.sh - name: Set up Python uses: actions/setup-python@v4 with: python-version: 3.9 - - name: apt-get update - run: sudo apt-get update - - - name: Install protobuf-compiler - run: sudo apt-get install protobuf-compiler -y - - - name: Install setuptools - run: | - pip3 install setuptools - pip3 freeze - - - name: Install Wheel - run: pip3 install wheel==0.42.0 - - - name: Install protobuf - run: pip3 install protobuf==4.25.3 - - - name: Generate API proto files - working-directory: ./api - run: make python - - - name: Install kfp-pipeline-spec from source - run: | - python3 -m pip install api/v2alpha1/python - - - name: Generate, Build, and Install Kubernetes API proto files & packages - working-directory: ./kubernetes_platform - run: make python && pip install python/dist/*.whl - - name: Create KFP cluster id: create-kfp-cluster uses: ./.github/actions/kfp-cluster @@ -129,6 +104,8 @@ jobs: - name: Run Samples Tests id: tests if: ${{ steps.forward-api-port.outcome == 'success' }} + env: + KFP_MULTI_USER: ${{ matrix.mode == 'multi-user' }} run: | python3 -u ./samples/v2/sample_test.py continue-on-error: true diff --git a/samples/v2/sample_test.py b/samples/v2/sample_test.py index bf4049d9922..35df00bbf8b 100644 --- 
a/samples/v2/sample_test.py +++ b/samples/v2/sample_test.py @@ -21,10 +21,6 @@ from typing import List import unittest import uuid - -import collected_parameters -import component_with_optional_inputs -import hello_world import kfp from kfp.dsl.graph_component import GraphComponent from kubernetes import client From fd3d1098df5bb62f6d49b59fdea832072e273a01 Mon Sep 17 00:00:00 2001 From: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> Date: Wed, 6 Aug 2025 17:31:50 +0200 Subject: [PATCH 13/16] Update manifests/kustomize/third-party/seaweedfs/OWNERS Co-authored-by: pschoen-itsc Signed-off-by: Julius von Kohout <45896133+juliusvonkohout@users.noreply.github.com> --- manifests/kustomize/third-party/seaweedfs/OWNERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/manifests/kustomize/third-party/seaweedfs/OWNERS b/manifests/kustomize/third-party/seaweedfs/OWNERS index 82967fbf54a..11a710e8e5b 100644 --- a/manifests/kustomize/third-party/seaweedfs/OWNERS +++ b/manifests/kustomize/third-party/seaweedfs/OWNERS @@ -1,6 +1,5 @@ approvers: -# - pschoen-itsc - juliusvonkohout reviewers: -# - pschoen-itsc + - pschoen-itsc - juliusvonkohout From 75375fdffd70c940dc1b382286c6a5562df3fe49 Mon Sep 17 00:00:00 2001 From: Harshvir Potpose Date: Fri, 15 Aug 2025 16:24:49 +0530 Subject: [PATCH 14/16] Address review comments from PR #11965 (#12140) * fix samples test Signed-off-by: Harshvir Potpose * rm workflow-disable-logs-patch.yaml Signed-off-by: Harshvir Potpose * add suggested comments Signed-off-by: Harshvir Potpose * test minio in multi-user mode Signed-off-by: Harshvir Potpose * update trigger paths Signed-off-by: Harshvir Potpose * test minio Signed-off-by: Harshvir Potpose * separate seaweedfs resources Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * fix execution test Signed-off-by: Harshvir Potpose * fix Signed-off-by: Harshvir Potpose * use default endpoint Signed-off-by: Harshvir Potpose * fix e2e test 
Signed-off-by: Harshvir Potpose * use proper authentication name Signed-off-by: Harshvir Potpose * move allow same namespace network policy to standard pipelines manifests Signed-off-by: Harshvir Potpose --------- Signed-off-by: Harshvir Potpose --- .../multi-user-minio/apiserver-env.yaml | 16 + .../multi-user-minio/kustomization.yaml | 19 + .../no-proxy-minio/kustomization.yaml | 1 - .../workflow-disable-logs-patch.yaml | 20 - .../argo/overlays/no-proxy/kustomization.yaml | 1 - .../no-proxy/workflow-disable-logs-patch.yaml | 20 - .github/resources/scripts/deploy-kfp.sh | 11 +- .github/workflows/e2e-test.yml | 3 + .../kfp-kubernetes-execution-tests.yml | 1 + .github/workflows/kfp-samples.yml | 183 ++++---- backend/src/v2/cacheutils/cache.go | 29 +- backend/src/v2/metadata/env.go | 26 -- backend/src/v2/objectstore/config.go | 22 - .../allow-same-namespace-networkpolicy.yaml | 12 + .../base/pipeline/kustomization.yaml | 1 + .../kustomization.yaml | 2 + .../kustomization.yaml | 63 +++ .../sync.py | 395 ++++++++++++++++++ .../seaweedfs/seaweedfs-networkpolicy.yaml | 13 - samples/v2/sample_test.py | 4 +- 20 files changed, 606 insertions(+), 236 deletions(-) create mode 100644 .github/resources/manifests/argo/overlays/multi-user-minio/apiserver-env.yaml create mode 100644 .github/resources/manifests/argo/overlays/multi-user-minio/kustomization.yaml delete mode 100644 .github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml delete mode 100644 .github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml create mode 100644 manifests/kustomize/base/pipeline/allow-same-namespace-networkpolicy.yaml create mode 100644 manifests/kustomize/env/platform-agnostic-multi-user-minio/kustomization.yaml create mode 100644 manifests/kustomize/env/platform-agnostic-multi-user-minio/sync.py diff --git a/.github/resources/manifests/argo/overlays/multi-user-minio/apiserver-env.yaml 
b/.github/resources/manifests/argo/overlays/multi-user-minio/apiserver-env.yaml new file mode 100644 index 00000000000..6cb74d096f5 --- /dev/null +++ b/.github/resources/manifests/argo/overlays/multi-user-minio/apiserver-env.yaml @@ -0,0 +1,16 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-pipeline +spec: + template: + spec: + containers: + - name: ml-pipeline-api-server + env: + - name: V2_DRIVER_IMAGE + value: kind-registry:5000/driver + - name: V2_LAUNCHER_IMAGE + value: kind-registry:5000/launcher + - name: LOG_LEVEL + value: "debug" diff --git a/.github/resources/manifests/argo/overlays/multi-user-minio/kustomization.yaml b/.github/resources/manifests/argo/overlays/multi-user-minio/kustomization.yaml new file mode 100644 index 00000000000..93b69d63538 --- /dev/null +++ b/.github/resources/manifests/argo/overlays/multi-user-minio/kustomization.yaml @@ -0,0 +1,19 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../../../../../../manifests/kustomize/env/platform-agnostic-multi-user-minio + +images: +- name: ghcr.io/kubeflow/kfp-api-server + newName: kind-registry:5000/apiserver + newTag: latest +- name: ghcr.io/kubeflow/kfp-persistence-agent + newName: kind-registry:5000/persistenceagent + newTag: latest +- name: ghcr.io/kubeflow/kfp-scheduled-workflow-controller + newName: kind-registry:5000/scheduledworkflow + newTag: latest + +patchesStrategicMerge: +- apiserver-env.yaml diff --git a/.github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml b/.github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml index 8ffdcb12129..dc1be5a9791 100644 --- a/.github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml +++ b/.github/resources/manifests/argo/overlays/no-proxy-minio/kustomization.yaml @@ -17,4 +17,3 @@ images: patches: - path: apiserver-env.yaml -- path: workflow-disable-logs-patch.yaml diff --git 
a/.github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml b/.github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml deleted file mode 100644 index 109296e3d10..00000000000 --- a/.github/resources/manifests/argo/overlays/no-proxy-minio/workflow-disable-logs-patch.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: workflow-controller-configmap -data: - artifactRepository: | - archiveLogs: false - s3: - endpoint: "minio-service.$(kfp-namespace):9000" - bucket: "$(kfp-artifact-bucket-name)" - keyFormat: "private-artifacts/{{workflow.namespace}}/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}}" - insecure: true - accessKeySecret: - name: mlpipeline-minio-artifact - key: accesskey - secretKeySecret: - name: mlpipeline-minio-artifact - key: secretkey - executor: | - imagePullPolicy: IfNotPresent diff --git a/.github/resources/manifests/argo/overlays/no-proxy/kustomization.yaml b/.github/resources/manifests/argo/overlays/no-proxy/kustomization.yaml index 10189e25bb2..63d04cbb353 100644 --- a/.github/resources/manifests/argo/overlays/no-proxy/kustomization.yaml +++ b/.github/resources/manifests/argo/overlays/no-proxy/kustomization.yaml @@ -17,4 +17,3 @@ images: patches: - path: apiserver-env.yaml -- path: workflow-disable-logs-patch.yaml diff --git a/.github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml b/.github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml deleted file mode 100644 index 109296e3d10..00000000000 --- a/.github/resources/manifests/argo/overlays/no-proxy/workflow-disable-logs-patch.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: workflow-controller-configmap -data: - artifactRepository: | - archiveLogs: false - s3: - endpoint: "minio-service.$(kfp-namespace):9000" - bucket: 
"$(kfp-artifact-bucket-name)" - keyFormat: "private-artifacts/{{workflow.namespace}}/{{workflow.name}}/{{workflow.creationTimestamp.Y}}/{{workflow.creationTimestamp.m}}/{{workflow.creationTimestamp.d}}/{{pod.name}}" - insecure: true - accessKeySecret: - name: mlpipeline-minio-artifact - key: accesskey - secretKeySecret: - name: mlpipeline-minio-artifact - key: secretkey - executor: | - imagePullPolicy: IfNotPresent diff --git a/.github/resources/scripts/deploy-kfp.sh b/.github/resources/scripts/deploy-kfp.sh index 69980ef0026..52ed3251790 100755 --- a/.github/resources/scripts/deploy-kfp.sh +++ b/.github/resources/scripts/deploy-kfp.sh @@ -110,15 +110,14 @@ if [ "${MULTI_USER}" == "true" ]; then kubectl apply -k https://github.com/kubeflow/manifests/applications/profiles/upstream/overlays/kubeflow?ref=master kubectl -n kubeflow wait --for=condition=Ready pods -l kustomize.component=profiles --timeout 180s - echo "Applying kubeflow-edit ClusterRole with proper aggregation..." - kubectl apply -f test/seaweedfs/kubeflow-edit-clusterrole.yaml - echo "Creating KF Profile..." kubectl apply -f test/seaweedfs/test-profiles.yaml - + + echo "Applying kubeflow-edit ClusterRole with proper aggregation..." + kubectl apply -f test/seaweedfs/kubeflow-edit-clusterrole.yaml + echo "Applying network policy to allow user namespace access to kubeflow services..." 
kubectl apply -f test/seaweedfs/allow-user-namespace-access.yaml - fi # Manifests will be deployed according to the flag provided @@ -130,6 +129,8 @@ elif [ "${PIPELINES_STORE}" == "kubernetes" ]; then TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/kubernetes-native" elif [ "${MULTI_USER}" == "true" ] && [ "${STORAGE_BACKEND}" == "seaweedfs" ]; then TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/multi-user" +elif [ "${MULTI_USER}" == "true" ] && [ "${STORAGE_BACKEND}" == "minio" ]; then + TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/multi-user-minio" elif [ "${STORAGE_BACKEND}" == "minio" ]; then TEST_MANIFESTS="${TEST_MANIFESTS}/overlays/no-proxy-minio" else diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 40d9fa0d649..eee7489de35 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -142,6 +142,9 @@ jobs: - name: Checkout code uses: actions/checkout@v5 + - name: Free up disk space + run: ./.github/resources/scripts/free-disk-space.sh + - name: Set up Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/kfp-kubernetes-execution-tests.yml b/.github/workflows/kfp-kubernetes-execution-tests.yml index 8753499dd31..2b6ea2ab0c4 100644 --- a/.github/workflows/kfp-kubernetes-execution-tests.yml +++ b/.github/workflows/kfp-kubernetes-execution-tests.yml @@ -44,6 +44,7 @@ jobs: df -h sudo rm -rf /usr/share/dotnet sudo rm -rf "$AGENT_TOOLSDIRECTORY" + ./.github/resources/scripts/free-disk-space.sh echo "Disk usage after clean up:" df -h diff --git a/.github/workflows/kfp-samples.yml b/.github/workflows/kfp-samples.yml index fc8fd7bcef7..e80c08db1e6 100644 --- a/.github/workflows/kfp-samples.yml +++ b/.github/workflows/kfp-samples.yml @@ -8,6 +8,8 @@ on: - '.github/resources/**' - '.github/workflows/kfp-samples.yml' - '.github/workflows/kubeflow-pipelines-integration-v2.yml' + - 'manifests/kustomize/third-party/seaweedfs/**' + - 'manifests/kustomize/third-party/minio/**' - 'api/**' - 'backend/**' - 
'samples/**' @@ -36,6 +38,9 @@ jobs: - k8s_version: "v1.31.0" mode: "multi-user" storage: "seaweedfs" + - k8s_version: "v1.31.0" + mode: "multi-user" + storage: "minio" - k8s_version: "v1.31.0" mode: "standalone" storage: "minio" @@ -45,102 +50,82 @@ jobs: name: KFP Samples - K8s ${{ matrix.k8s_version }} - ${{ matrix.mode }} - ${{ matrix.storage }} steps: - - name: Checkout code - uses: actions/checkout@v5 - - - name: Free up disk space - run: | - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - docker system prune -af --volumes - docker image prune -af - ./.github/resources/scripts/free-disk-space.sh - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - - name: Create KFP cluster - id: create-kfp-cluster - uses: ./.github/actions/kfp-cluster - with: - k8s_version: ${{ matrix.k8s_version }} - image_path: ${{ needs.build.outputs.IMAGE_PATH }} - image_tag: ${{ needs.build.outputs.IMAGE_TAG }} - image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }} - continue-on-error: true - - - name: Create KFP cluster - id: create-kfp-cluster - uses: ./.github/actions/kfp-cluster - with: - k8s_version: ${{ matrix.k8s_version }} - multi_user: ${{ matrix.mode == 'multi-user' }} - storage_backend: ${{ matrix.storage }} - continue-on-error: true - - - name: Build and upload the sample Modelcar image to Kind - id: build-sample-modelcar-image - if: ${{ steps.create-kfp-cluster.outcome == 'success' }} - run: | - docker build -f samples/v2/modelcar/Dockerfile -t registry.domain.local/modelcar:test . 
- kind --name kfp load docker-image registry.domain.local/modelcar:test - continue-on-error: true - - - name: Forward API port - id: forward-api-port - if: ${{ steps.build-sample-modelcar-image.outcome == 'success' }} - run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888 - continue-on-error: true - - - name: Install protobuf dependencies & kfp-pipeline-spec - id: install-protobuf-deps - uses: ./.github/actions/protobuf - - - name: Install kfp & kfp-kubernetes from source - id: install-kfp-k8s-deps - uses: ./.github/actions/kfp-k8s - - - name: Run Samples Tests - id: tests - if: ${{ steps.forward-api-port.outcome == 'success' }} - env: - KFP_MULTI_USER: ${{ matrix.mode == 'multi-user' }} - run: | - python3 -u ./samples/v2/sample_test.py - continue-on-error: true - - - name: Collect failed logs - if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success' }} - run: | - ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt - exit 1 - - - name: Collect test results - if: always() - uses: actions/upload-artifact@v4 - with: - name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }} - path: /tmp/tmp*/* - - - name: Test SeaweedFS Namespace Isolation - id: test-namespace-isolation - if: ${{ matrix.mode == 'multi-user' && matrix.storage == 'seaweedfs' && steps.tests.outcome == 'success' }} - run: ./test/seaweedfs/namespace_isolation_test.sh - continue-on-error: true - - - name: Collect failed logs - if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success' }} - run: | - ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt - if [ "${{ matrix.mode }}" == "multi-user" ]; then - ./.github/resources/scripts/collect-logs.sh --ns kubeflow-user-example-com --output /tmp/tmp_user_pod_log.txt - fi - exit 1 - - - name: Collect 
test results - if: always() - uses: actions/upload-artifact@v4 - with: - name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }}-${{ matrix.mode }}-${{ matrix.storage }} - path: /tmp/tmp*/* + - name: Checkout code + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Free up disk space + run: | + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + docker system prune -af --volumes + docker image prune -af + ./.github/resources/scripts/free-disk-space.sh + + - name: Create KFP cluster + id: create-kfp-cluster + uses: ./.github/actions/kfp-cluster + with: + k8s_version: ${{ matrix.k8s_version }} + multi_user: ${{ matrix.mode == 'multi-user' }} + storage_backend: ${{ matrix.storage }} + image_path: ${{ needs.build.outputs.IMAGE_PATH }} + image_tag: ${{ needs.build.outputs.IMAGE_TAG }} + image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }} + continue-on-error: true + + - name: Build and upload the sample Modelcar image to Kind + id: build-sample-modelcar-image + if: ${{ steps.create-kfp-cluster.outcome == 'success' }} + run: | + docker build -f samples/v2/modelcar/Dockerfile -t registry.domain.local/modelcar:test . 
+ kind --name kfp load docker-image registry.domain.local/modelcar:test + continue-on-error: true + + - name: Forward API port + id: forward-api-port + if: ${{ steps.build-sample-modelcar-image.outcome == 'success' }} + run: ./.github/resources/scripts/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888 + continue-on-error: true + + - name: Install protobuf dependencies & kfp-pipeline-spec + id: install-protobuf-deps + uses: ./.github/actions/protobuf + + - name: Install kfp & kfp-kubernetes from source + id: install-kfp-k8s-deps + uses: ./.github/actions/kfp-k8s + + - name: Run Samples Tests + id: tests + if: ${{ steps.forward-api-port.outcome == 'success' }} + env: + KFP_MULTI_USER: ${{ matrix.mode == 'multi-user' }} + run: | + python3 -u ./samples/v2/sample_test.py + continue-on-error: true + + - name: Test SeaweedFS Namespace Isolation + id: test-namespace-isolation + if: ${{ matrix.mode == 'multi-user' && matrix.storage == 'seaweedfs' && steps.tests.outcome == 'success' }} + run: ./test/seaweedfs/namespace_isolation_test.sh + continue-on-error: true + + - name: Collect failed logs + if: ${{ steps.create-kfp-cluster.outcome != 'success' || steps.forward-api-port.outcome != 'success' || steps.tests.outcome != 'success' }} + run: | + ./.github/resources/scripts/collect-logs.sh --ns kubeflow --output /tmp/tmp_pod_log.txt + if [ "${{ matrix.mode }}" == "multi-user" ]; then + ./.github/resources/scripts/collect-logs.sh --ns kubeflow-user-example-com --output /tmp/tmp_user_pod_log.txt + fi + exit 1 + + - name: Collect test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: kfp-samples-tests-artifacts-k8s-${{ matrix.k8s_version }}-${{ matrix.mode }}-${{ matrix.storage }} + path: /tmp/tmp*/* diff --git a/backend/src/v2/cacheutils/cache.go b/backend/src/v2/cacheutils/cache.go index f5e5a86205b..2c128fc284a 100644 --- a/backend/src/v2/cacheutils/cache.go +++ b/backend/src/v2/cacheutils/cache.go @@ -6,7 +6,6 @@ import ( "encoding/hex" "encoding/json" 
"fmt" - "os" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -17,7 +16,6 @@ import ( "github.com/kubeflow/pipelines/api/v2alpha1/go/cachekey" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" api "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" - "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" ) const ( @@ -74,9 +72,8 @@ func NewClient(cacheDisabled bool) (Client, error) { return &disabledCacheClient{}, nil } - cacheEndPoint := cacheDefaultEndpoint() - glog.Infof("Connecting to cache endpoint %s", cacheEndPoint) - conn, err := grpc.Dial(cacheEndPoint, + glog.Infof("Connecting to cache endpoint %s", defaultKfpApiEndpoint) + conn, err := grpc.Dial(defaultKfpApiEndpoint, grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(MaxClientGRPCMessageSize)), grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { @@ -88,28 +85,6 @@ func NewClient(cacheDisabled bool) (Client, error) { }, nil } -func cacheDefaultEndpoint() string { - // If proxy is enabled, use DNS name `ml-pipeline.kubeflow:8887` as default. - _, isHttpProxySet := os.LookupEnv(proxy.HttpProxyEnv) - _, isHttpsProxySet := os.LookupEnv(proxy.HttpsProxyEnv) - if isHttpProxySet || isHttpsProxySet { - return defaultKfpApiEndpoint - } - // Discover ml-pipeline in the same namespace by env var. - // https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables - cacheHost := os.Getenv("ML_PIPELINE_SERVICE_HOST") - cachePort := os.Getenv("ML_PIPELINE_SERVICE_PORT_GRPC") - if cacheHost != "" && cachePort != "" { - // If there is a ml-pipeline Kubernetes service in the same namespace, - // ML_PIPELINE_SERVICE_HOST and ML_PIPELINE_SERVICE_PORT env vars should - // exist by default, so we use it as default. - return cacheHost + ":" + cachePort - } - // If the env vars do not exist, use default ml-pipeline grpc endpoint `ml-pipeline.kubeflow:8887`. 
- glog.Infof("Cannot detect ml-pipeline in the same namespace, default to %s as KFP endpoint.", defaultKfpApiEndpoint) - return defaultKfpApiEndpoint -} - func (c *client) GetExecutionCache(fingerPrint, pipelineName, namespace string) (string, error) { fingerPrintPredicate := &api.Predicate{ Op: api.Predicate_EQUALS, diff --git a/backend/src/v2/metadata/env.go b/backend/src/v2/metadata/env.go index 5c26f74f85e..86481ef4ffe 100644 --- a/backend/src/v2/metadata/env.go +++ b/backend/src/v2/metadata/env.go @@ -1,11 +1,5 @@ package metadata -import ( - "os" - - "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" -) - const ( metadataGrpcServiceAddress = "metadata-grpc-service.kubeflow" metadataGrpcServicePort = "8080" @@ -17,26 +11,6 @@ type ServerConfig struct { } func DefaultConfig() *ServerConfig { - // If proxy is enabled, use DNS name `metadata-grpc-service.kubeflow:8080` as default. - _, isHttpProxySet := os.LookupEnv(proxy.HttpProxyEnv) - _, isHttpsProxySet := os.LookupEnv(proxy.HttpsProxyEnv) - if isHttpProxySet || isHttpsProxySet { - return &ServerConfig{ - Address: metadataGrpcServiceAddress, - Port: metadataGrpcServicePort, - } - } - // The env vars exist when metadata-grpc-service Kubernetes service is - // in the same namespace as the current Pod. 
- // https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables - hostEnv := os.Getenv("METADATA_GRPC_SERVICE_SERVICE_HOST") - portEnv := os.Getenv("METADATA_GRPC_SERVICE_SERVICE_PORT") - if hostEnv != "" && portEnv != "" { - return &ServerConfig{ - Address: hostEnv, - Port: portEnv, - } - } return &ServerConfig{ Address: metadataGrpcServiceAddress, Port: metadataGrpcServicePort, diff --git a/backend/src/v2/objectstore/config.go b/backend/src/v2/objectstore/config.go index d3e0d433764..bff464fd690 100644 --- a/backend/src/v2/objectstore/config.go +++ b/backend/src/v2/objectstore/config.go @@ -18,14 +18,10 @@ package objectstore import ( "encoding/json" "fmt" - "os" "path" "regexp" "strconv" "strings" - - "github.com/golang/glog" - "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" ) // The endpoint uses Kubernetes service DNS name with namespace: @@ -165,24 +161,6 @@ func ParseProviderFromPath(uri string) (string, error) { } func MinioDefaultEndpoint() string { - // If proxy is enabled, use DNS name `minio-service.kubeflow:9000` as default. - _, isHttpProxySet := os.LookupEnv(proxy.HttpProxyEnv) - _, isHttpsProxySet := os.LookupEnv(proxy.HttpsProxyEnv) - if isHttpProxySet || isHttpsProxySet { - return defaultMinioEndpointInMultiUserMode - } - // Discover minio-service in the same namespace by env var. - // https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables - minioHost := os.Getenv("MINIO_SERVICE_SERVICE_HOST") - minioPort := os.Getenv("MINIO_SERVICE_SERVICE_PORT") - if minioHost != "" && minioPort != "" { - // If there is a minio-service Kubernetes service in the same namespace, - // MINIO_SERVICE_SERVICE_HOST and MINIO_SERVICE_SERVICE_PORT env vars should - // exist by default, so we use it as default. 
- return minioHost + ":" + minioPort - } - // If the env vars do not exist, we guess that we are running in KFP multi user mode, so default minio service should be `minio-service.kubeflow:9000`. - glog.Infof("Cannot detect minio-service in the same namespace, default to %s as MinIO endpoint.", defaultMinioEndpointInMultiUserMode) return defaultMinioEndpointInMultiUserMode } diff --git a/manifests/kustomize/base/pipeline/allow-same-namespace-networkpolicy.yaml b/manifests/kustomize/base/pipeline/allow-same-namespace-networkpolicy.yaml new file mode 100644 index 00000000000..91ba34c619c --- /dev/null +++ b/manifests/kustomize/base/pipeline/allow-same-namespace-networkpolicy.yaml @@ -0,0 +1,12 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: default-allow-same-namespace + namespace: kubeflow +spec: + podSelector: {} + ingress: + - from: + - podSelector: {} + policyTypes: + - Ingress diff --git a/manifests/kustomize/base/pipeline/kustomization.yaml b/manifests/kustomize/base/pipeline/kustomization.yaml index f1803a9c251..d088a2ba028 100644 --- a/manifests/kustomize/base/pipeline/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/kustomization.yaml @@ -34,6 +34,7 @@ resources: - container-builder-sa.yaml - viewer-sa.yaml - kfp-launcher-configmap.yaml + - allow-same-namespace-networkpolicy.yaml images: - name: ghcr.io/kubeflow/kfp-api-server newTag: 2.14.0 diff --git a/manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml index 0a9d90b016e..d8ba71c4a4e 100644 --- a/manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml +++ b/manifests/kustomize/env/platform-agnostic-minio/kustomization.yaml @@ -1,3 +1,5 @@ +# Minio deployment manifests in KFP will be removed in 2.16.0, in the future end-users will need to provision their own minio deployments +# The default will be SeaweedFS (Gateway docs: 
https://github.com/kubeflow/pipelines/blob/master/manifests/kustomize/third-party/seaweedfs/README.md) apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/manifests/kustomize/env/platform-agnostic-multi-user-minio/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-multi-user-minio/kustomization.yaml new file mode 100644 index 00000000000..f7990f8466f --- /dev/null +++ b/manifests/kustomize/env/platform-agnostic-multi-user-minio/kustomization.yaml @@ -0,0 +1,63 @@ +# Minio deployment manifests in KFP will be removed in 2.16.0, in the future end-users will need to provision their own minio deployments +# The default will be SeaweedFS (Gateway docs: https://github.com/kubeflow/pipelines/blob/master/manifests/kustomize/third-party/seaweedfs/README.md) +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../../third-party/metacontroller/base +- ../../base/installs/multi-user +- ../../base/metadata/base +- ../../base/metadata/options/istio +- ../../third-party/argo/installs/cluster +- ../../third-party/mysql/base +- ../../third-party/mysql/options/istio +- ../../third-party/minio/base +- ../../third-party/minio/options/istio + + +# !!! If you want to customize the namespace, +# please also update base/cache-deployer/cluster-scoped/cache-deployer-clusterrolebinding.yaml +namespace: kubeflow + +# Identifier for application manager to apply ownerReference. +# The ownerReference ensures the resources get garbage collected +# when application is deleted. 
+labels: +- includeSelectors: true + pairs: + application-crd-id: kubeflow-pipelines + +configMapGenerator: +- name: kubeflow-pipelines-profile-controller-code + behavior: replace + files: + # swap sync.py with minio compatible sync.py + - sync.py=sync.py + +patches: +- target: + group: apps + version: v1 + kind: Deployment + name: kubeflow-pipelines-profile-controller + patch: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: kubeflow-pipelines-profile-controller + spec: + template: + spec: + containers: + - name: profile-controller + env: + - name: MINIO_ACCESS_KEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: accesskey + - name: MINIO_SECRET_KEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: secretkey diff --git a/manifests/kustomize/env/platform-agnostic-multi-user-minio/sync.py b/manifests/kustomize/env/platform-agnostic-multi-user-minio/sync.py new file mode 100644 index 00000000000..3d39e3187f9 --- /dev/null +++ b/manifests/kustomize/env/platform-agnostic-multi-user-minio/sync.py @@ -0,0 +1,395 @@ +# Copyright 2020-2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from http.server import BaseHTTPRequestHandler, HTTPServer +import json +import os +import base64 + + +def main(): + settings = get_settings_from_env() + server = server_factory(**settings) + server.serve_forever() + + +def get_settings_from_env(controller_port=None, + visualization_server_image=None, frontend_image=None, + visualization_server_tag=None, frontend_tag=None, disable_istio_sidecar=None, + minio_access_key=None, minio_secret_key=None, kfp_default_pipeline_root=None): + """ + Returns a dict of settings from environment variables relevant to the controller + + Environment settings can be overridden by passing them here as arguments. + + Settings are pulled from the all-caps version of the setting name. The + following defaults are used if those environment variables are not set + to enable backwards compatibility with previous versions of this script: + visualization_server_image: ghcr.io/kubeflow/kfp-visualization-server + visualization_server_tag: value of KFP_VERSION environment variable + frontend_image: ghcr.io/kubeflow/kfp-frontend + frontend_tag: value of KFP_VERSION environment variable + disable_istio_sidecar: Required (no default) + minio_access_key: Required (no default) + minio_secret_key: Required (no default) + """ + settings = dict() + settings["controller_port"] = \ + controller_port or \ + os.environ.get("CONTROLLER_PORT", "8080") + + settings["visualization_server_image"] = \ + visualization_server_image or \ + os.environ.get("VISUALIZATION_SERVER_IMAGE", "ghcr.io/kubeflow/kfp-visualization-server") + + settings["frontend_image"] = \ + frontend_image or \ + os.environ.get("FRONTEND_IMAGE", "ghcr.io/kubeflow/kfp-frontend") + + # Look for specific tags for each image first, falling back to + # previously used KFP_VERSION environment variable for backwards + # compatibility + settings["visualization_server_tag"] = \ + visualization_server_tag or \ + os.environ.get("VISUALIZATION_SERVER_TAG") or \ + os.environ["KFP_VERSION"] + + 
settings["frontend_tag"] = \ + frontend_tag or \ + os.environ.get("FRONTEND_TAG") or \ + os.environ["KFP_VERSION"] + + settings["disable_istio_sidecar"] = \ + disable_istio_sidecar if disable_istio_sidecar is not None \ + else os.environ.get("DISABLE_ISTIO_SIDECAR") == "true" + + settings["minio_access_key"] = \ + minio_access_key or \ + base64.b64encode(bytes(os.environ.get("MINIO_ACCESS_KEY"), 'utf-8')).decode('utf-8') + + settings["minio_secret_key"] = \ + minio_secret_key or \ + base64.b64encode(bytes(os.environ.get("MINIO_SECRET_KEY"), 'utf-8')).decode('utf-8') + + # KFP_DEFAULT_PIPELINE_ROOT is optional + settings["kfp_default_pipeline_root"] = \ + kfp_default_pipeline_root or \ + os.environ.get("KFP_DEFAULT_PIPELINE_ROOT") + + return settings + + +def server_factory(visualization_server_image, + visualization_server_tag, frontend_image, frontend_tag, + disable_istio_sidecar, minio_access_key, + minio_secret_key, kfp_default_pipeline_root=None, + url="", controller_port=8080): + """ + Returns an HTTPServer populated with Handler with customized settings + """ + class Controller(BaseHTTPRequestHandler): + def sync(self, parent, attachments): + # parent is a namespace + namespace = parent.get("metadata", {}).get("name") + pipeline_enabled = parent.get("metadata", {}).get( + "labels", {}).get("pipelines.kubeflow.org/enabled") + + if pipeline_enabled != "true": + return {"status": {}, "attachments": []} + + desired_configmap_count = 1 + desired_resources = [] + if kfp_default_pipeline_root: + desired_configmap_count = 2 + desired_resources += [{ + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": { + "name": "kfp-launcher", + "namespace": namespace, + }, + "data": { + "defaultPipelineRoot": kfp_default_pipeline_root, + }, + }] + + + # Compute status based on observed state. 
+ desired_status = { + "kubeflow-pipelines-ready": + len(attachments["Secret.v1"]) == 1 and + len(attachments["ConfigMap.v1"]) == desired_configmap_count and + len(attachments["Deployment.apps/v1"]) == 2 and + len(attachments["Service.v1"]) == 2 and + len(attachments["DestinationRule.networking.istio.io/v1alpha3"]) == 1 and + len(attachments["AuthorizationPolicy.security.istio.io/v1beta1"]) == 1 and + "True" or "False" + } + + # Generate the desired attachment object(s). + desired_resources += [ + { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": { + "name": "metadata-grpc-configmap", + "namespace": namespace, + }, + "data": { + "METADATA_GRPC_SERVICE_HOST": + "metadata-grpc-service.kubeflow", + "METADATA_GRPC_SERVICE_PORT": "8080", + }, + }, + # Visualization server related manifests below + { + "apiVersion": "apps/v1", + "kind": "Deployment", + "metadata": { + "labels": { + "app": "ml-pipeline-visualizationserver" + }, + "name": "ml-pipeline-visualizationserver", + "namespace": namespace, + }, + "spec": { + "selector": { + "matchLabels": { + "app": "ml-pipeline-visualizationserver" + }, + }, + "template": { + "metadata": { + "labels": { + "app": "ml-pipeline-visualizationserver" + }, + "annotations": disable_istio_sidecar and { + "sidecar.istio.io/inject": "false" + } or {}, + }, + "spec": { + "containers": [{ + "image": f"{visualization_server_image}:{visualization_server_tag}", + "imagePullPolicy": + "IfNotPresent", + "name": + "ml-pipeline-visualizationserver", + "ports": [{ + "containerPort": 8888 + }], + "resources": { + "requests": { + "cpu": "50m", + "memory": "200Mi" + }, + "limits": { + "cpu": "500m", + "memory": "1Gi" + }, + } + }], + "serviceAccountName": + "default-editor", + }, + }, + }, + }, + { + "apiVersion": "networking.istio.io/v1alpha3", + "kind": "DestinationRule", + "metadata": { + "name": "ml-pipeline-visualizationserver", + "namespace": namespace, + }, + "spec": { + "host": "ml-pipeline-visualizationserver", + "trafficPolicy": { 
+ "tls": { + "mode": "ISTIO_MUTUAL" + } + } + } + }, + { + "apiVersion": "security.istio.io/v1beta1", + "kind": "AuthorizationPolicy", + "metadata": { + "name": "ml-pipeline-visualizationserver", + "namespace": namespace, + }, + "spec": { + "selector": { + "matchLabels": { + "app": "ml-pipeline-visualizationserver" + } + }, + "rules": [{ + "from": [{ + "source": { + "principals": ["cluster.local/ns/kubeflow/sa/ml-pipeline"] + } + }] + }] + } + }, + { + "apiVersion": "v1", + "kind": "Service", + "metadata": { + "name": "ml-pipeline-visualizationserver", + "namespace": namespace, + }, + "spec": { + "ports": [{ + "name": "http", + "port": 8888, + "protocol": "TCP", + "targetPort": 8888, + }], + "selector": { + "app": "ml-pipeline-visualizationserver", + }, + }, + }, + # Artifact fetcher related resources below. + { + "apiVersion": "apps/v1", + "kind": "Deployment", + "metadata": { + "labels": { + "app": "ml-pipeline-ui-artifact" + }, + "name": "ml-pipeline-ui-artifact", + "namespace": namespace, + }, + "spec": { + "selector": { + "matchLabels": { + "app": "ml-pipeline-ui-artifact" + } + }, + "template": { + "metadata": { + "labels": { + "app": "ml-pipeline-ui-artifact" + }, + "annotations": disable_istio_sidecar and { + "sidecar.istio.io/inject": "false" + } or {}, + }, + "spec": { + "containers": [{ + "name": + "ml-pipeline-ui-artifact", + "image": f"{frontend_image}:{frontend_tag}", + "imagePullPolicy": + "IfNotPresent", + "ports": [{ + "containerPort": 3000 + }], + "env": [ + { + "name": "MINIO_ACCESS_KEY", + "valueFrom": { + "secretKeyRef": { + "key": "accesskey", + "name": "mlpipeline-minio-artifact" + } + } + }, + { + "name": "MINIO_SECRET_KEY", + "valueFrom": { + "secretKeyRef": { + "key": "secretkey", + "name": "mlpipeline-minio-artifact" + } + } + } + ], + "resources": { + "requests": { + "cpu": "10m", + "memory": "70Mi" + }, + "limits": { + "cpu": "100m", + "memory": "500Mi" + }, + } + }], + "serviceAccountName": + "default-editor" + } + } + } + }, + { + 
"apiVersion": "v1", + "kind": "Service", + "metadata": { + "name": "ml-pipeline-ui-artifact", + "namespace": namespace, + "labels": { + "app": "ml-pipeline-ui-artifact" + } + }, + "spec": { + "ports": [{ + "name": + "http", # name is required to let istio understand request protocol + "port": 80, + "protocol": "TCP", + "targetPort": 3000 + }], + "selector": { + "app": "ml-pipeline-ui-artifact" + } + } + }, + ] + print('Received request:\n', json.dumps(parent, sort_keys=True)) + print('Desired resources except secrets:\n', json.dumps(desired_resources, sort_keys=True)) + # Moved after the print argument because this is sensitive data. + desired_resources.append({ + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": "mlpipeline-minio-artifact", + "namespace": namespace, + }, + "data": { + "accesskey": minio_access_key, + "secretkey": minio_secret_key, + }, + }) + + return {"status": desired_status, "attachments": desired_resources} + + def do_POST(self): + # Serve the sync() function as a JSON webhook. 
+ observed = json.loads( + self.rfile.read(int(self.headers.get("content-length")))) + desired = self.sync(observed["object"], observed["attachments"]) + + self.send_response(200) + self.send_header("Content-type", "application/json") + self.end_headers() + self.wfile.write(bytes(json.dumps(desired), 'utf-8')) + + return HTTPServer((url, int(controller_port)), Controller) + + +if __name__ == "__main__": + main() diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml index ab1a6cc6657..0375f83270a 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-networkpolicy.yaml @@ -1,18 +1,5 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy -metadata: - name: default-allow-same-namespace - namespace: kubeflow -spec: - podSelector: {} - ingress: - - from: - - podSelector: {} - policyTypes: - - Ingress ---- -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy metadata: name: seaweedfs spec: diff --git a/samples/v2/sample_test.py b/samples/v2/sample_test.py index 35df00bbf8b..4ad4ec4559c 100644 --- a/samples/v2/sample_test.py +++ b/samples/v2/sample_test.py @@ -150,7 +150,7 @@ def delete_k8s_yaml(namespace: str, yaml_file: str): print(f'Exception when deleting from YAML: {e}') -def get_auth_token(): +def get_authentication_token(): """Get authentication token for multi-user mode.""" if _KFP_MULTI_USER: try: @@ -179,7 +179,7 @@ class SampleTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Initialize client with token if in multi-user mode - auth_token = get_auth_token() + auth_token = get_authentication_token() if auth_token: self._client = kfp.Client( host=self._kfp_host_and_port, From 4eac7349764978a7aa57be4978bdd91fda7393d0 Mon Sep 17 00:00:00 2001 From: Harshvir Potpose 
Date: Fri, 15 Aug 2025 21:34:14 +0530 Subject: [PATCH 15/16] Fix failing backend test (#12149) * remove Test_GetMinioDefaultEndpoint Signed-off-by: Harshvir Potpose * use env directly Signed-off-by: Harshvir Potpose --------- Signed-off-by: Harshvir Potpose --- backend/src/v2/config/env.go | 2 +- backend/src/v2/objectstore/config.go | 6 +-- .../src/v2/objectstore/object_store_test.go | 48 ------------------- 3 files changed, 2 insertions(+), 54 deletions(-) diff --git a/backend/src/v2/config/env.go b/backend/src/v2/config/env.go index 74c5cf8b4a6..32643dd7dad 100644 --- a/backend/src/v2/config/env.go +++ b/backend/src/v2/config/env.go @@ -166,7 +166,7 @@ func getDefaultMinioSessionInfo() (objectstore.SessionInfo, error) { Provider: "minio", Params: map[string]string{ "region": "minio", - "endpoint": objectstore.MinioDefaultEndpoint(), + "endpoint": objectstore.DefaultMinioEndpointInMultiUserMode, "disableSSL": strconv.FormatBool(true), "fromEnv": strconv.FormatBool(false), "secretName": minioArtifactSecretName, diff --git a/backend/src/v2/objectstore/config.go b/backend/src/v2/objectstore/config.go index bff464fd690..8130fc8acaf 100644 --- a/backend/src/v2/objectstore/config.go +++ b/backend/src/v2/objectstore/config.go @@ -26,7 +26,7 @@ import ( // The endpoint uses Kubernetes service DNS name with namespace: // https://kubernetes.io/docs/concepts/services-networking/service/#dns -const defaultMinioEndpointInMultiUserMode = "minio-service.kubeflow:9000" +const DefaultMinioEndpointInMultiUserMode = "minio-service.kubeflow:9000" type Config struct { Scheme string @@ -160,10 +160,6 @@ func ParseProviderFromPath(uri string) (string, error) { return strings.TrimSuffix(bucketConfig.Scheme, "://"), nil } -func MinioDefaultEndpoint() string { - return defaultMinioEndpointInMultiUserMode -} - func GetSessionInfoFromString(sessionInfoJSON string) (*SessionInfo, error) { sessionInfo := &SessionInfo{} if sessionInfoJSON == "" { diff --git 
a/backend/src/v2/objectstore/object_store_test.go b/backend/src/v2/objectstore/object_store_test.go index e8f33cbb9d9..d758c987f35 100644 --- a/backend/src/v2/objectstore/object_store_test.go +++ b/backend/src/v2/objectstore/object_store_test.go @@ -17,7 +17,6 @@ package objectstore import ( "context" "fmt" - "os" "reflect" "testing" @@ -211,53 +210,6 @@ func Test_bucketConfig_KeyFromURI(t *testing.T) { } } -func Test_GetMinioDefaultEndpoint(t *testing.T) { - defer func() { - os.Unsetenv("MINIO_SERVICE_SERVICE_HOST") - os.Unsetenv("MINIO_SERVICE_SERVICE_PORT") - }() - tests := []struct { - name string - minioServiceHostEnv string - minioServicePortEnv string - want string - }{ - { - name: "In full Kubeflow, KFP multi-user mode on", - minioServiceHostEnv: "", - minioServicePortEnv: "", - want: "minio-service.kubeflow:9000", - }, - { - name: "In KFP standalone without multi-user mode", - minioServiceHostEnv: "1.2.3.4", - minioServicePortEnv: "4321", - want: "1.2.3.4:4321", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.minioServiceHostEnv != "" { - os.Setenv("MINIO_SERVICE_SERVICE_HOST", tt.minioServiceHostEnv) - } else { - os.Unsetenv("MINIO_SERVICE_SERVICE_HOST") - } - if tt.minioServicePortEnv != "" { - os.Setenv("MINIO_SERVICE_SERVICE_PORT", tt.minioServicePortEnv) - } else { - os.Unsetenv("MINIO_SERVICE_SERVICE_PORT") - } - got := MinioDefaultEndpoint() - if got != tt.want { - t.Errorf( - "MinioDefaultEndpoint() = %q, want %q\nwhen MINIO_SERVICE_SERVICE_HOST=%q MINIO_SERVICE_SERVICE_PORT=%q", - got, tt.want, tt.minioServiceHostEnv, tt.minioServicePortEnv, - ) - } - }) - } -} - func Test_createS3BucketSession(t *testing.T) { tt := []struct { msg string From 6d211e353ca016f0ca045308df8076cb4bffadd5 Mon Sep 17 00:00:00 2001 From: Harshvir Potpose Date: Wed, 20 Aug 2025 03:01:05 +0530 Subject: [PATCH 16/16] chore: Update seaweedfs pvc volume size (#12156) Signed-off-by: Harshvir Potpose --- 
.../third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml index 7d47bee65de..b0302f9cb7f 100644 --- a/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml +++ b/manifests/kustomize/third-party/seaweedfs/base/seaweedfs/seaweedfs-pvc.yaml @@ -8,4 +8,4 @@ spec: - ReadWriteOnce resources: requests: - storage: 5Gi + storage: 20Gi