Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/dex_oauth2-proxy_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
- experimental/security/PSS/*
- common/dex/base/**
- tests/gh-actions/install_istio*.sh
- tests/gh-actions/test_dex_login.py

jobs:
build:
Expand Down
208 changes: 121 additions & 87 deletions tests/gh-actions/test_dex_login.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env python3

import re
import time
from urllib.parse import urlsplit, urlencode

import requests
import urllib3

Expand Down Expand Up @@ -52,94 +52,128 @@ def get_session_cookies(self) -> str:
Get the session cookies by authenticating against Dex
:return: a string of session cookies in the form "key1=value1; key2=value2"
"""

# use a persistent session (for cookies)
s = requests.Session()

# GET the endpoint_url, which should redirect to Dex
resp = s.get(
self._endpoint_url, allow_redirects=True, verify=not self._skip_tls_verify
)
if resp.status_code == 200:
pass
elif resp.status_code == 403:
# if we get 403, we might be at the oauth2-proxy sign-in page
# the default path to start the sign-in flow is `/oauth2/start?rd=<url>`
url_obj = urlsplit(resp.url)
url_obj = url_obj._replace(
path="/oauth2/start", query=urlencode({"rd": url_obj.path})
)
resp = s.get(
url_obj.geturl(), allow_redirects=True, verify=not self._skip_tls_verify
)
else:
raise RuntimeError(
f"HTTP status code '{resp.status_code}' for GET against: {self._endpoint_url}"
)

# if we were NOT redirected, then the endpoint is unsecured
if len(resp.history) == 0:
# no cookies are needed
return ""

# if we are at `../auth` path, we need to select an auth type
url_obj = urlsplit(resp.url)
if re.search(r"/auth$", url_obj.path):
url_obj = url_obj._replace(
path=re.sub(r"/auth$", f"/auth/{self._dex_auth_type}", url_obj.path)
)

# if we are at `../auth/xxxx/login` path, then we are at the login page
if re.search(r"/auth/.*/login$", url_obj.path):
dex_login_url = url_obj.geturl()
else:
# otherwise, we need to follow a redirect to the login page
resp = s.get(
url_obj.geturl(), allow_redirects=True, verify=not self._skip_tls_verify
)
if resp.status_code != 200:
raise RuntimeError(
f"HTTP status code '{resp.status_code}' for GET against: {url_obj.geturl()}"
max_retries = 3
retry_delay = 2

# Use a persistent session for cookies
session = requests.Session()

for attempt in range(max_retries):
try:
# GET the endpoint_url, which should redirect to Dex
response = session.get(
self._endpoint_url,
allow_redirects=True,
verify=not self._skip_tls_verify
)
dex_login_url = resp.url

# attempt Dex login
resp = s.post(
dex_login_url,
data={"login": self._dex_username, "password": self._dex_password},
allow_redirects=True,
verify=not self._skip_tls_verify,
)
if resp.status_code != 200:
raise RuntimeError(
f"HTTP status code '{resp.status_code}' for POST against: {dex_login_url}"
)

# if we were NOT redirected, then the login credentials were probably invalid
if len(resp.history) == 0:
raise RuntimeError(
f"Login credentials are probably invalid - "
f"No redirect after POST to: {dex_login_url}"
)

# if we are at `../approval` path, we need to approve the login
url_obj = urlsplit(resp.url)
if re.search(r"/approval$", url_obj.path):
dex_approval_url = url_obj.geturl()

# approve the login
resp = s.post(
dex_approval_url,
data={"approval": "approve"},
allow_redirects=True,
verify=not self._skip_tls_verify,
)
if resp.status_code != 200:
raise RuntimeError(
f"HTTP status code '{resp.status_code}' for POST against: {url_obj.geturl()}"
if response.status_code == 200:
pass
elif response.status_code == 403:
# if we get 403, we might be at the oauth2-proxy sign-in page
# the default path to start the sign-in flow is `/oauth2/start?rd=<url>`
url_object = urlsplit(response.url)
url_object = url_object._replace(
path="/oauth2/start",
query=urlencode({"rd": url_object.path})
)
response = session.get(
url_object.geturl(),
allow_redirects=True,
verify=not self._skip_tls_verify
)
else:
raise RuntimeError(
f"HTTP status code '{response.status_code}' for GET against: {self._endpoint_url}"
)

# if we were NOT redirected, then the endpoint is unsecured
if len(response.history) == 0:
# No cookies are needed
return ""

# if we are at `../auth` path, we need to select an auth type
url_object = urlsplit(response.url)
if re.search(r"/auth$", url_object.path):
url_object = url_object._replace(
path=re.sub(r"/auth$", f"/auth/{self._dex_auth_type}", url_object.path)
)

# if we are at `../auth/xxxx/login` path, then we are at the login page
if re.search(r"/auth/.*/login$", url_object.path):
dex_login_url = url_object.geturl()
else:
# otherwise, we need to follow a redirect to the login page
response = session.get(
url_object.geturl(),
allow_redirects=True,
verify=not self._skip_tls_verify
)
if response.status_code != 200:
raise RuntimeError(
f"HTTP status code '{response.status_code}' for GET against: {url_object.geturl()}"
)
dex_login_url = response.url

# attempt Dex login
response = session.post(
dex_login_url,
data={"login": self._dex_username, "password": self._dex_password},
allow_redirects=True,
verify=not self._skip_tls_verify,
)

return "; ".join([f"{c.name}={c.value}" for c in s.cookies])
# Handle 403 specifically - might need to restart oauth flow
if response.status_code == 403:
# Try one more approach - go through the oauth2 flow again
oauth_url = f"{urlsplit(self._endpoint_url).scheme}://{urlsplit(self._endpoint_url).netloc}/oauth2/start"
response = session.get(
oauth_url,
allow_redirects=True,
verify=not self._skip_tls_verify,
)
# Continue with normal flow after restart
Comment on lines +125 to +134

@juliusvonkohout juliusvonkohout Apr 10, 2025

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The difference is really hard to read, but here I see a relevant change. I am worried that this could hide the underlying problem. it is more a workaround than a real solution. Why do we need to retry in the first place ?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does a bit of load fail the test so easily ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That bit of load did fail the test at that time, but when I checked again the next day, it was able to handle it. So I deliberately triggered a 403 using this code block, and as seen in the output, the old version exited with the error, while the new one worked.

def patched_post(original_post):
    def _patched_post(*args, **kwargs):
        if random.random() < 0.2:
            mock_response = requests.Response()
            mock_response.status_code = 403
            mock_response._content = b"Simulated 403 error"
            mock_response.url = args[0]
            return mock_response
        return original_post(*args, **kwargs)
    return _patched_post

original_post = requests.Session.post
requests.Session.post = patched_post(original_post)
Screenshot 2025-04-13 at 5 50 33 PM

You're right that this is more of a workaround than a solution to the underlying problem. The 403 errors happen when the Dex authentication flow gets disrupted under load. While this fix makes the test more reliable, we should investigate why these errors occur in the first place.

The error occurs randomly and is a bit hard to reproduce. But yes, when it fails, the same error occurs — a 403 against the URL.

Shall I investigate the logs of these pods and run it again to find the root cause?

kubectl logs -n auth $(kubectl get pods -n auth -l app=dex -o name) -f
kubectl logs -n oauth2-proxy $(kubectl get pods -n oauth2-proxy -o name) -f

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes please investigate, but i will merge it for now

if response.status_code == 200 and session.cookies:
return "; ".join([f"{c.name}={c.value}" for c in session.cookies])

if response.status_code != 200:
raise RuntimeError(
f"HTTP status code '{response.status_code}' for POST against: {dex_login_url}"
)

# if we were NOT redirected, then the login credentials were probably invalid
if len(response.history) == 0:
raise RuntimeError(
f"Login credentials are probably invalid - "
f"No redirect after POST to: {dex_login_url}"
)

# if we are at `../approval` path, we need to approve the login
url_object = urlsplit(response.url)
if re.search(r"/approval$", url_object.path):
dex_approval_url = url_object.geturl()
# Approve the login
response = session.post(
dex_approval_url,
data={"approval": "approve"},
allow_redirects=True,
verify=not self._skip_tls_verify,
)
if response.status_code != 200:
raise RuntimeError(
f"HTTP status code '{response.status_code}' for POST against: {url_object.geturl()}"
)

return "; ".join([f"{c.name}={c.value}" for c in session.cookies])

except Exception as e:
if attempt == max_retries - 1: # Last attempt
print(f"All {max_retries} attempts failed. Last error: {str(e)}")
raise
print(f"Attempt {attempt + 1} failed: {str(e)}")
print(f"Retrying in {retry_delay} seconds...")
time.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff


KUBEFLOW_ENDPOINT = "http://localhost:8080"
KUBEFLOW_USERNAME = "user@example.com"
Expand All @@ -156,4 +190,4 @@ def get_session_cookies(self) -> str:

# try to get the session cookies
# NOTE: this will raise an exception if something goes wrong
session_cookies = dex_session_manager.get_session_cookies()
session_cookies = dex_session_manager.get_session_cookies()