Skip to content

Commit b251733

Browse files
authored
Log thread dump when health check exceeds 10s (#11266)
1 parent 313c6ff commit b251733

File tree

1 file changed: 48 additions and 10 deletions.

core/src/main/java/jenkins/health/HealthCheckAction.java

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,19 @@
2626

2727
import hudson.Extension;
2828
import hudson.ExtensionList;
29+
import hudson.FilePath;
2930
import hudson.model.InvisibleAction;
3031
import hudson.model.UnprotectedRootAction;
32+
import hudson.util.RemotingDiagnostics;
33+
import java.io.IOException;
34+
import java.time.Duration;
35+
import java.util.Timer;
36+
import java.util.TimerTask;
37+
import java.util.logging.Level;
38+
import java.util.logging.Logger;
39+
import java.util.stream.Collectors;
40+
import jenkins.util.JenkinsJVM;
41+
import jenkins.util.SystemProperties;
3142
import net.sf.json.JSONArray;
3243
import net.sf.json.JSONObject;
3344
import org.kohsuke.accmod.Restricted;
@@ -42,6 +53,10 @@
4253
@Restricted(NoExternalUse.class)
4354
public final class HealthCheckAction extends InvisibleAction implements UnprotectedRootAction {
4455

56+
private static final Logger LOGGER = Logger.getLogger(HealthCheckAction.class.getName());
57+
private static final Duration THRESHOLD_TIMEOUT = SystemProperties.getDuration(
58+
HealthCheckAction.class.getName() + ".thresholdTimeout", Duration.ofSeconds(10));
59+
4560
@Override
4661
public String getUrlName() {
4762
return "health";
@@ -50,17 +65,40 @@ public String getUrlName() {
5065
public HttpResponse doIndex() {
5166
boolean success = true;
5267
var failing = new JSONArray();
53-
for (var healthCheck : ExtensionList.lookup(HealthCheck.class)) {
54-
var check = healthCheck.check();
55-
success &= check;
56-
if (!check) {
57-
failing.add(healthCheck.getName());
68+
69+
var watchdog = new Timer("HealthCheckActionWatchdog", true);
70+
watchdog.schedule(new TimerTask() {
71+
@Override
72+
public void run() {
73+
if (JenkinsJVM.isJenkinsJVM()) {
74+
try {
75+
var threadDump = RemotingDiagnostics.getThreadDump(FilePath.localChannel);
76+
LOGGER.severe(() -> "health check did not complete in timely fashion:\n\n"
77+
+ threadDump.values().stream().collect(Collectors.joining()).trim());
78+
} catch (IOException e) {
79+
LOGGER.log(Level.WARNING, "Failed to get thread dump during slow health check", e);
80+
} catch (InterruptedException e) {
81+
Thread.currentThread().interrupt();
82+
}
83+
}
5884
}
85+
}, THRESHOLD_TIMEOUT.toMillis());
86+
87+
try {
88+
for (var healthCheck : ExtensionList.lookup(HealthCheck.class)) {
89+
var check = healthCheck.check();
90+
success &= check;
91+
if (!check) {
92+
failing.add(healthCheck.getName());
93+
}
94+
}
95+
var payload = new JSONObject().element("status", success);
96+
if (!success) {
97+
payload = payload.element("failures", failing);
98+
}
99+
return new JsonHttpResponse(payload, success ? 200 : 503);
100+
} finally {
101+
watchdog.cancel();
59102
}
60-
var payload = new JSONObject().element("status", success);
61-
if (!success) {
62-
payload = payload.element("failures", failing);
63-
}
64-
return new JsonHttpResponse(payload, success ? 200 : 503);
65103
}
66104
}

0 commit comments

Comments
 (0)