2626
2727import hudson .Extension ;
2828import hudson .ExtensionList ;
29+ import hudson .FilePath ;
2930import hudson .model .InvisibleAction ;
3031import hudson .model .UnprotectedRootAction ;
32+ import hudson .util .RemotingDiagnostics ;
33+ import java .io .IOException ;
34+ import java .time .Duration ;
35+ import java .util .Timer ;
36+ import java .util .TimerTask ;
37+ import java .util .logging .Level ;
38+ import java .util .logging .Logger ;
39+ import java .util .stream .Collectors ;
40+ import jenkins .util .JenkinsJVM ;
41+ import jenkins .util .SystemProperties ;
3142import net .sf .json .JSONArray ;
3243import net .sf .json .JSONObject ;
3344import org .kohsuke .accmod .Restricted ;
4253@ Restricted (NoExternalUse .class )
4354public final class HealthCheckAction extends InvisibleAction implements UnprotectedRootAction {
4455
// Logger named after this class; doIndex uses it to report slow health checks.
56+ private static final Logger LOGGER = Logger .getLogger (HealthCheckAction .class .getName ());
// Delay passed to the doIndex watchdog timer before it logs a thread dump.
// Obtained through SystemProperties.getDuration using the key
// HealthCheckAction.class.getName() + ".thresholdTimeout"; the fallback supplied here is 10 seconds.
57+ private static final Duration THRESHOLD_TIMEOUT = SystemProperties .getDuration (
58+ HealthCheckAction .class .getName () + ".thresholdTimeout" , Duration .ofSeconds (10 ));
59+
4560 @ Override
4661 public String getUrlName () {
4762 return "health" ;
@@ -50,17 +65,40 @@ public String getUrlName() {
5065 public HttpResponse doIndex () {
5166 boolean success = true ;
5267 var failing = new JSONArray ();
53- for (var healthCheck : ExtensionList .lookup (HealthCheck .class )) {
54- var check = healthCheck .check ();
55- success &= check ;
56- if (!check ) {
57- failing .add (healthCheck .getName ());
68+
69+ var watchdog = new Timer ("HealthCheckActionWatchdog" , true );
70+ watchdog .schedule (new TimerTask () {
71+ @ Override
72+ public void run () {
73+ if (JenkinsJVM .isJenkinsJVM ()) {
74+ try {
75+ var threadDump = RemotingDiagnostics .getThreadDump (FilePath .localChannel );
76+ LOGGER .severe (() -> "health check did not complete in timely fashion:\n \n "
77+ + threadDump .values ().stream ().collect (Collectors .joining ()).trim ());
78+ } catch (IOException e ) {
79+ LOGGER .log (Level .WARNING , "Failed to get thread dump during slow health check" , e );
80+ } catch (InterruptedException e ) {
81+ Thread .currentThread ().interrupt ();
82+ }
83+ }
5884 }
85+ }, THRESHOLD_TIMEOUT .toMillis ());
86+
87+ try {
88+ for (var healthCheck : ExtensionList .lookup (HealthCheck .class )) {
89+ var check = healthCheck .check ();
90+ success &= check ;
91+ if (!check ) {
92+ failing .add (healthCheck .getName ());
93+ }
94+ }
95+ var payload = new JSONObject ().element ("status" , success );
96+ if (!success ) {
97+ payload = payload .element ("failures" , failing );
98+ }
99+ return new JsonHttpResponse (payload , success ? 200 : 503 );
100+ } finally {
101+ watchdog .cancel ();
59102 }
60- var payload = new JSONObject ().element ("status" , success );
61- if (!success ) {
62- payload = payload .element ("failures" , failing );
63- }
64- return new JsonHttpResponse (payload , success ? 200 : 503 );
65103 }
66104}
0 commit comments