Skip to content

Commit a68faf9

Browse files
authored
Speed up Jenkins._cleanUpDisconnectComputers (#11102)
2 parents 36c17f5 + 9e11341 commit a68faf9

File tree

4 files changed

+27
-33
lines changed

4 files changed

+27
-33
lines changed

core/src/main/java/hudson/model/AbstractCIBase.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,6 @@ protected void interruptReloadThread() {
9494
ViewJob.interruptReloadThread();
9595
}
9696

97-
protected void killComputer(Computer c) {
98-
c.kill();
99-
}
100-
10197
private final Set<String> disabledAdministrativeMonitors = new HashSet<>();
10298

10399
/**
@@ -267,12 +263,12 @@ protected void updateComputerList(final boolean automaticAgentLaunch, @NonNull C
267263
// we need to start the process of reducing the executors on all computers as distinct
268264
// from the killing action which should not excessively use the Queue lock.
269265
for (Computer c : old) {
270-
c.inflictMortalWound();
266+
c.setNumExecutors(0);
271267
}
272268
});
273269
for (Computer c : old) {
274270
// when we get to here, the number of executors should be zero so this call should not need the Queue.lock
275-
killComputer(c);
271+
c.kill();
276272
}
277273
getQueue().scheduleMaintenance();
278274
Listeners.notify(ComputerListener.class, false, ComputerListener::onConfigurationChange);

core/src/main/java/hudson/model/Computer.java

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -827,23 +827,6 @@ protected void kill() {
827827
setNumExecutors(0);
828828
}
829829

830-
/**
831-
* Called by {@link Jenkins#updateComputerList(boolean, Collection)} to notify {@link Computer} that it will be discarded.
832-
*
833-
* <p>
834-
* Note that at this point {@link #getNode()} returns null.
835-
*
836-
* <p>
837-
* Note that the Queue lock is already held when this method is called.
838-
*
839-
* @see #onRemoved()
840-
*/
841-
@Restricted(NoExternalUse.class)
842-
@GuardedBy("hudson.model.Queue.lock")
843-
/*package*/ void inflictMortalWound() {
844-
setNumExecutors(0);
845-
}
846-
847830
/**
848831
* Called by {@link Jenkins} when this computer is removed.
849832
*
@@ -865,16 +848,17 @@ protected void onRemoved(){
865848
* Calling path, *means protected by Queue.withLock
866849
*
867850
* Computer.doConfigSubmit -> Computer.replaceBy ->Jenkins.setNodes* ->Computer.setNode
868-
* AbstractCIBase.updateComputerList->Computer.inflictMortalWound*
851+
* AbstractCIBase.updateComputerList->Computer.setNumExecutors*
869852
* AbstractCIBase.updateComputerList->AbstractCIBase.updateComputer* ->Computer.setNode
870853
* AbstractCIBase.updateComputerList->AbstractCIBase.killComputer->Computer.kill
871854
* Computer.constructor->Computer.setNode
872855
* Computer.kill is called after numExecutors set to zero(Computer.inflictMortalWound) so not need the Queue.lock
873856
*
874857
* @param n number of executors
875858
*/
859+
@Restricted(NoExternalUse.class)
876860
@GuardedBy("hudson.model.Queue.lock")
877-
private void setNumExecutors(int n) {
861+
public void setNumExecutors(int n) {
878862
this.numExecutors = n;
879863
final int diff = executors.size() - n;
880864

core/src/main/java/hudson/slaves/SlaveComputer.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -916,19 +916,23 @@ public Object getTarget() {
916916
protected void kill() {
917917
super.kill();
918918
closeChannel();
919-
try {
920-
log.close();
921-
} catch (IOException x) {
922-
LOGGER.log(Level.WARNING, "Failed to close agent log", x);
923-
}
924-
919+
closeLog();
925920
try {
926921
Util.deleteRecursive(getLogDir());
927922
} catch (IOException ex) {
928923
logger.log(Level.WARNING, "Unable to delete agent logs", ex);
929924
}
930925
}
931926

927+
@Restricted(NoExternalUse.class)
928+
public void closeLog() {
929+
try {
930+
log.close();
931+
} catch (IOException x) {
932+
LOGGER.log(Level.WARNING, "Failed to close agent log", x);
933+
}
934+
}
935+
932936
@Override
933937
public RetentionStrategy getRetentionStrategy() {
934938
Slave n = getNode();

core/src/main/java/jenkins/model/Jenkins.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@
176176
import hudson.slaves.NodeProvisioner;
177177
import hudson.slaves.OfflineCause;
178178
import hudson.slaves.RetentionStrategy;
179+
import hudson.slaves.SlaveComputer;
179180
import hudson.tasks.BuildWrapper;
180181
import hudson.tasks.Builder;
181182
import hudson.tasks.Publisher;
@@ -3775,7 +3776,10 @@ private Set<Future<?>> _cleanUpDisconnectComputers(final List<Throwable> errors)
37753776
for (Computer c : getComputersCollection()) {
37763777
try {
37773778
c.interrupt();
3778-
killComputer(c);
3779+
c.setNumExecutors(0);
3780+
if (Main.isUnitTest && c instanceof SlaveComputer sc) {
3781+
sc.closeLog(); // help TemporaryDirectoryAllocator.dispose esp. on Windows
3782+
}
37793783
pending.add(c.disconnect(null));
37803784
} catch (OutOfMemoryError e) {
37813785
// we should just propagate this, no point trying to log
@@ -3950,9 +3954,15 @@ private void _cleanUpAwaitDisconnects(List<Throwable> errors, Set<Future<?>> pen
39503954
if (!pending.isEmpty()) {
39513955
LOGGER.log(Main.isUnitTest ? Level.FINE : Level.INFO, "Waiting for node disconnection completion");
39523956
}
3957+
long end = System.nanoTime() + Duration.ofSeconds(10).toNanos();
39533958
for (Future<?> f : pending) {
39543959
try {
3955-
f.get(10, TimeUnit.SECONDS); // if clean up operation didn't complete in time, we fail the test
3960+
long remaining = end - System.nanoTime();
3961+
if (remaining <= 0) {
3962+
LOGGER.warning("Ran out of time waiting for agents to disconnect");
3963+
break;
3964+
}
3965+
f.get(remaining, TimeUnit.NANOSECONDS);
39563966
} catch (InterruptedException e) {
39573967
Thread.currentThread().interrupt();
39583968
break; // someone wants us to die now. quick!

0 commit comments

Comments
 (0)