21
21
import java .io .File ;
22
22
import java .io .FileInputStream ;
23
23
import java .io .IOException ;
24
- import java .util .concurrent .ConcurrentHashMap ;
24
+ import java .util .Iterator ;
25
+ import java .util .Map ;
26
+ import java .util .concurrent .ConcurrentMap ;
27
+ import java .util .concurrent .Executor ;
28
+ import java .util .concurrent .Executors ;
25
29
26
30
import com .google .common .annotations .VisibleForTesting ;
31
+ import com .google .common .base .Objects ;
32
+ import com .google .common .collect .Maps ;
27
33
import org .slf4j .Logger ;
28
34
import org .slf4j .LoggerFactory ;
29
35
43
49
public class ExternalShuffleBlockManager {
44
50
private final Logger logger = LoggerFactory .getLogger (ExternalShuffleBlockManager .class );
45
51
46
- // Map from "appId-execId" to the executor's configuration.
47
- private final ConcurrentHashMap <String , ExecutorShuffleInfo > executors =
48
- new ConcurrentHashMap <String , ExecutorShuffleInfo >();
52
+ // Map containing all registered executors' metadata.
53
+ private final ConcurrentMap <AppExecId , ExecutorShuffleInfo > executors ;
49
54
50
- // Returns an id suitable for a single executor within a single application.
51
- private String getAppExecId (String appId , String execId ) {
52
- return appId + "-" + execId ;
55
+ // Single-threaded Java executor used to perform expensive recursive directory deletion.
56
+ private final Executor directoryCleaner ;
57
+
58
+ public ExternalShuffleBlockManager () {
59
+ // TODO: Give this thread a name.
60
+ this (Executors .newSingleThreadExecutor ());
61
+ }
62
+
63
/**
 * Builds a block manager that submits directory deletion work to the supplied executor.
 * Exposed so tests can control when directories are actually cleaned up.
 *
 * @param directoryCleaner executor that will run the directory deletion tasks
 */
@VisibleForTesting
ExternalShuffleBlockManager(Executor directoryCleaner) {
  this.directoryCleaner = directoryCleaner;
  this.executors = Maps.newConcurrentMap();
}
54
69
55
70
/** Registers a new Executor with all the configuration we need to find its shuffle files. */
56
71
public void registerExecutor (
57
72
String appId ,
58
73
String execId ,
59
74
ExecutorShuffleInfo executorInfo ) {
60
- String fullId = getAppExecId (appId , execId );
75
+ AppExecId fullId = new AppExecId (appId , execId );
61
76
logger .info ("Registered executor {} with {}" , fullId , executorInfo );
62
77
executors .put (fullId , executorInfo );
63
78
}
@@ -78,7 +93,7 @@ public ManagedBuffer getBlockData(String appId, String execId, String blockId) {
78
93
int mapId = Integer .parseInt (blockIdParts [2 ]);
79
94
int reduceId = Integer .parseInt (blockIdParts [3 ]);
80
95
81
- ExecutorShuffleInfo executor = executors .get (getAppExecId (appId , execId ));
96
+ ExecutorShuffleInfo executor = executors .get (new AppExecId (appId , execId ));
82
97
if (executor == null ) {
83
98
throw new RuntimeException (
84
99
String .format ("Executor is not registered (appId=%s, execId=%s)" , appId , execId ));
@@ -94,6 +109,56 @@ public ManagedBuffer getBlockData(String appId, String execId, String blockId) {
94
109
}
95
110
}
96
111
112
+ /**
113
+ * Removes our metadata of all executors registered for the given application, and optionally
114
+ * also deletes the local directories associated with the executors of that application in a
115
+ * separate thread.
116
+ *
117
+ * It is not valid to call registerExecutor() for an executor with this appId after invoking
118
+ * this method.
119
+ */
120
+ public void applicationRemoved (String appId , boolean cleanupLocalDirs ) {
121
+ logger .info ("Application {} removed, cleanupLocalDirs = {}" , appId , cleanupLocalDirs );
122
+ Iterator <Map .Entry <AppExecId , ExecutorShuffleInfo >> it = executors .entrySet ().iterator ();
123
+ while (it .hasNext ()) {
124
+ Map .Entry <AppExecId , ExecutorShuffleInfo > entry = it .next ();
125
+ AppExecId fullId = entry .getKey ();
126
+ final ExecutorShuffleInfo executor = entry .getValue ();
127
+
128
+ // Only touch executors associated with the appId that was removed.
129
+ if (appId .equals (fullId .appId )) {
130
+ it .remove ();
131
+
132
+ if (cleanupLocalDirs ) {
133
+ logger .info ("Cleaning up executor {}'s {} local dirs" , fullId , executor .localDirs .length );
134
+
135
+ // Execute the actual deletion in a different thread, as it may take some time.
136
+ directoryCleaner .execute (new Runnable () {
137
+ @ Override
138
+ public void run () {
139
+ deleteExecutorDirs (executor .localDirs );
140
+ }
141
+ });
142
+ }
143
+ }
144
+ }
145
+ }
146
+
147
+ /**
148
+ * Synchronously deletes each directory one at a time.
149
+ * Should be executed in its own thread, as this may take a long time.
150
+ */
151
+ private void deleteExecutorDirs (String [] dirs ) {
152
+ for (String localDir : dirs ) {
153
+ try {
154
+ JavaUtils .deleteRecursively (new File (localDir ));
155
+ logger .debug ("Successfully cleaned up directory: " + localDir );
156
+ } catch (Exception e ) {
157
+ logger .error ("Failed to delete directory: " + localDir , e );
158
+ }
159
+ }
160
+ }
161
+
97
162
/**
98
163
* Hash-based shuffle data is simply stored as one file per block.
99
164
* This logic is from FileShuffleBlockManager.
@@ -146,9 +211,36 @@ static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename)
146
211
return new File (new File (localDir , String .format ("%02x" , subDirId )), filename );
147
212
}
148
213
149
- /** For testing, clears all registered executors. */
150
- @ VisibleForTesting
151
- void clearRegisteredExecutors () {
152
- executors .clear ();
214
+ /** Simply encodes an executor's full ID, which is appId + execId. */
215
+ private static class AppExecId {
216
+ final String appId ;
217
+ final String execId ;
218
+
219
+ private AppExecId (String appId , String execId ) {
220
+ this .appId = appId ;
221
+ this .execId = execId ;
222
+ }
223
+
224
+ @ Override
225
+ public boolean equals (Object o ) {
226
+ if (this == o ) return true ;
227
+ if (o == null || getClass () != o .getClass ()) return false ;
228
+
229
+ AppExecId appExecId = (AppExecId ) o ;
230
+ return Objects .equal (appId , appExecId .appId ) && Objects .equal (execId , appExecId .execId );
231
+ }
232
+
233
+ @ Override
234
+ public int hashCode () {
235
+ return Objects .hashCode (appId , execId );
236
+ }
237
+
238
+ @ Override
239
+ public String toString () {
240
+ return Objects .toStringHelper (this )
241
+ .add ("appId" , appId )
242
+ .add ("execId" , execId )
243
+ .toString ();
244
+ }
153
245
}
154
246
}
0 commit comments