Commit f481110

idzikovskyi authored and ekrivokonmapr committed

EZAF-1127 Fix of container startup time (apache#1077)
* EZAF-1127 Minor fix of container startup time by improvement in configure.sh / fix for Spark 3.3.2
* Improve changePermission function in configure.sh
* [classpathfilter] Add classpathfilter go module (apache#1078)
* Add classpathfilter go module
* Properly copy classpathfilter into distribution
* Update bin/mapr-classpath.sh
* Change content of dep-blacklist.txt (apache#1079)

1 parent 25c7972 commit f481110

8 files changed, +276 -45 lines changed

assembly/src/main/assembly/assembly.xml

Lines changed: 9 additions & 0 deletions

@@ -55,6 +55,15 @@
       <include>**/*</include>
     </includes>
   </fileSet>
+  <fileSet>
+    <directory>
+      ${project.parent.basedir}/common/classpath-filter/target/bin/
+    </directory>
+    <outputDirectory>bin/</outputDirectory>
+    <includes>
+      <include>classpathfilter</include>
+    </includes>
+  </fileSet>
   <fileSet>
     <directory>
       ${project.parent.basedir}/assembly/target/${spark.jar.dir}

bin/configure.sh

Lines changed: 14 additions & 42 deletions

@@ -229,17 +229,17 @@ function changeWardenConfig() {
 # Change permission
 #
 
-function change_permissions() {
-  if [ -f $DAEMON_CONF ]; then
-    if [ ! -z "$MAPR_USER" ]; then
-      chown -R ${MAPR_USER} ${SPARK_HOME}
+function changePermissions() {
+  if [ -f "$DAEMON_CONF" ]; then
+    if [ -n "$MAPR_USER" ] && [ -n "$MAPR_GROUP" ]; then
+      chown -R "${MAPR_USER}:${MAPR_GROUP}" "$SPARK_HOME"
+    elif [ -n "$MAPR_USER" ]; then
+      chown -R "$MAPR_USER" "$SPARK_HOME"
+    elif [ -n "$MAPR_GROUP" ]; then
+      chgrp -R "$MAPR_GROUP" "$SPARK_HOME"
     fi
-
-    if [ ! -z "$MAPR_GROUP" ]; then
-      chgrp -R ${MAPR_GROUP} ${SPARK_HOME}
-    fi
-    chmod -f u+x $SPARK_HOME/bin/*
   fi
+  chmod -f u+x "$SPARK_HOME"/bin/*
 }
 
 #

@@ -345,8 +345,10 @@ function createAppsSparkFolder() {
       export MAPR_TICKETFILE_LOCATION="${MAPR_HOME}/conf/mapruserticket"
     fi
   fi
-  hadoop fs -mkdir -p /apps/spark > /dev/null 2>&1
-  hadoop fs -chmod 777 /apps/spark > /dev/null 2>&1
+  if ! hadoop fs -test -d /apps/spark >/dev/null 2>&1; then
+    hadoop fs -mkdir -p /apps/spark >/dev/null 2>&1
+    hadoop fs -chmod 777 /apps/spark >/dev/null 2>&1
+  fi
 }
 
 #

@@ -465,35 +467,6 @@ function registerPortHistoryServer() {
   fi
 }
 
-function configureDepBlackList() {
-  dep_blacklist_path=$MAPR_HOME/spark/spark-$SPARK_VERSION/conf/dep-blacklist.txt
-
-  slf4j_reload4j_name=$(ls $MAPR_HOME/hadoop/hadoop-$HADOOP_VERSION/share/hadoop/common/lib/ | grep slf4j-reload4j)
-  slf4j_reload4j_hadoop_path=$MAPR_HOME/hadoop/hadoop-$HADOOP_VERSION/share/hadoop/common/lib/$slf4j_reload4j_name
-  slf4j_reload4j_lib_path=$MAPR_HOME/lib/$slf4j_reload4j_name
-
-  hadoop_common_name=$(ls $MAPR_HOME/hadoop/hadoop-$HADOOP_VERSION/share/hadoop/common/sources/ | grep hadoop-common | grep -v test)
-  hadoop_common_path=$MAPR_HOME/hadoop/hadoop-$HADOOP_VERSION/share/hadoop/common/sources/$hadoop_common_name
-
-  log4j2_slf4j_impl_path=$MAPR_HOME/lib/log4j2/$(ls /opt/mapr/lib/log4j2/ | grep log4j-slf4j-impl)
-
-  if ! grep -q $hadoop_common_path $dep_blacklist_path; then
-    echo $hadoop_common_path >> $dep_blacklist_path
-  fi
-
-  if ! grep -q $slf4j_reload4j_hadoop_path $dep_blacklist_path; then
-    echo $slf4j_reload4j_hadoop_path >> $dep_blacklist_path
-  fi
-
-  if ! grep -q $slf4j_reload4j_lib_path $dep_blacklist_path; then
-    echo $slf4j_reload4j_lib_path >> $dep_blacklist_path
-  fi
-
-  if ! grep -q $log4j2_slf4j_impl_path $dep_blacklist_path; then
-    echo $log4j2_slf4j_impl_path >> $dep_blacklist_path
-  fi
-}
-
 function registerServicePorts() {
   registerPortMaster
   registerPortThriftServer

@@ -656,9 +629,8 @@ fi
 if [ ! "$isSecure" -eq 2 ] ; then
   configureSecurity
 fi
-configureDepBlackList
 createAppsSparkFolder
-change_permissions
+changePermissions
 
 copyWardenConfFiles
 stopServicesForRestartByWarden

bin/mapr-classpath.sh

Lines changed: 4 additions & 3 deletions

@@ -1,9 +1,10 @@
-#!/usr/bin/env bash
+#!/bin/sh
 
 if [ ! "$(command -v java)" ]; then
   echo "JAVA_HOME is not set" >&2
   exit 1
 fi
 
-SPARK_HOME=$(readlink -f "/usr/local/spark")
-java -cp $SPARK_HOME'/jars/*' org.apache.spark.classpath.ClasspathFilter $(mapr classpath) $SPARK_HOME'/conf/dep-blacklist.txt'
+SPARK_HOME=${SPARK_HOME:-"$(readlink -f /usr/local/spark)"}
+mapr_classpath="$(mapr classpath)"
+exec "${SPARK_HOME}/bin/classpathfilter" -b "${SPARK_HOME}/conf/dep-blacklist.txt" "$mapr_classpath"

common/classpath-filter/pom.xml

Lines changed: 22 additions & 0 deletions

@@ -79,6 +79,28 @@
         </execution>
       </executions>
     </plugin>
+    <plugin>
+      <artifactId>exec-maven-plugin</artifactId>
+      <groupId>org.codehaus.mojo</groupId>
+      <executions>
+        <execution>
+          <id>build-classpathfilter-go-binary</id>
+          <phase>compile</phase>
+          <goals>
+            <goal>exec</goal>
+          </goals>
+          <configuration>
+            <workingDirectory>src/main/go/classpathfilter</workingDirectory>
+            <executable>go</executable>
+            <arguments>
+              <argument>build</argument>
+              <argument>-o</argument>
+              <argument>${project.build.directory}/bin/classpathfilter</argument>
+            </arguments>
+          </configuration>
+        </execution>
+      </executions>
+    </plugin>
   </plugins>
 </build>
</project>
Lines changed: 3 additions & 0 deletions

module classpathfilter

go 1.16
Lines changed: 220 additions & 0 deletions

package main

import "errors"
import "flag"
import "fmt"
import "log"
import "os"
import "os/exec"
import "path/filepath"
import "strings"

func getSparkHome() (sparkHome string, err error) {
	var sparkHomeEnv = os.Getenv("SPARK_HOME")
	if sparkHomeEnv != "" {
		sparkHome = sparkHomeEnv
	} else {
		maprHome := os.Getenv("MAPR_HOME")
		if maprHome != "" {
			sparkVersionFileLocation := filepath.Join(maprHome, "spark/sparkversion")
			sparkVersionBytes, e := os.ReadFile(sparkVersionFileLocation)
			if e == nil {
				var sparkVersion = string(sparkVersionBytes)
				sparkVersion = strings.Trim(sparkVersion, " \n")
				if sparkVersion != "" {
					sparkHome = filepath.Join(maprHome, fmt.Sprintf("spark/spark-%s", sparkVersion))
				}
			}
		}
		if sparkHome == "" {
			sparkHome, _ = os.Readlink("/usr/local/spark")
		}
	}

	if sparkHome == "" {
		err = errors.New("Can not find SPARK_HOME!")
	}
	return sparkHome, err
}

func getBlacklist(blacklistFileLocation string) (blacklist []string, err error) {
	blacklistBytes, err := os.ReadFile(blacklistFileLocation)
	if err != nil {
		return blacklist, fmt.Errorf("Can not read dep-blacklist.txt configuration file: %s\n", err)
	}
	blacklistString := string(blacklistBytes)
	blacklist = strings.Fields(blacklistString)

	return blacklist, nil
}

func parseClasspathString(classpathString string) []string {
	classpathSeparator := func(c rune) bool {
		return c == ':'
	}
	return strings.FieldsFunc(classpathString, classpathSeparator)
}

func getMaprClasspath() (maprClasspath []string, err error) {
	maprClasspathCmd := exec.Command("mapr", "classpath")
	maprClasspathOutput, err := maprClasspathCmd.Output()
	if err != nil {
		return maprClasspath, fmt.Errorf("Error executing 'mapr classpath' command: %s\n", err)
	}

	var maprClasspathString = string(maprClasspathOutput)
	maprClasspathString = strings.Trim(maprClasspathString, " \n")
	if maprClasspathString == "" {
		return maprClasspath, errors.New("Output of 'mapr classpath' command is empty!")
	}

	maprClasspath = parseClasspathString(maprClasspathString)

	return maprClasspath, nil
}

type Classpath struct {
	entries            []string
	_duplicateDict     map[string]bool
	_blacklistDict     map[string]bool
	_blacklistPatterns []string
}

func NewClasspath() *Classpath {
	cp := new(Classpath)
	cp.entries = make([]string, 0, 2048)
	cp._duplicateDict = make(map[string]bool, 2048)
	cp._blacklistDict = make(map[string]bool, 256)
	cp._blacklistPatterns = make([]string, 256)

	return cp
}

func (cp *Classpath) SetBlacklist(blacklist []string) {
	isPattern := func(path string) bool {
		magicChars := `*?[\`
		return strings.ContainsAny(path, magicChars)
	}

	for _, entry := range blacklist {
		if isPattern(entry) {
			cp._blacklistPatterns = append(cp._blacklistPatterns, entry)
		} else {
			cp._blacklistDict[entry] = true
		}
	}
}

func (cp *Classpath) AppendEntry(entry string) {
	if _, found := cp._duplicateDict[entry]; found {
		return
	}
	if _, found := cp._blacklistDict[entry]; found {
		return
	}
	for _, pattern := range cp._blacklistPatterns {
		if matched, _ := filepath.Match(pattern, entry); matched {
			return
		}
	}

	cp.entries = append(cp.entries, entry)
	cp._duplicateDict[entry] = true
}

func (cp *Classpath) AsString() string {
	return strings.Join(cp.entries, ":")
}

func classpathFilter(classpath []string, blacklist []string) string {
	cp := NewClasspath()
	cp.SetBlacklist(blacklist)

	for _, classpathEntry := range classpath {
		if classpathEntry[0] == '/' && strings.HasSuffix(classpathEntry, "/*") {
			classpathEntryLocation := classpathEntry[:len(classpathEntry)-1]

			/*
			 * Original ClasspathFilter from Spark scan directories recursively.
			 * However, it's most likely this is a bug.
			 */
			// files, err := filepath.WalkDir(classpathEntryLocation)
			// ...

			files, err := os.ReadDir(classpathEntryLocation)
			if err != nil {
				// log.Printf("Can not read '%s' directory: %s\n", classpathEntry, err)
			}
			for _, file := range files {
				filename := file.Name()
				filetype := file.Type()

				if (filetype == 0 || filetype&os.ModeSymlink != 0) &&
					strings.HasSuffix(filename, ".jar") {
					fullpath := filepath.Join(classpathEntryLocation, filename)
					cp.AppendEntry(fullpath)
				}
			}
		} else {
			if _, err := os.Stat(classpathEntry); err == nil { // remove non existing files
				cp.AppendEntry(classpathEntry)
			}
		}
	}

	return cp.AsString()
}

func main() {
	flag.Usage = func() {
		filename := filepath.Base(os.Args[0])
		msg := "Usage of %s: [<CLASSPATH>]\n" +
			"  -b path\n" +
			"        path to blacklist configuration (default \"$SPARK_HOME/conf/dep-blacklist.txt\")\n" +
			"\n" +
			"Filter mapr classpath using rules from dep-blacklist.txt.\n"
		fmt.Printf(msg, filename)
	}
	blacklistLocationArg := flag.String("b", "", "path to blacklist configuration")
	flag.Parse()

	args := flag.Args()

	var classpathArg string
	if len(args) > 1 {
		fmt.Println("invalid number of arguments")
		flag.Usage()
		os.Exit(1)
	} else if len(args) == 1 {
		classpathArg = args[0]
	}

	var blacklistFileLocation string
	if *blacklistLocationArg != "" {
		blacklistFileLocation = *blacklistLocationArg
	} else {
		sparkHome, err := getSparkHome()
		if err != nil {
			log.Fatalln(err)
		}
		blacklistFileLocation = filepath.Join(sparkHome, "conf/dep-blacklist.txt")
	}

	blacklist, err := getBlacklist(blacklistFileLocation)
	if err != nil {
		log.Fatalln(err)
	}

	var maprClasspath []string
	if classpathArg != "" {
		maprClasspath = parseClasspathString(classpathArg)
	} else {
		maprClasspath, err = getMaprClasspath()
		if err != nil {
			log.Fatalln(err)
		}
	}

	classpath := classpathFilter(maprClasspath, blacklist)
	fmt.Print(classpath)
}
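Not part of the commit: a minimal test-style sketch of how the Classpath type above drops duplicates, exact blacklist hits, and glob blacklist hits. It assumes it sits next to the source in the same main package; the file name (e.g. classpathfilter_test.go) and the jar paths are hypothetical, for illustration only.

package main

import "testing"

// Sketch only, not from the commit: exercises NewClasspath/SetBlacklist/AppendEntry
// from the source above. Jar paths are made up for illustration.
func TestClasspathFiltering(t *testing.T) {
	cp := NewClasspath()
	cp.SetBlacklist([]string{
		"/opt/mapr/lib/exact-1.0.jar",                 // exact path, stored in the blacklist dict
		"/opt/mapr/lib/log4j2/log4j-slf4j-impl-*.jar", // contains '*', stored as a glob pattern
	})

	cp.AppendEntry("/opt/mapr/lib/kept-1.0.jar")                       // kept
	cp.AppendEntry("/opt/mapr/lib/kept-1.0.jar")                       // duplicate, dropped
	cp.AppendEntry("/opt/mapr/lib/exact-1.0.jar")                      // exact blacklist hit, dropped
	cp.AppendEntry("/opt/mapr/lib/log4j2/log4j-slf4j-impl-2.17.2.jar") // glob blacklist hit, dropped

	if got, want := cp.AsString(), "/opt/mapr/lib/kept-1.0.jar"; got != want {
		t.Fatalf("got %q, want %q", got, want)
	}
}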

conf/dep-blacklist.txt

Lines changed: 3 additions & 0 deletions

/opt/mapr/lib/log4j2/log4j-slf4j-impl-*.jar
/opt/mapr/lib/slf4j-reload4j-*.jar
/opt/mapr/hadoop/hadoop-*/share/hadoop/common/lib/slf4j-reload4j-*.jar
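The entries above are shell-style globs that the Go filter applies per classpath entry with filepath.Match. A standalone sketch, not part of the commit (the concrete Hadoop and slf4j versions below are made up for illustration):

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Blacklist pattern taken from dep-blacklist.txt; the jar versions are hypothetical.
	pattern := "/opt/mapr/hadoop/hadoop-*/share/hadoop/common/lib/slf4j-reload4j-*.jar"
	entry := "/opt/mapr/hadoop/hadoop-3.3.4/share/hadoop/common/lib/slf4j-reload4j-1.7.36.jar"

	// filepath.Match: each '*' matches any run of non-separator characters,
	// so one pattern covers any Hadoop version and any slf4j-reload4j version.
	matched, err := filepath.Match(pattern, entry)
	fmt.Println(matched, err) // true <nil>
}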

dev/make-distribution.sh

Lines changed: 1 addition & 0 deletions

@@ -265,6 +265,7 @@ mkdir "$DISTDIR/conf"
 cp "$SPARK_HOME"/conf/* "$DISTDIR/conf"
 cp "$SPARK_HOME/README.md" "$DISTDIR"
 cp -r "$SPARK_HOME/bin" "$DISTDIR"
+cp "${SPARK_HOME}/common/classpath-filter/target/bin/classpathfilter" "$DISTDIR/bin"
 cp -r "$SPARK_HOME/python" "$DISTDIR"
 
 # Remove the python distribution from dist/ if we built it
