forked from LLNL/magpie
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmagpie-run-spark-sparkwordcount
executable file
·105 lines (89 loc) · 4.14 KB
/
magpie-run-spark-sparkwordcount
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash
#############################################################################
# Copyright (C) 2013 Lawrence Livermore National Security, LLC.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Albert Chu <chu11@llnl.gov>
# LLNL-CODE-644248
#
# This file is part of Magpie, scripts for running Hadoop on
# traditional HPC systems. For details, see <URL>.
#
# Magpie is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Magpie is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Magpie. If not, see <http://www.gnu.org/licenses/>.
#############################################################################
# This script is the core sparkwordcount running script. For the most
# part, it shouldn't be edited. See job submission files for
# configuration details.
# Pull in the shared Magpie environment exports and helper functions.
. ${MAGPIE_SCRIPTS_HOME}/magpie-common-exports
. ${MAGPIE_SCRIPTS_HOME}/magpie-common-functions

# Main class of the word count example that gets launched below.
sparkwordcountclass='org.apache.spark.examples.JavaWordCount'

# Work from the Spark installation directory.
cd ${SPARK_HOME}
# The examples jar is named with seemingly arbitrary version strings,
# so it is located with "find" (see the Spark version check below)
# rather than by a hard-coded name.
# Optional copy-in step.  When SPARK_SPARKWORDCOUNT_COPY_IN_FILE is
# set, the file it names is copied to SPARK_SPARKWORDCOUNT_FILE before
# the job is launched.  Both values are expected to be file:// or
# hdfs:// URIs.

# Print the "hadoop fs" command line that copies a source URI to a
# destination URI.
#
# Arguments: $1 - destination URI (must contain hdfs:// or file://)
#            $2 - source URI (must begin with file:// or hdfs://)
# Globals:   HADOOP_HOME, hadoopcmdprefix (read)
# Outputs:   the command line on stdout
# Returns:   0 on success, 1 if the scheme combination is not recognized
#            (nothing is printed in that case)
__sparkwordcount_copy_in_command () {
    local destfile=$1
    local srcfile=$2
    local hadoopfs="${HADOOP_HOME}/${hadoopcmdprefix}/hadoop fs"
    local subcommand

    case "${destfile}" in
        *hdfs://*)
            case "${srcfile}" in
                file://*) subcommand="-copyFromLocal" ;;
                hdfs://*) subcommand="-cp" ;;
                *) return 1 ;;
            esac
            ;;
        *file://*)
            case "${srcfile}" in
                file://*) subcommand="-cp" ;;
                # "hadoop fs" has no -cpToLocal subcommand; the
                # HDFS-to-local copy is -copyToLocal.
                hdfs://*) subcommand="-copyToLocal" ;;
                *) return 1 ;;
            esac
            ;;
        *)
            return 1
            ;;
    esac

    echo "${hadoopfs} ${subcommand} ${srcfile} ${destfile}"
}

if [ "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}X" != "X" ]
then
    # Make directory of destination file
    case "${SPARK_SPARKWORDCOUNT_FILE}" in
        *hdfs://*)
            filedir=$(dirname "${SPARK_SPARKWORDCOUNT_FILE}")
            command="${HADOOP_HOME}/${hadoopcmdprefix}/hadoop fs -mkdir -p ${filedir}"
            echo "Running $command" >&2
            $command
            ;;
        *file://*)
            filedir=$(dirname "${SPARK_SPARKWORDCOUNT_FILE}")
            # Plain mkdir needs a filesystem path, so drop the scheme.
            filedir=${filedir#file://}
            command="mkdir -p ${filedir}"
            echo "Running $command" >&2
            $command
            ;;
    esac

    # Only run the copy when a command could actually be built, so an
    # unrecognized scheme never re-runs a stale (or empty) command.
    if command=$(__sparkwordcount_copy_in_command "${SPARK_SPARKWORDCOUNT_FILE}" "${SPARK_SPARKWORDCOUNT_COPY_IN_FILE}")
    then
        echo "Running copy in $command" >&2
        $command
    else
        echo "Skipping copy in of ${SPARK_SPARKWORDCOUNT_COPY_IN_FILE} to ${SPARK_SPARKWORDCOUNT_FILE}: both must be file:// or hdfs:// URIs" >&2
    fi
fi
# Build and launch the word count job.  SPARK_VERSION values matching
# 1.X.Y are launched through spark-submit; anything else falls back to
# spark-class.  The examples jar file name carries version strings, so
# it is located with find rather than hard-coded.
if echo ${SPARK_VERSION} | grep -q -E "1\.[0-9]\.[0-9]"
then
    sparkexamplesjar=$(find "${SPARK_HOME}/lib" | grep jar | grep examples)
    command="${sparkcmdprefix}/spark-submit --class ${sparkwordcountclass} ${sparkexamplesjar} ${SPARK_SPARKWORDCOUNT_FILE}"
else
    sparkexamplesjar=$(find "${SPARK_HOME}/examples" | grep jar | grep examples | grep -v assembly | grep -v sources)
    # Ugh, Spark 0.9.1 doesn't have a way to pass in specific jars
    # except via the SPARK_CLASSPATH environment variable.
    #
    # Because user may have set SPARK_JOB_CLASSPATH, we can't just set
    # it here.  So we'll reput it into spark-env.sh
    if [ "${SPARK_JOB_CLASSPATH}X" != "X" ]
    then
        # Classpath entries are separated by ':' on Unix.  With ';' the
        # user's classpath and the examples jar would be read as one
        # single (nonexistent) entry.
        echo "export SPARK_CLASSPATH=\"${SPARK_JOB_CLASSPATH}:${sparkexamplesjar}\"" >> "${SPARK_CONF_DIR}/spark-env.sh"
    else
        echo "export SPARK_CLASSPATH=\"${sparkexamplesjar}\"" >> "${SPARK_CONF_DIR}/spark-env.sh"
    fi
    command="${sparkcmdprefix}/spark-class ${sparkwordcountclass} spark://$SPARK_MASTER_NODE:$SPARK_MASTER_PORT ${SPARK_SPARKWORDCOUNT_FILE}"
fi

# The command string is deliberately left unquoted so that it is
# word-split into the program and its arguments.
echo "Running $command" >&2
$command

# This script reports success regardless of the job's exit status.
exit 0