Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
christopherbozeman committed Jul 17, 2015
2 parents f8b5b66 + 9e303f0 commit 7fc762e
Show file tree
Hide file tree
Showing 16 changed files with 829 additions and 44 deletions.
4 changes: 2 additions & 2 deletions drill/setup_drill
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ end

def installDrill(targetDir, runDir, logDir)
drillTarGZUrl="http://getdrill.org/drill/download"
drillTarGZ="apache-drill-0.8.0.tar.gz"
drillRoot="apache-drill-0.8.0"
drillTarGZ="apache-drill-1.0.0.tar.gz"
drillRoot="apache-drill-1.0.0"

println "Going to download Apache Drill distribution #{drillTarGZ} from #{drillTarGZUrl}"
sudo "curl -L --silent --show-error --fail --connect-timeout 60 --max-time 720 --retry 5 -O #{drillTarGZUrl}/#{drillTarGZ}"
Expand Down
14 changes: 14 additions & 0 deletions drill/setup_drill_jets3t
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh
# Run this after ./setup_drill
#
# Downloads the JetS3t 0.9.3 release into /tmp, copies its jar into Drill's
# jars/3rdparty directory, drops every line mentioning jets3t from
# bin/hadoop-excludes.txt (the previous file is kept as
# hadoop-excludes.txt.old) and restarts the drillbit.
#
# Directory changes are done with `cd` inside subshells: pushd/popd are bash
# builtins and are not guaranteed to exist in the /bin/sh named above.

# Stop at the first failing step so that a failed download or a missing Drill
# directory never leads to rewriting files elsewhere or restarting Drill.
set -e

jets3t="jets3t-0.9.3"
drill_home="/home/hadoop/drill"

(
  cd /tmp
  rm -rf "${jets3t}.zip" "${jets3t}"
  wget "http://bitbucket.org/jmurty/jets3t/downloads/${jets3t}.zip"
  unzip "${jets3t}.zip"
  cp -f "${jets3t}/jars/${jets3t}.jar" "${drill_home}/jars/3rdparty"
)

(
  cd "${drill_home}/bin"
  cp -af hadoop-excludes.txt hadoop-excludes.txt.old
  # grep exits with 1 when it selects no lines; that is not an error here.
  grep -v jets3t < hadoop-excludes.txt.old > hadoop-excludes.txt || [ "$?" -eq 1 ]
)

sudo "${drill_home}/bin/drillbit.sh" restart
16 changes: 8 additions & 8 deletions elasticsearch/elasticsearch_install.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def install_pleaserun
@run_dir = "/home/hadoop/elasticsearch/"
# this is where additional logs are sent in case terminal output needs to be caught
@log_dir = "/home/hadoop/elasticsearch/"
@elasticsearch_version = "1.5.1"
@elasticsearch_version = "1.6.0"
@elasticsearch_port_master = 9200
@elasticsearch_port_slaves = 9202

Expand Down Expand Up @@ -74,24 +74,24 @@ def install_elasticsearch(target_dir, run_dir, log_dir, elasticsearch_version)

install_dir = "#{target_dir}elasticsearch-#{elasticsearch_version}/"
# installing elasticsearch aws plugin
run("#{install_dir}bin/plugin -install elasticsearch/elasticsearch-cloud-aws/2.5.0")
run("#{install_dir}bin/plugin -install elasticsearch/elasticsearch-cloud-aws/2.6.0")
# installing hdfs repository
run("#{install_dir}bin/plugin -install elasticsearch/elasticsearch-repository-hdfs/2.1.0.Beta3-hadoop2")
run("#{install_dir}bin/plugin -install elasticsearch/elasticsearch-repository-hdfs/2.1.0-hadoop2")

# replace yaml with new config file
run("mv elasticsearch.yml #{install_dir}config/elasticsearch.yml")
sudo("/usr/local/bin/pleaserun --install -p sysv -v lsb-3.1 #{install_dir}/bin/elasticsearch")
end

def install_hadoop_plugin(target_dir, run_dir)
run("wget https://download.elasticsearch.org/hadoop/elasticsearch-hadoop-2.1.0.Beta3.zip --no-check-certificate")
run("mv elasticsearch-hadoop-2.1.0.Beta3.zip #{target_dir}")
run("unzip #{target_dir}elasticsearch-hadoop-2.1.0.Beta3.zip -d #{target_dir}")
run("echo export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:#{@target_dir}elasticsearch-hadoop-2.1.0.Beta3/dist/* >> ~/.bashrc")
run("wget https://download.elasticsearch.org/hadoop/elasticsearch-hadoop-2.1.0.zip --no-check-certificate")
run("mv elasticsearch-hadoop-2.1.0.zip #{target_dir}")
run("unzip #{target_dir}elasticsearch-hadoop-2.1.0.zip -d #{target_dir}")
run("echo export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:#{@target_dir}elasticsearch-hadoop-2.1.0/dist/* >> ~/.bashrc")
end

def clean_up
run "rm -Rf #{@target_dir}elasticsearch-hadoop-2.1.0.Beta3.zip"
run "rm -Rf #{@target_dir}elasticsearch-hadoop-2.1.0.zip"
run "rm elasticsearch-#{@elasticsearch_version}.tar.gz"
end

Expand Down
2 changes: 1 addition & 1 deletion elasticsearch/kibananginx_install.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def sudo(cmd)
end

@is_master = Emr::JsonInfoFile.new('instance')['isMaster'].to_s == 'true'
@kibana_version = "4.0.2-linux-x64"
@kibana_version = "4.1.1-linux-x64"
@target_dir = "/home/hadoop/kibana/"
@nginx_dir = "/etc/nginx/"
@es_port_num = 9200
Expand Down
57 changes: 57 additions & 0 deletions hama/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
Hama on EMR
=====================

This script is a sample of installing and configuring Hama on EMR.

## Quick start guide

* Using AWS Command Line Interface(for more information, see http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html)
* Script : s3://hamacluster/install-hama.sh

### Arguments (optional)

-u, --url
Hama release download URL. Only tarball files are supported.
ex)
-u http://apache.mirror.cdnetworks.com/hama/hama-{version}/hama-{version}.tar.gz
--url s3://[your_bucket]/[path_to]/hama-{version}.tar.gz

-c, --conf
Additional properties for Hama configuration (space-separated).
ex)
-c "bsp.master.address=host1.mydomain.com:40000 hama.zookeeper.quorum=host1.mydomain.com,host2.mydomain.com"

-e, --env
Set environment variables in hama-env.sh.(Space-separated delimiter)
ex)
-e "HAMA_LOG_DIR=[path_to_log_dir] HAMA_MANAGE_ZK=true"

### Example

#### 1. Launching a Hama cluster with default configuration.

The default configuration supports only AMI 3.7.0 (Hadoop 2.4.0). To bootstrap Hama on another Hadoop version, build Hama against that Hadoop version and pass the location of the resulting tarball with the -u (--url) argument when launching this script.

```
$ aws emr create-cluster \
--name="Test Cluster" \
--ami-version=3.7.0 \
--no-auto-terminate \
--use-default-roles --ec2-attributes KeyName=[your keyname] \
--applications Name=Ganglia \
--instance-groups InstanceGroupType=MASTER,InstanceCount=1,InstanceType=m3.xlarge InstanceGroupType=CORE,InstanceCount=9,InstanceType=m3.xlarge \
--bootstrap-action Name="Install Hama",Path=s3://hamacluster/install-hama.sh
```

#### 2. Launching a Hama cluster with additional configuration.

```
$ aws emr create-cluster \
--name="Test Cluster" \
--ami-version=3.7.0 \
--no-auto-terminate \
--use-default-roles --ec2-attributes KeyName=[your keyname] \
--applications Name=Ganglia \
--instance-groups InstanceGroupType=MASTER,InstanceCount=1,InstanceType=m3.xlarge InstanceGroupType=CORE,InstanceCount=9,InstanceType=m3.xlarge \
--bootstrap-action Name="Install Hama",Path=s3://hamacluster/install-hama.sh,Args=["-c","hama.graph.thread.pool.size=256 bsp.child.java.opts=-Xmx3072m","-e","HAMA_HOME=/home/hadoop/hama-0.7.0"]
```
263 changes: 263 additions & 0 deletions hama/install-hama.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# AWS EMR bootstrap script
# for installing Apache Hama on EMR
#
# Arguments
# -h, --help
# Show help page.
#
# -u, --url (Optional)
# Hama release download URL. Only tarball files are supported.
# ex)
# -u http://apache.mirror.cdnetworks.com/hama/hama-{version}/hama-{version}.tar.gz
# --url s3://[your_bucket]/[path_to]/hama-{version}.tar.gz
#
# -c, --conf (Optional)
# Additional properties for Hama configuration (space-separated).
# ex)
# -c "bsp.master.address=host1.mydomain.com:40000 hama.zookeeper.quorum=host1.mydomain.com,host2.mydomain.com"
#
# -e, --env (Optional)
# Set environment variables in hama-env.sh.(Space-separated delimiter)
# ex)
# -e "HAMA_LOG_DIR=[path_to_log_dir] HAMA_MANAGE_ZK=true"
#
#

# Print the header of a Hama configuration file: the XML declaration, the
# stylesheet processing instruction and the opening <configuration> tag,
# one per line.
function start_configuration() {
  printf '%s\n' \
    '<?xml version="1.0"?>' \
    '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>' \
    '<configuration>'
}

# Print the footer of a Hama configuration file: the closing
# </configuration> tag.
function end_configuration() {
  printf '%s\n' '</configuration>'
}

# Print one <property> element for a Hama configuration file.
# $1 : Configuration name
# $2 : Configuration value
# Both arguments are emitted verbatim (no XML escaping is applied).
function set_property() {
  local prop_name=$1
  local prop_value=$2

  printf '<property><name>%s</name><value>%s</value></property>\n' "$prop_name" "$prop_value"
}

# Calcurate the number of tasks, depending on Amazon EC2 instance type.
# $1: Memory size of java child task
function cal_num_task() {
instance_group=$(cat /mnt/var/lib/info/job-flow.json | jp 'instanceGroups[0].instanceGroupName' | awk -F "\"" '{print $2}')
instance_type=
if [ "${instance_group}" = "CORE" ]
then
instance_type=$(cat /mnt/var/lib/info/job-flow.json | jp 'instanceGroups[0].instanceType' | awk -F "\"" '{print $2}')
else
instance_type=$(cat /mnt/var/lib/info/job-flow.json | jp 'instanceGroups[1].instanceType' | awk -F "\"" '{print $2}')
fi

if [ -n $instance_type ]
then
hadoop fs -copyToLocal s3://hamacluster/instance_info /home/hadoop
mem_info=$(cat /home/hadoop/instance_info | awk '/'$instance_type'/ {print $2}')
mem_size=$(echo "$mem_info" | awk -F "." '{print $1}')
HAMA_TASK_NUM=$(echo "($mem_size-2)/$1" | bc)
fi
}

# Make hama configuration run on Amazon EMR
# for hadoop version 1.0.3
#
# 1. Appends "export JAVA_HOME=..." and one "export" line per
#    whitespace-separated entry of HAMA_ENV to conf/hama-env.sh.
# 2. Rewrites conf/hama-site.xml with the default properties
#    (bsp.master.address, fs.default.name taken from Hadoop's core-site.xml,
#    hama.zookeeper.quorum, dfs.block.size), followed by every
#    whitespace-separated name=value entry of HAMA_SITE_PROPERTIES.
# 3. Ensures bsp.child.java.opts is present (default -Xmx<HAMA_MAX_HEAP_SIZE>m),
#    calls cal_num_task with the heap size in MB divided by 1000, and adds
#    bsp.tasks.maximum from HAMA_TASK_NUM unless it is empty or the property
#    was already supplied.
#
# Reads the globals HAMA_HOME, HADOOP_HOME, JAVA_HOME, HAMA_ENV,
# HAMA_SITE_PROPERTIES, HAMA_MASTER, HAMA_MASTER_PORT, HAMA_MAX_HEAP_SIZE and
# HAMA_TASK_NUM (the latter is set by cal_num_task).
function make_hama_conf() {
  local env_file="${HAMA_HOME}/conf/hama-env.sh"
  local site_file="${HAMA_HOME}/conf/hama-site.xml"
  local var property name value default_fs bsp_child_java_opt heap_size

  echo "Info: Make hama configuration..."

  # Add hama environment variables.
  {
    echo ""
    echo "export JAVA_HOME=$JAVA_HOME"
    # Unquoted on purpose: HAMA_ENV is a whitespace-separated list.
    for var in $HAMA_ENV
    do
      echo "export $var"
    done
  } >> "$env_file"

  default_fs=$(grep -i "fs.default.name<" "$HADOOP_HOME/conf/core-site.xml" | grep -oP '(?<=value>)[^<]+')

  # The helpers are called directly (not through an unquoted `echo $(...)`)
  # so that their output is written verbatim.
  start_configuration > "$site_file"
  {
    set_property "bsp.master.address" "${HAMA_MASTER}:${HAMA_MASTER_PORT}"
    set_property "fs.default.name" "$default_fs"
    set_property "hama.zookeeper.quorum" "${HAMA_MASTER}"
    set_property "dfs.block.size" "134217728"
    # set_property "hama.graph.thread.pool.size" "256"
  } >> "$site_file"

  # Unquoted on purpose: HAMA_SITE_PROPERTIES is a whitespace-separated list.
  for property in $HAMA_SITE_PROPERTIES
  do
    # Split on the FIRST "=" only, so values such as -Dfoo=bar stay intact.
    name=${property%%=*}
    value=
    if [[ $property == *=* ]]
    then
      value=${property#*=}
    fi
    # Drop double quotes that directly surround the "=" separator.
    while [[ $name == *\" ]]; do name=${name%\"}; done
    while [[ $value == \"* ]]; do value=${value#\"}; done
    set_property "$name" "$value" >> "$site_file"
  done

  # Calculate the number of tasks for Hama configuration properly.
  # This script assumes that java heap size per Hama task is
  # HAMA_MAX_HEAP_SIZE megabytes unless bsp.child.java.opts was supplied.
  # NOTE(review): the first number found in the bsp.child.java.opts value is
  # assumed to be the -Xmx size in megabytes - confirm for other option forms.
  bsp_child_java_opt=$(grep -i 'bsp.child.java.opts' "$site_file" | grep -oP '(?<=value>)[^<]+' | grep -oP '[0-9]+' | head -n 1)
  echo "bsp_child_java_opt ${bsp_child_java_opt}m"
  if [ -n "$bsp_child_java_opt" ]
  then
    echo "bsp child is not null"
    heap_size=$(echo "scale=1; $bsp_child_java_opt/1000" | bc)
  else
    echo "HAMA_MAX_HEAP_SIZE: $HAMA_MAX_HEAP_SIZE"
    # Keep the written default in step with the value used for the task count.
    set_property "bsp.child.java.opts" "-Xmx${HAMA_MAX_HEAP_SIZE}m" >> "$site_file"
    heap_size=$(echo "scale=1; $HAMA_MAX_HEAP_SIZE/1000" | bc)
  fi
  cal_num_task "$heap_size"

  # Quoted tests: an unquoted `[ -n $HAMA_TASK_NUM ]` is true even when the
  # variable is empty and used to produce an empty bsp.tasks.maximum.
  if [ -n "$HAMA_TASK_NUM" ] && ! grep -qi 'bsp.tasks.maximum' "$site_file"
  then
    set_property "bsp.tasks.maximum" "$HAMA_TASK_NUM" >> "$site_file"
  fi

  end_configuration >> "$site_file"
}

# Create starting Hama file
# $1: file name of the start script, created inside HAMA_HOME
#
# The generated script checks /mnt/var/lib/info/instance.json for
# "isMaster": true. On the master it starts the zookeeper and bspmaster
# daemons; elsewhere it polls HAMA_MASTER:HAMA_MASTER_PORT with `nc -z` every
# 5 seconds while nc exits with 1, then starts the groom daemon.
# HAMA_HOME, HAMA_MASTER and HAMA_MASTER_PORT are expanded now, at
# generation time; `$?` is written literally for the generated script.
function create_hama_start_file() {
  local start_file="${HAMA_HOME}/$1"

  echo "Info: Creating hama start file..."
  # Written with '>' so that a re-run replaces the script instead of
  # appending a second copy of every command to it.
  {
    echo '#!/bin/bash'
    echo 'grep -Fq "\"isMaster\": true" /mnt/var/lib/info/instance.json'
    echo 'if [ $? -eq 0 ]; then'
    echo " ${HAMA_HOME}/bin/hama-daemon.sh --config ${HAMA_HOME}/conf start zookeeper"
    echo " ${HAMA_HOME}/bin/hama-daemon.sh --config ${HAMA_HOME}/conf start bspmaster"
    echo "else"
    echo " nc -z $HAMA_MASTER $HAMA_MASTER_PORT"
    echo ' while [ $? -eq 1 ]; do'
    echo " sleep 5"
    echo " nc -z $HAMA_MASTER $HAMA_MASTER_PORT"
    echo " done"
    echo " ${HAMA_HOME}/bin/hama-daemon.sh --config ${HAMA_HOME}/conf start groom"
    echo "fi"
  } > "$start_file"
  chmod +x "$start_file"
}

# Only hama-trunk version
#
# Download and unpack the Hama distribution in /home/hadoop.
#
# With HAMA_RELEASE_URL set: the tarball is fetched with `hadoop fs
# -copyToLocal` for s3: URLs and with wget otherwise, then unpacked.
# HAMA_TARBALL becomes the last path component of the URL and HAMA_HOME
# becomes HADOOP_HOME followed by the tarball name up to its first ".tar".
# NOTE(review): this presumes the tarball unpacks into a directory of that
# name - confirm for custom builds.
#
# Without HAMA_RELEASE_URL: the 0.7.0 snapshot built for Hadoop 2.4.0 is
# fetched and its directory renamed to hama-0.7.0; HAMA_HOME is not changed.
#
# Returns non-zero as soon as changing directory, downloading, unpacking or
# renaming fails, instead of carrying on with a missing installation.
#
# NOTE(review): --no-check-certificate disables TLS certificate verification
# for the download; kept as in the original, but worth revisiting.
function download() {
  echo "Info: Downloading hama package..."
  cd /home/hadoop || return 1

  if [ -n "$HAMA_RELEASE_URL" ]
  then
    case "$HAMA_RELEASE_URL" in
      s3:*)
        hadoop fs -copyToLocal "$HAMA_RELEASE_URL" /home/hadoop || return 1
        ;;
      *)
        wget --no-check-certificate "$HAMA_RELEASE_URL" || return 1
        ;;
    esac
    HAMA_TARBALL=${HAMA_RELEASE_URL##*/}
    tar zxvf "$HAMA_TARBALL" || return 1
    # Literal ".tar" match; a regex field separator would also split on
    # names such as "hama-star-1.0.tar.gz".
    HAMA_HOME=${HADOOP_HOME}${HAMA_TARBALL%%.tar*}
    echo "$HAMA_TARBALL"
    echo "$HAMA_HOME"
  else
    wget --no-check-certificate http://people.apache.org/~edwardyoon/dist/0.7.0-RC1/hama-0.7.0-SNAPSHOT-for-Hadoop2.4.0.tar.gz || return 1
    tar zxvf /home/hadoop/hama-0.7.0-SNAPSHOT-for-Hadoop2.4.0.tar.gz || return 1
    mv hama-0.7.0-SNAPSHOT hama-0.7.0 || return 1
  fi

  chmod +x "$HAMA_HOME"/bin/*
}

# Initialization
# Derives the Hama master host from the fs.default.name entry of Hadoop's
# core-site.xml and stores it in both HAMA_MASTER and NAME_NODE.
function init() {
  local fs_uri

  echo "Info: Initializing..."

  # Text of the <value> element on the fs.default.name line.
  fs_uri=$(grep -i "fs.default.name<" $HADOOP_HOME/conf/core-site.xml | grep -oP '(?<=value>)[^<]+')
  # Third "/"-separated field, cut at the first ":" (drops a trailing port).
  HAMA_MASTER=$(printf '%s\n' "$fs_uri" | awk -F/ '{print $3}' | awk -F: '{print $1}')
  NAME_NODE=$HAMA_MASTER
}

# Print the usage text of install-hama.sh to standard output.
function print_help() {
  local help_lines=(
    'Usage: ./install-hama.sh [OPTIONS]'
    ' -h, --help'
    ' Display help page.'
    ' -u, --url'
    ' Hama release download URL.'
    ' ex)'
    ' -u http://apache.mirror.cdnetworks.com/hama/hama-0.6.4/hama-0.6.4.tar.gz'
    ' --url s3://[your_bucket]/[path_to]/hama-{version}.tar.gz'
    ' -c, --conf'
    ' Addional properties for Hama configuration.(Space-separated delimiter)'
    ' ex) -c "bsp.master.address=host1.mydomain.com:40000 hama.zookeeper.quorum=host1.mydomain.com,host2.mydomain.com"'
    ' -c s3://[your_bucket]/[path_to]/hama-site.xml'
    ' -e, --env'
    ' The environment variables for hama-env.sh'
  )

  printf '%s\n' "${help_lines[@]}"
}

# Global variables

# Installation locations. HADOOP_HOME keeps its trailing slash: download()
# appends the unpacked directory name to it directly.
HAMA_HOME="/home/hadoop/hama-0.7.0"
HADOOP_HOME="/home/hadoop/"

# Location of the instance-type table (not referenced by the functions in
# this script, which read s3://hamacluster/instance_info instead).
INSTANCE_TYPE_URL="https://s3-ap-northeast-1.amazonaws.com/hamacluster/instance_info"

# Hama default max heap size -Xmx3072m
HAMA_MAX_HEAP_SIZE="3072"

# Master host (filled in by init) and the BSP master port.
HAMA_MASTER=""
HAMA_MASTER_PORT="40000"

# Values supplied through the command-line options; empty means "not given".
HAMA_RELEASE_URL=""
HAMA_TARBALL=""
HAMA_SITE_PROPERTIES=""
HAMA_ENV=""

# Filled in while the configuration is generated.
HAMA_TASK_NUM=""
NAME_NODE=""

# Name of the generated start script inside HAMA_HOME.
START_HAMA_FILE="hama-start-emr.sh"

# Set up arguments
#
# Parse the command-line options into the globals HAMA_RELEASE_URL
# (-u/--url), HAMA_SITE_PROPERTIES (-c/--conf) and HAMA_ENV (-e/--env).
# -h/--help prints the usage text and exits with status 0.
# Parsing stops at the first argument that does not start with "-".
# An unknown option, or an option given without its value, is reported on
# stderr and the script exits with status 1, so that a mistyped invocation
# is not reported as a success.
function parse_args() {
  while [ $# -gt 0 ]
  do
    case "$1" in
      -u|--url|-c|--conf|-e|--env)
        if [ $# -lt 2 ]
        then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          -u|--url)  HAMA_RELEASE_URL=$2 ;;
          -c|--conf) HAMA_SITE_PROPERTIES=$2 ;;
          -e|--env)  HAMA_ENV=$2 ;;
        esac
        # Skip the option's value; the option itself is shifted below.
        shift
        ;;
      -h|--help)
        print_help
        exit 0
        ;;
      -*)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
      *)
        break
        ;;
    esac
    shift
  done
}

parse_args "$@"

# Main sequence. The order matters: each step relies on globals set by the
# previous ones.

# Derive HAMA_MASTER / NAME_NODE from Hadoop's core-site.xml.
init

# Fetch and unpack the Hama distribution (may update HAMA_HOME).
download

# Write conf/hama-env.sh additions and conf/hama-site.xml under HAMA_HOME.
make_hama_conf

# Generate the start script inside HAMA_HOME and run it in the background.
create_hama_start_file $START_HAMA_FILE
${HAMA_HOME}/$START_HAMA_FILE &
Loading

0 comments on commit 7fc762e

Please sign in to comment.