diff --git a/saltstack/readme.md b/saltstack/readme.md
index 2b1938d..8ae6d0f 100644
--- a/saltstack/readme.md
+++ b/saltstack/readme.md
@@ -1,59 +1,142 @@
-# Bootstrap - SaltStack Setup
+Install and configure SaltStack
+===============================
+This Bootstrap Action will install and configure [SaltStack](https://docs.saltstack.com/en/2015.5/) on the EMR nodes. It will add some
+useful configurations in the form of [grains](https://docs.saltstack.com/en/2015.5/topics/targeting/grains.html) (like 'Facts' in other similar software) and [nodegroups](https://docs.saltstack.com/en/2015.5/topics/targeting/nodegroups.html).
 
-In many cases, remote execution can be a useful tool in clustered systems. EMR supports this functionality through a custom jar step [EMR 3.X](http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/emr-hadoop-script.html) / [EMR 4.X](http://docs.aws.amazon.com/ElasticMapReduce/latest/ReleaseGuide/emr-4.0.0/emr-hadoop-script.html), which can be comberson and does not easily support real-time filtering of stdout. This bootstrap action provides a light weight remote execution engine by installing and configuring [SaltStack](http://www.saltstack.com/). Available in the Amazon Linux epel, the _salt master_ is installed on the Master node, allowing open minion connections from all Core|Task instances.
+## Bootstrap Location ##
+* For us-east-1:
 
-## Usage
+s3://awssupportdatasvcs.com/bootstrap-actions/saltstack/salt-setup.sh
 
-    USAGE: ./salt-setup.sh -options
+* Rest of regions:
 
-    OPTIONS         DESCRIPTION
-    - OPTIONAL
-    -l              See docs.saltstack.com for valid levels.  Default:
+s3://<region>.awssupportdatasvcs.com/bootstrap-actions/saltstack/salt-setup.sh
+
+## Usage ##
+There are three modes. If no argument is given, **-I** is assumed.
+
+    MODES:
+    -I           (DEFAULT) Independent mode. The EMR master node is the salt
+                 master; slave nodes (task, core) are minions. If
+                 no argument is used, this mode will be deployed.
+
+    -E <master>  External master mode. Register all EMR nodes as
+                 minions on the specified external master.
+
+    -S <master>  Syndicated mode. Like -I, but also syndicates the EMR
+                 master node to the specified external master.
+
+    Important: If the external master (-E/-S modes) is not reachable, the bootstrap
+    action will fail.
+
+    OPTIONS
+    -l           See docs.saltstack.com for valid levels. Default:
                     info
-    -f              See man syslog.h for valid facilities.  Default:
+    -f           See man syslog.h for valid facilities. Default:
                     LOG_LOCAL0
-
+
     FLAGS
-    -D              Enable debug mode
-    -V              Print version
-    -h              Print usage
-
-## Testing
-
-Ssh into the namenode, and run the salt test.ping command to see registered minions. See [](http://docs.saltstack.com/) for a list of all supported commands and modules.
-
-    ## Test connected nodes
-    $ sudo salt '*' test.ping
-    ip-10-20-128-250.us-west-2.compute.internal:
-        True
-    ip-10-120-202-205.us-west-2.compute.internal:
-        True
-    ip-10-120-7-56.us-west-2.compute.internal:
-        True
-
-    ## List java proc_nodemanager process on a single node
-    $ sudo salt 'ip-10-120-7-56.us-west-2.compute.internal' cmd.run 'ps ax | grep \[p\]roc_nodemanager
-    3361 ?        Sl     1:41 /usr/lib/jvm/java-openjdk/bin/java -Dproc_nodemanager -Xmx2048m -XX:OnOutOfMemoryError=kill -9 %p -XX:OnOutOfMemoryError=kill -9 %p -server -Dhadoop.log.dir=/var/log/hadoop-yarn ...
-
-    ## Distribute file
-    $ echo 'go bears' | sudo tee /srv/salt/bar
-    $ sudo salt '*' cp.get_file salt://bar /tmp/foo/bar makedirs=True
-    ip-10-20-128-250.us-west-2.compute.internal:
-        /tmp/foo/bar
-    ip-10-120-7-56.us-west-2.compute.internal:
-        /tmp/foo/bar
-    ip-10-120-202-205.us-west-2.compute.internal:
-        /tmp/foo/bar
-    $ sudo salt '*' cmd.run 'cat /tmp/foo/bar'
-    ip-10-20-128-250.us-west-2.compute.internal:
-        go bears
-    ip-10-120-202-205.us-west-2.compute.internal:
-        go bears
-    ip-10-120-7-56.us-west-2.compute.internal:
-        go bears
-
-
-
+    -d           Enable debug mode
+    -V           Print version
+    -h           Print usage
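+
+For example, a cluster using this bootstrap action in syndicated mode can be
+launched from the AWS CLI as sketched below. The instance settings, key name
+and the external master hostname (salt.example.com) are placeholders, not
+values required by the bootstrap action:
+
+    aws emr create-cluster --name "salt-cluster" --release-label emr-4.7.2 \
+        --use-default-roles --ec2-attributes KeyName=mykey \
+        --instance-type m3.xlarge --instance-count 3 \
+        --bootstrap-actions Path=s3://awssupportdatasvcs.com/bootstrap-actions/saltstack/salt-setup.sh,Args=["-S","salt.example.com"]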
+
+
+## SaltStack on EMR: remote command execution cheatsheet ##
+__NOTE:__ all the commands run on the minions as root. They must be executed from a salt master, which is:
+
+- Independent mode: the EMR master node.
+
+- External mode: the external master.
+
+- Syndicated mode: the EMR master node (will contact all nodes in the cluster) or the external master (will contact all nodes in all clusters).
+
+Example:
+
+- Check connectivity to all registered nodes:
+
+      sudo salt '*' test.ping
+
+We can leverage the predefined configuration via _grains_ and _nodegroups_.
+
+### Examples using nodegroups ###
+
+- Execute a command (for example, __whoami__) on the core nodes:
+
+      sudo salt -N core cmd.run whoami
+
+- Execute a script located in S3 on the task nodes:
+
+      sudo salt -N task cmd.script s3://bucket/command
+
+- Copy a file from the salt master to every EMR slave node (core, task):
+
+      sudo cp /path/to/myfile /srv/salt/
+      sudo salt -N slave cp.get_file salt://myfile /path/to/myfile makedirs=True
+
+### Examples using grains ###
+
+- Execute the script /srv/salt/myscript from the master on all nodes in instance group ig-FFFFFFFFFFFF:
+
+      sudo salt -G 'emr:instance_group_id:ig-FFFFFFFFFFFF' cmd.script salt://myscript
+
+- Check the status of the nodemanager service on every c3.2xlarge:
+
+      sudo salt -G 'instance_type:c3.2xlarge' service.status hadoop-yarn-nodemanager
+
+- Examples useful in external or syndicated mode:
+  - Check the uptime of every EMR master node on every cluster with release 4.7.2:
+
+        sudo salt -C 'G@emr:version:4.7.2 and G@emr:instance_role:master' status.uptime
+
+  - Execute a script on all nodes of a particular cluster-id (managed by an external SaltStack master):
+
+        sudo salt -G 'emr:job_flow_id:j-FFFFFFFFFFFFF' cmd.run myscript
+
+## Grains and nodegroups provided by this Bootstrap action ##
+Each instance has its own grains; they are intended to be static (or semi-static) data that give information about the underlying system.
+
+    emr:
+      instance_group_id: ig-XXXXXXXXXXXXX
+      instance_group_name: Arbitrary name of the instance group (user given)
+      instance_role: master/core/task
+      cluster_name: Arbitrary name of the cluster (user given)
+      job_flow_id: j-FFFFFFFFFFFFF
+      type: ami (3.11 or less) / bigtop (4.0 onwards)
+      version: 3.11, 4.7.2, 5.0.0, etc.
+    instance_type: c3.xlarge (for example)
+    instance_id: i-XXXXXXXX
+
+The nodegroups are defined based on grains rules:
+
+    nodegroups:
+      core: 'G@emr:instance_role:Core'
+      master: 'G@emr:instance_role:Master'
+      task: 'G@emr:instance_role:Task'
+      slave: 'G@emr:instance_role:Core or G@emr:instance_role:Task'
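+
+To verify this configuration on a running cluster, the grains and nodegroups of
+every registered minion can be inspected from the salt master, for example:
+
+    sudo salt '*' grains.item emr
+    sudo salt -N slave test.ping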
+
+
+## Brief introduction to SaltStack ##
+[SaltStack](https://docs.saltstack.com/en/2015.5/) is an open source tool for automation and infrastructure management (similar to Chef or Puppet). It started as a remote execution engine and is based on ZeroMQ.
+
+What's the benefit of this? Among others:
+
+- Fast parallel remote command execution on every node of the cluster, or a selection of them.
+- Scales much better to large numbers of nodes than SSH-based solutions.
+- Easy way to change configurations on running EMR clusters.
+- Possibility to manage several clusters from a central location.
+- ...and many more.
+
+In SaltStack lingo, the master sends commands or configurations to the minions (slaves). By default, this bootstrap action installs and configures the SaltStack master on the EMR master node; all the other nodes are installed and configured as minions and auto-register with the master.
+
+Optionally, the master can also be registered as a minion, so that commands can be run on the whole cluster. Alternatively, all the EMR nodes (master included) can be minions that register to an external master (an EC2 instance, for example). This enables control of several clusters from that EC2 instance.
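+
+For the external master case, note that the external EC2 instance essentially
+just needs a running salt-master reachable from the cluster nodes. A minimal
+sketch, assuming Amazon Linux with EPEL (the same repository this bootstrap
+action installs from):
+
+    sudo yum --enablerepo=epel -y install salt-master
+    sudo service salt-master start
+    # Launch the cluster(s) with -E <this instance's hostname>. Unless
+    # auto_accept is enabled on this master, accept new minion keys with:
+    sudo salt-key -A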
if [ "$optname" == ":" ]; then - echo "Option ${OPTARG} requires a parameter" 1>&2 - else - echo "Option ${OPTARG} unkown" 1>&2 - fi - exit 1;; - esac -done +write_grain_file() { + grain_file=/etc/salt/grains + instanceType=$(curl -s http://169.254.169.254/latest/meta-data/instance-type) + instanceId=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) + + instanceGroupId=$(jq -r '.instanceGroupId' $infodir/instance.json) + jobFlowId=$(jq -r '.jobFlowId' $infodir/job-flow.json) + version=$(grep releaseLabel $infodir/job-flow-state.txt | cut -d '"' -f 2) + cluster_name=$(aws emr describe-cluster --cluster-id $jobFlowId --output text --query 'Cluster.Name') -if grep '"isMaster": true' $infodir/instance.json > /dev/null; then - ## Install deps - sudo yum --enablerepo=epel -y install salt-master - - ## Create conf and shared dirs - sudo mkdir -p -m750 /etc/salt/master.d /srv/salt - - ## Conf file - cat </dev/null -log_level: $loglevel -log_file: udp://localhost:514/$facility + if [[ -z "$version" ]]; then + version=$(grep amiVersion $infodir/job-flow-state.txt | cut -d '"' -f 2) + emrType=ami + else + version=${version#emr-} + emrType=bigtop + fi + + instanceRole=$(jq -r '.instanceGroups[] | select(.instanceGroupId | + contains("'$instanceGroupId'")).instanceRole' $infodir/job-flow.json) + instanceGroupName=$(jq -r '.instanceGroups[] | select(.instanceGroupId | + contains("'$instanceGroupId'")).instanceGroupName' $infodir/job-flow.json) + cat </dev/null +emr: + version: $version + type: $emrType + job_flow_id: $jobFlowId + cluster_name: ${cluster_name} + instance_group_id: $instanceGroupId + instance_group_name: $instanceGroupName + instance_role: $instanceRole +instance_id: $instanceId +instance_type: $instanceType +EOF +} + +install_configure_master() { + sudo yum --enablerepo=epel -y install salt-master + sudo mkdir -p -m750 /etc/salt/master.d + + if [[ -d "/mnt/var" ]]; then + sudo mkdir -p -m750 /mnt/salt + sudo chown root:hadoop /mnt/salt + sudo ln -s /mnt/salt $3 + else + sudo mkdir -p m770 $3 + sudo chown root:hadoop $3 + fi + ## Conf file + cat </dev/null +log_level: $1 +log_file: file:///dev/log/$2 auto_accept : True file_recv: True file_roots: base: - - /srv/salt + - $3 + +nodegroups: + core: 'G@emr:instance_role:Core' + master: 'G@emr:instance_role:Master' + task: 'G@emr:instance_role:Task' + slave: 'G@emr:instance_role:Core or G@emr:instance_role:Task' EOF - - ## Start service - sudo service salt-master start -else - ## Get master hostname - master=`awk -F':' '/masterPrivateDnsName/ { - gsub(/(^ *|"|,)/,"",$2) - print $2 - }' $infodir/job-flow.json` - - ## Install deps - sudo yum --enablerepo=epel -y install salt-minion - - ## Create conf dirs - sudo mkdir -p -m750 /etc/salt/minion.d - - ## Conf file - cat </dev/null -log_level: $loglevel -log_file: udp://localhost:514/$facility -open_mode: True -master: $master + sudo service salt-master start + sudo chkconfig --add salt-master +} + +install_configure_syndic() { + sudo yum --enablerepo=epel -y install salt-syndic + cat </dev/null +syndic_master: $1 EOF - - ## Deregister on terminate - cat <<"EOF" | sudo tee /etc/init.d/salt-revoke 1>/dev/null + sudo service salt-syndic start + sudo chkconfig --add salt-syndic +} + +install_configure_minion() { + local user=$4 + sudo yum --enablerepo=epel -y install salt-minion + sudo mkdir -p -m750 /etc/salt/minion.d + sudo chown -R $user /etc/salt + ## Conf file +# open_mode: True + cat </dev/null +log_level: $1 +log_file: file:///dev/log/$2 +master: $3 +user: $user +EOF + ## Grains 
+
+write_salt-revoke_service() {
+  cat <<"EOF" | sudo tee /etc/init.d/salt-revoke 1>/dev/null
 #!/bin/bash
 
 ### BEGIN INIT INFO
@@ -115,46 +156,107 @@
 PROG="salt-revoke"
 LOCKFILE="/var/lock/subsys/$PROG"
 
 start() {
-  echo -n "Enable $PROG"
-  if touch $LOCKFILE > /dev/null 2>&1; then
-    success
-  else
-    RETVAL=1
-    failure
-  fi
-  echo
+    echo -n "Enable $PROG"
+    if touch $LOCKFILE > /dev/null 2>&1; then
+        success
+    else
+        RETVAL=1
+        failure
+    fi
+    echo
 }
 
 stop() {
-  echo -n "Revoking minion auth key"
-  if salt-call saltutil.revoke_auth > /dev/null 2>&1; then
-    rm -f $LOCKFILE > /dev/null 2>&1
-    success
-  else
-    RETVAL=1
-    failure
-  fi
-  echo
+    echo -n "Revoking minion auth key"
+    if salt-call saltutil.revoke_auth > /dev/null 2>&1; then
+        rm -f $LOCKFILE > /dev/null 2>&1
+        success
+    else
+        RETVAL=1
+        failure
+    fi
+    echo
 }
 
 case "$1" in
-  start)
-    start
-    ;;
-  stop)
-    stop
-    ;;
-  *)
-    echo $"Usage: $0 {start|stop}"
-    exit 2
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    *)
+        echo $"Usage: $0 {start|stop}"
+        exit 2
 esac
 
 exit $RETVAL
 EOF
+}
+
+# Defaults:
+infodir="/mnt/var/lib/info"
+facility="LOG_LOCAL0"
+loglevel="info"
+basedir=/srv/salt
+minion_on_master=1
+minionuser="root"
+syndic=0
+external=0
+
+while getopts ":f:l:E:S:dVIh" optname; do
+  case $optname in
+    d) set -x ;;
+    f) facility="$OPTARG" ;;
+    l) loglevel="$OPTARG" ;;
+    [eE]) saltmaster="$OPTARG"; external=1 ;;
+    [sS]) saltmaster="$OPTARG"; syndic=1 ;;
+    [iI]) : ;;
+    # Removed for the moment due to
+    # /~https://github.com/saltstack/salt/issues/22055
+    # u) minionuser="$OPTARG" ;;
+    h) print_usage
+       exit 0 ;;
+    V) print_version
+       exit 0 ;;
+    ?) if [[ "$optname" == ":" ]]; then
+         echo "Option ${OPTARG} requires a parameter" 1>&2
+       else
+         echo "Option ${OPTARG} unknown" 1>&2
+       fi
+       exit 1;;
+  esac
+done
+
+if grep -q '"isMaster": true' $infodir/instance.json && (( ! external )); then
+  install_configure_master $loglevel $facility $basedir
+
+  if (( syndic )); then
+    install_configure_syndic $saltmaster
+
+    ## Deregister on terminate
+    write_salt-revoke_service
+    sudo chmod +x /etc/init.d/salt-revoke
     sudo chkconfig --add salt-revoke
-
-  ## Start services
-  sudo service salt-minion start
     sudo service salt-revoke start
+  fi
+  if (( minion_on_master )); then
+    install_configure_minion $loglevel $facility 127.0.0.1 $minionuser
+  fi
+
+else
+  if (( ! external )); then
+    ## Get master hostname
+    saltmaster=$(grep masterPrivateDnsName $infodir/job-flow.json | cut -d '"' -f 4)
+  fi
+  ## Grains with static EMR info
+  install_configure_minion $loglevel $facility $saltmaster $minionuser
+
+  ## Deregister on terminate
+  write_salt-revoke_service
+
+  sudo chmod +x /etc/init.d/salt-revoke
+  sudo chkconfig --add salt-revoke
+  sudo service salt-revoke start
 fi