-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathpull.sh
executable file
·141 lines (121 loc) · 5.04 KB
/
pull.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env bash
# Filename: pull.sh
#
# Copyright 2019-2022 Calvin Ardi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The easiest way to automate this is to install it in your crontab:
#
# run five minutes after midnight, every day
# 5 0 * * * /path/to/aws-spot-price-history/pull.sh >> /path/to/aws-spot-price-history/log.txt
# to be crontab compatible: always run from the directory this script lives
# in, because every path below (key files, ec2-api-tools, data directory) is
# relative to it. Quoted so a path containing spaces works, and checked so we
# never continue from the wrong directory.
cd -- "$(dirname -- "$0")" || {
  echo "$(date -Iseconds) cannot cd to the script directory, exiting..."
  exit 1
}

export JAVA_HOME=/usr/lib/jvm/java

# if you don't have the ec2-api-tools already, run the following in
# the directory this script is in:
#
# curl -O http://s3.amazonaws.com/ec2-downloads/ec2-api-tools.zip
# unzip -q ec2-api-tools.zip
# ln -s ec2-api-tools-*/ ec2-api-tools
#
export EC2_HOME="$PWD/ec2-api-tools"

# replace keys with your own, or drop them in the following files.
# Assignment and export are separate statements so the status of `cat` is not
# hidden behind the (always successful) `export`; a missing file still only
# produces cat's error message and an empty key, as before.
AWS_ACCESS_KEY=$(cat aws_access.private)
AWS_SECRET_KEY=$(cat aws_secret.private)
export AWS_ACCESS_KEY AWS_SECRET_KEY

# where do you want the data?
export DATA_DIR=data

# to get the available regions:
#
# ec2-describe-regions | awk {'print $2'} | tr '\n' ' '
#
# `ec2-describe-regions` does take some time, so it's easier to
# update the variable when it has changed
REGIONS=(
  eu-north-1 ap-south-1 eu-west-3 eu-west-2 eu-west-1
  ap-northeast-3 ap-northeast-2 ap-northeast-1
  sa-east-1 ca-central-1
  ap-southeast-1 ap-southeast-2
  eu-central-1
  us-east-1 us-east-2 us-west-1 us-west-2
)

# TS holds a command line, not a timestamp: it is expanded unquoted inside a
# command substitution wherever a log line needs the current time.
TS="date -Iseconds"

echo "$($TS) starting data pull on $(hostname -f)"
# Enter the data directory, creating it first when nothing with that name
# exists. Every failure here is fatal: if we carried on without being inside
# $DATA_DIR, the per-region data files and symlinks would be written into
# whatever directory we happen to be in.
if [ ! -e "$DATA_DIR" ]; then
  # nothing usable named $DATA_DIR exists, create it
  mkdir -- "$DATA_DIR" || {
    echo "$($TS) could not create $DATA_DIR, exiting..."
    exit 1
  }
elif [ ! -d "$DATA_DIR" ]; then
  # something that isn't a directory named "$DATA_DIR" exists, exit
  echo "$($TS) $DATA_DIR exists and isn't a directory, exiting..."
  exit 1
fi

# $DATA_DIR is now a directory (or a symlink to one), cd into it
cd -- "$DATA_DIR" || {
  echo "$($TS) could not cd into $DATA_DIR, exiting..."
  exit 1
}
#######################################
# Print one log line prefixed with the current timestamp.
# Globals:   TS (read) - command line that prints a timestamp
# Arguments: the message words
# Outputs:   "<timestamp> <message>" on stdout
#######################################
log() {
  # $TS is expanded unquoted on purpose: it holds a command plus its option.
  printf '%s %s\n' "$($TS)" "$*"
}

#######################################
# Print the most recent entry timestamp found in the first 20 lines of a
# data file (e.g., 2017-02-02T11:45:34-0800). The timestamp is taken from
# the third tab-separated column.
# Arguments: $1 - data file, or a symlink to one; when the symlink target
#                 name ends in ".xz" the file is read through xzcat
# Outputs:   the newest timestamp on stdout, or nothing if none was found
# TODO check if timestamp is well-formatted
#######################################
newest_entry_ts() {
  local datafile=$1
  local resolved
  # readlink prints nothing for a regular file, which selects the
  # uncompressed branch below
  resolved=$(readlink -- "$datafile")

  if [[ "$resolved" == *.xz ]]; then
    xzcat -- "$datafile" | head -n 20
  else
    head -n 20 -- "$datafile"
  fi | awk -F'\t' '{print $3}' | sort -r | head -1
}

#######################################
# Refresh the spot price history of a single region in the current directory.
# Globals:   EC2_HOME (read), TS (read, through log)
# Arguments: $1 - region name (e.g., us-east-1)
# Outputs:   progress lines on stdout; creates data.<region>.<timestamp> and
#            repoints the data.<region>.newest symlink on success
# Returns:   0 on success, 1 if the data pull or the file update failed
#######################################
pull_region() {
  local region=$1
  local newest_link="data.$region.newest"
  local newest_ts=""
  local fn_temp fn
  local -a pull_cmd

  log "$region starting"

  # do we already have data?
  if [ -e "$newest_link" ]; then
    newest_ts=$(newest_entry_ts "$newest_link")
    # sometimes datafile is empty
    if [ -z "$newest_ts" ]; then
      log "$region existing data found, but most recent timestamp is empty? continuing as if no existing data found"
    else
      log "$region existing data found: most recent timestamp is $newest_ts"
    fi
  fi

  if [ -z "$newest_ts" ]; then
    log "$region no existing data found, starting from the beginning (otherwise symlink 'data.$region.newest' to the most recent file)"
    log "$region note: the initial data pull can take a while (~1 hour)"
  fi

  # we can only grab the last 90 days of data: if the most recent timestamp is
  # more than 90 days ago then we're essentially starting from scratch.
  fn_temp="data.$region.$(date -Iseconds).temp"
  log "$region starting data refresh"

  pull_cmd=( "$EC2_HOME/bin/ec2-describe-spot-price-history" --region "$region" )
  if [ -n "$newest_ts" ]; then
    # only get the diff, but there will be some overlap regardless
    pull_cmd+=( --start-time "$newest_ts" )
  fi

  # Test the pull command itself. Checking $? after a later assignment that
  # contains $(date ...) would report the status of date, so a failed pull
  # would look successful and the symlink would be moved to partial data.
  if ! "${pull_cmd[@]}" > "$fn_temp"; then
    # the partial temp file is left in place for inspection
    log "$region something went wrong."
    return 1
  fi

  # the data refresh is finished: move the files around and update symlinks.
  fn="data.$region.$(date -Iseconds)" # e.g., data.us-east-1.2017-01-01T01:01:01-0800
  if mv -- "$fn_temp" "$fn" && ln -sfn -- "$fn" "$newest_link"; then
    newest_ts=$(newest_entry_ts "$newest_link")
    log "$region finished. most recent entry is $newest_ts."
  else
    log "$region something went wrong."
    return 1
  fi
}

# we could parallelize this loop, but it's best to naturally throttle
# so we don't unduly burden their servers
for region in "${REGIONS[@]}"; do
  # a failed region is logged by pull_region; carry on with the next one
  pull_region "$region"
done
exit 0