# omicron_variants.yml
# Name of the workflow as displayed on the repository's Actions page.
name: Call Omicron variants

# Triggers: run when this workflow file itself is changed by a push,
# or when the workflow is started manually (workflow_dispatch).
on:
  push:
    paths:
      - '.github/workflows/omicron_variants.yml'
  workflow_dispatch:
# a list of the jobs that run as part of the workflow
jobs:
  # Single job: build the tools, download the reference genome and the
  # Illumina runs of BioProject PRJNA784038, align the reads, call
  # variants, and commit the merged VCF back to the repository.
  call_variant:
    # the type of runner to run the given job
    runs-on: ubuntu-latest
    # container: davetang/build:1.1

    # The last steps commit the results and push them to master, which
    # requires write access to the repository contents.
    permissions:
      contents: write

    # a list of the steps that will run as part of the job
    steps:
      - run: echo "The job was automatically triggered by a ${{ github.event_name }} event."
      - run: echo "This job is now running on a ${{ runner.os }} server hosted by GitHub!"
      - run: echo "The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."

      - name: Check out repository code
        uses: actions/checkout@v4

      - run: echo "The ${{ github.repository }} repository has been cloned to the runner."
      - run: echo "The workflow is now ready to test your code on the runner."

      # Create the install/source directories and put the tool locations
      # on PATH for all subsequent steps.
      - name: Prepare environment
        run: |
          mkdir -p tools/bin src
          echo "$GITHUB_WORKSPACE/tools/bin:$GITHUB_WORKSPACE/src/edirect:$GITHUB_WORKSPACE/src/sratoolkit.2.11.3-ubuntu64/bin/" >> "$GITHUB_PATH"
          # Copies the repository's .ncbi directory into the home directory.
          # NOTE(review): presumably SRA Toolkit configuration - confirm.
          cp -r .ncbi ~

      # Build bwa 0.7.17 and htslib/bcftools/samtools 1.14 from source.
      # Commands are written one per line (not chained with &&) so that
      # a failing build aborts the step under the default `bash -e`.
      - name: Compile tools
        run: |
          cd src
          wget --quiet https://github.com/lh3/bwa/archive/refs/tags/v0.7.17.tar.gz
          tar -xzf v0.7.17.tar.gz
          cd bwa-0.7.17
          # bwa 0.7.17 has a tentative global definition in a header, which
          # GCC >= 10 rejects at link time unless -fcommon is given.
          make CFLAGS="-g -Wall -Wno-unused-function -O2 -fcommon"
          mv bwa "$GITHUB_WORKSPACE/tools/bin"
          cd ..
          ver=1.14
          dir=$GITHUB_WORKSPACE/tools
          for tool in htslib bcftools samtools; do
            url=https://github.com/samtools/${tool}/releases/download/${ver}/${tool}-${ver}.tar.bz2
            wget --quiet "${url}"
            tar xjf "${tool}-${ver}.tar.bz2"
            cd "${tool}-${ver}"
            ./configure --prefix="${dir}"
            make
            make install
            cd ..
          done

      # Unpack SRA Toolkit 2.11.3 under src/ (its bin/ directory is already
      # on PATH) and install the NCBI `datasets` binary into tools/bin.
      - name: Install SRA Toolkit and datasets
        run: |
          cd src
          wget --quiet https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.11.3/sratoolkit.2.11.3-ubuntu64.tar.gz
          tar -xzf sratoolkit.2.11.3-ubuntu64.tar.gz
          # NOTE(review): LATEST is unpinned; the flags used in the
          # "Download data" step may differ between releases - consider
          # pinning a specific version.
          wget --quiet https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets
          chmod 755 datasets
          mv datasets "$GITHUB_WORKSPACE/tools/bin"

      # - name: SnpEff
      #   run: |
      #     cd src
      #     wget --quiet https://snpeff.blob.core.windows.net/versions/snpEff_latest_core.zip
      #     unzip snpEff_latest_core.zip
      #     cd snpEff
      #     java -jar snpEff.jar download NC_045512.2

      # Unpack Entrez Direct into src/edirect (already on PATH) and add
      # the Linux builds of its helper executables.
      - name: Install Entrez Direct
        run: |
          cd src
          url=ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect
          wget --quiet "${url}/edirect.tar.gz"
          tar -xzf edirect.tar.gz
          cd edirect
          plt=Linux
          for exc in xtract rchive transmute; do
            wget --quiet "${url}/${exc}.${plt}.gz"
            gunzip "${exc}.${plt}.gz"
            chmod 755 "${exc}.${plt}"
          done

      # Fetch and index the reference genome, then download the FASTQ
      # files of every run in the project whose run-info row mentions
      # ILLUMINA.
      - name: Download data
        run: |
          mkdir fasta
          cd fasta
          datasets download genome accession GCF_009858895.2 --filename GCF_009858895.2.zip --include-gbff --include-gtf
          unzip GCF_009858895.2.zip
          ln -s ncbi_dataset/data/GCF_009858895.2/GCF_009858895.2_ASM985889v3_genomic.fna ref.fa
          bwa index ref.fa
          cd ..
          project=PRJNA784038
          esearch -db sra -query ${project} | efetch -format runinfo > runinfo.csv
          for acc in $(grep "ILLUMINA" runinfo.csv | cut -f1 -d','); do
            fasterq-dump -p --outdir fastq "${acc}"
          done

      # Align each paired-end run with bwa mem and write a sorted,
      # indexed BAM per accession into bam/.
      - name: Align
        run: |
          mkdir bam
          cd bam
          ref=$GITHUB_WORKSPACE/fasta/ref.fa
          runinfo=$GITHUB_WORKSPACE/runinfo.csv
          fastq=$GITHUB_WORKSPACE/fastq
          thread=2
          for acc in $(grep "ILLUMINA" "${runinfo}" | cut -f1 -d','); do
            # Anchor on the first column so an accession never matches
            # another row; column 19 is used as the read-group platform
            # (PL) and column 25 as the sample name (SM).
            platform=$(grep "^${acc}," "${runinfo}" | cut -f19 -d',')
            sample=$(grep "^${acc}," "${runinfo}" | cut -f25 -d',')
            # Only align when BOTH mate files exist (each operand needs
            # its own -e test).
            if [[ -e ${fastq}/${acc}_1.fastq && -e ${fastq}/${acc}_2.fastq ]]; then
              bwa mem \
                -t ${thread} \
                -R "@RG\tID:${acc}\tSM:${sample}\tPL:${platform}" \
                "${ref}" \
                "${fastq}/${acc}_1.fastq" \
                "${fastq}/${acc}_2.fastq" |
                samtools sort -@ ${thread} -O BAM |
                tee "${acc}.bam" |
                samtools index - "${acc}.bam.bai"
            fi
            # Free disk space; -f so that runs without paired files do
            # not abort the step.
            rm -f "${fastq}/${acc}_1.fastq" "${fastq}/${acc}_2.fastq"
          done

      # Call variants per BAM, merge the per-run VCFs into one
      # compressed, indexed VCF and copy it into result/.
      - name: Call variants
        run: |
          mkdir vcf
          cd vcf
          ref=$GITHUB_WORKSPACE/fasta/ref.fa
          for bam in "$GITHUB_WORKSPACE"/bam/*.bam; do
            base=$(basename "${bam}" .bam)
            bcftools mpileup -d 10000 -O v -f "${ref}" "${bam}" | bcftools call -mv -O v -o "${base}.vcf"
            bgzip "${base}.vcf"
            tabix -p vcf "${base}.vcf.gz"
          done
          bcftools merge -o PRJNA784038_illumina.vcf -O v SRR*.vcf.gz
          # bcftools norm PRJNA784038_illumina.vcf -o PRJNA784038_illumina.norm.vcf
          bgzip PRJNA784038_illumina.vcf
          tabix -p vcf PRJNA784038_illumina.vcf.gz
          # Make sure the destination is a directory before copying
          # several files into it.
          mkdir -p ../result
          cp PRJNA784038_illumina.vcf.gz* ../result/

      # Commit the merged VCF and its index and push them to master.
      - name: Add and commit variants
        run: |
          git config --global user.name 'GitHub Actions'
          git config --global user.email 'davetingpongtang@gmail.com'
          git add result/PRJNA784038_illumina.vcf*
          # `git commit` exits non-zero when nothing is staged, which
          # would fail the job; only commit and push on real changes.
          if git diff --cached --quiet; then
            echo "No changes in variant calls; nothing to commit."
          else
            git commit -m "Add variant calls"
            git push origin master
          fi

      - run: echo "This job's status is ${{ job.status }}."