-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun.sh
360 lines (339 loc) · 21.6 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
#!/usr/bin/env bash
#
# run.sh - perform particle picking using iterative ensemble learning
# author: Christopher JF Cameron
#
# set filepath to REPIC install src/
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
REPIC=$( dirname -- "${SCRIPT_DIR}" )
export REPIC_UTILS="${REPIC}/utils"
# parse command line arguments (17 positional arguments, consumed in order)
IN_DIR=${1}
# abort early: with a missing or empty path, the clean-up step further down
# would otherwise run 'rm -rf' relative to the wrong directory
if [ ! -d "${IN_DIR}" ]; then
  echo "Error - input directory '${IN_DIR}' does not exist." >&2
  exit 1
fi
IN_DIR=$( cd -- "${IN_DIR}" && pwd ) || exit 1 # set absolute path
EMPIAR_ID=$( basename -- "${IN_DIR}" )
# number of iterations to perform (+1 for initial round w pre-trained models)
NUM_ROUNDS=${2}
export REPIC_BOX_SIZE="${3}"
export REPIC_NUM_PARTICLES="${4}"
# name of the training subset (sub-directory of train/)
LABEL=${5}
export REPIC_TRAIN_MRC="${IN_DIR}/iterative_particle_picking/train/${LABEL}"
# "auto" -> start from the pre-trained models; any other value -> start from
# (downsampled) manual labels
if [ "auto" = "${6}" ]; then USE_MANUAL_LABELS=false; else USE_MANUAL_LABELS=true; fi
# probability of keeping a manual label when downsampling (manual mode only)
export SAMPLE_PROB="${7}"
# "1" -> run score_detections.py after every picking / consensus step
if [[ "${8}" == 1 ]]; then GET_SCORE=true; else GET_SCORE=false; fi
export REPIC_VAL_MRC="${IN_DIR}/iterative_particle_picking/val/"
export REPIC_TEST_MRC="${IN_DIR}/iterative_particle_picking/test"
REPIC_COORD="${IN_DIR}/iterative_particle_picking"
export CRYOLO_ENV="${9}"
export CRYOLO_MODEL="${10}"
export DEEP_ENV="${11}"
export DEEP_DIR="${12}"
export DEEP_MODEL="${13}"
export TOPAZ_ENV="${14}"
# the literal string "None" means: do not export TOPAZ_MODEL from this script
if [ "None" != "${15}" ]; then export TOPAZ_MODEL="${15}"; fi
export TOPAZ_SCALE="${16}"
export TOPAZ_PARTICLE_RAD="${17}"
# CrYOLO filtered micrograph directory
export CRYOLO_FILTERED_DIR="${IN_DIR}/iterative_particle_picking/cryolo_filtered_tmp"
###
# step 1 - create train/val/test data sets based on micrograph defocus values
###
# clean up iterative_particle_picking/ (':?' refuses to run with an empty IN_DIR)
rm -rf -- "${IN_DIR:?}"/iterative_particle_picking/{round_*,train,val,test,cryolo_filtered_tmp,iteration_plots,preprocess_topaz*}
mkdir -p "${IN_DIR}/iterative_particle_picking"
echo "Building train/val/test subsets ... "
# end script prematurely, with the failing status, if build_subsets reports an
# error (a bare 'exit' placed after a '[ $? != 0 ]' test returns 0 instead)
python "${REPIC_UTILS}/build_subsets.py" "${IN_DIR}/data/defocus_${EMPIAR_ID}.txt" "${IN_DIR}/data" "${IN_DIR}/data" "${IN_DIR}/iterative_particle_picking" --train_set "${LABEL}" || exit $?
# multiple train sets created when possible (depends on number of micrographs)
echo "Downsampling micrographs for input to Topaz ... "
export REPIC_OUT_DIR="${IN_DIR}/iterative_particle_picking/"
bash "${REPIC}/iterative_particle_picking/preprocess_topaz.sh" &> "${REPIC_OUT_DIR}/preprocess_topaz.log"
# round_0 holds the results obtained before any re-training
SUB_DIR="${IN_DIR}/iterative_particle_picking/round_0"
mkdir -p "${SUB_DIR}"
# Randomly thin the BOX labels found in directory $1 into directory $2.
# Every non-empty label line is kept with probability SAMPLE_PROB and decimal
# numbers are rounded to integers (for Topaz). Sampling is repeated until more
# than 4 label lines were kept in total; empty output files are removed
# afterwards (they lead to a Topaz error).
# Globals:   SAMPLE_PROB (read)
# Arguments: $1 - directory with the source *.box files
#            $2 - existing output directory
downsample_labels() {
  local src_dir=$1 out_dir=$2
  local available file base
  # the retry loop below can only finish if more than 4 labels exist at all
  available=$(awk '!/^$/' "${src_dir}"/*.box 2> /dev/null | wc -l)
  if [ "${available}" -le 4 ]; then
    echo "Error - fewer than 5 particle labels found in '${src_dir}'." >&2
    exit 1
  fi
  # placeholder so that the '*.box' glob always matches at least one file
  touch "${out_dir}/tmp.box"
  while [ "$(cat "${out_dir}"/*.box | wc -l)" -le 4 ]; do # ensure there are enough examples sampled
    for file in "${src_dir}"/*.box; do
      base=$(basename -- "${file}")
      # seed explicitly: a bare srand() uses the time of day, so every awk call
      # within the same second would draw the identical random sequence
      awk -v prob="${SAMPLE_PROB}" -v seed="${RANDOM}" 'BEGIN {srand(seed)} !/^$/ { if (rand() <= prob) print $0}' "${file}" > "${out_dir}/${base}"
      perl -i -pe 's/(\d*\.\d*)/int($1+0.5)/ge' "${out_dir}/${base}"
    done
  done
  # remove empty files (leads to Topaz error)
  find "${out_dir}/" -type f -empty -delete
}

if ! ${USE_MANUAL_LABELS}; then
  ##
  # step 2 - apply pre-trained models to train and test sets
  ##
  echo -e "\n--- Identifying particles in '${LABEL}' ---"
  echo "Applying pre-trained models to train, val, and test micrographs ..."
  echo "--- Round: 0 ---"
  echo -e "\tSPHIRE-crYOLO ... "
  # SPHIRE-crYOLO
  # train set prediction
  export REPIC_MRC_DIR="${REPIC_TRAIN_MRC}"
  export REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/cryolo"
  mkdir -p "${REPIC_OUT_DIR}"/BOX/{train,val,test}
  # CBOX/STAR are emptied before every run so that the '*.cbox' glob of the
  # converter only sees the files of the current subset
  rm -rf "${REPIC_OUT_DIR}"/{CBOX,STAR}/*
  bash "${REPIC}/iterative_particle_picking/run_cryolo.sh" &> "${REPIC_OUT_DIR}/iter_train.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/CBOX/*.cbox "${REPIC_OUT_DIR}/BOX/train/" -f cbox -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_train.log"
  # val set prediction
  export REPIC_MRC_DIR="${REPIC_VAL_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/{CBOX,STAR}/*
  bash "${REPIC}/iterative_particle_picking/run_cryolo.sh" &> "${REPIC_OUT_DIR}/iter_val.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/CBOX/*.cbox "${REPIC_OUT_DIR}/BOX/val/" -f cbox -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_val.log"
  # test set prediction
  export REPIC_MRC_DIR="${REPIC_TEST_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/{CBOX,STAR}/*
  bash "${REPIC}/iterative_particle_picking/run_cryolo.sh" &> "${REPIC_OUT_DIR}/iter_test.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/CBOX/*.cbox "${REPIC_OUT_DIR}/BOX/test/" -f cbox -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_test.log"
  if ${GET_SCORE}; then
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/BOX/train/*.box -c 0.3 &> "${REPIC_OUT_DIR}/score_train.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/BOX/val/*.box -c 0.3 &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/BOX/test/*.box -c 0.3 &> "${REPIC_OUT_DIR}/score_test.log"
  fi
  echo -e "\tDeepPicker ... "
  # DeepPicker
  export REPIC_MRC_DIR="${REPIC_TRAIN_MRC}"
  export REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/deep"
  mkdir -p "${REPIC_OUT_DIR}"/BOX/{train,val,test}
  # train set prediction
  rm -rf "${REPIC_OUT_DIR}"/STAR/*.star
  bash "${REPIC}/iterative_particle_picking/run_deep.sh" &> "${REPIC_OUT_DIR}/iter_train.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/STAR/*.star "${REPIC_OUT_DIR}/BOX/train/" -f star -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_train.log"
  # val set prediction
  export REPIC_MRC_DIR="${REPIC_VAL_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/STAR/*.star
  bash "${REPIC}/iterative_particle_picking/run_deep.sh" &> "${REPIC_OUT_DIR}/iter_val.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/STAR/*.star "${REPIC_OUT_DIR}/BOX/val/" -f star -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_val.log"
  # test set prediction
  export REPIC_MRC_DIR="${REPIC_TEST_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/STAR/*.star
  bash "${REPIC}/iterative_particle_picking/run_deep.sh" &> "${REPIC_OUT_DIR}/iter_test.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/STAR/*.star "${REPIC_OUT_DIR}/BOX/test/" -f star -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_test.log"
  if ${GET_SCORE}; then
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/BOX/train/*.box -c 0.5 &> "${REPIC_OUT_DIR}/score_train.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/BOX/val/*.box -c 0.5 &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/BOX/test/*.box -c 0.5 &> "${REPIC_OUT_DIR}/score_test.log"
  fi
  echo -e "\tTopaz ... "
  # Topaz
  export REPIC_MRC_DIR="${REPIC_TRAIN_MRC}"
  export REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/topaz"
  mkdir -p "${REPIC_OUT_DIR}"/BOX/{train,val,test}
  # train set prediction
  rm -rf "${REPIC_OUT_DIR}/downsampled_mrc" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt"
  bash "${REPIC}/iterative_particle_picking/run_topaz.sh" &> "${REPIC_OUT_DIR}/iter_train.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt" "${REPIC_OUT_DIR}/BOX/train" -f tsv -t box -b "${REPIC_BOX_SIZE}" -c 1 2 none none 3 0 --header --multi_out --round 0 --force &> "${REPIC_OUT_DIR}/convert_train.log"
  # val set prediction
  export REPIC_MRC_DIR="${REPIC_VAL_MRC}"
  rm -rf "${REPIC_OUT_DIR}/downsampled_mrc" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt"
  bash "${REPIC}/iterative_particle_picking/run_topaz.sh" &> "${REPIC_OUT_DIR}/iter_val.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt" "${REPIC_OUT_DIR}/BOX/val" -f tsv -t box -b "${REPIC_BOX_SIZE}" -c 1 2 none none 3 0 --header --multi_out --round 0 --force &> "${REPIC_OUT_DIR}/convert_val.log"
  # test set prediction
  export REPIC_MRC_DIR="${REPIC_TEST_MRC}"
  rm -rf "${REPIC_OUT_DIR}/downsampled_mrc" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt"
  bash "${REPIC}/iterative_particle_picking/run_topaz.sh" &> "${REPIC_OUT_DIR}/iter_test.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt" "${REPIC_OUT_DIR}/BOX/test" -f tsv -t box -b "${REPIC_BOX_SIZE}" -c 1 2 none none 3 0 --header --multi_out --round 0 --force &> "${REPIC_OUT_DIR}/convert_test.log"
  if ${GET_SCORE}; then
    # particle probability of >=0.5 is equal to log-likelihood ratio of >=0.0. See (line 17): /~https://github.com/tbepler/topaz/blob/master/tutorial/02_walkthrough.ipynb
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/BOX/train/*.box -c 0. &> "${REPIC_OUT_DIR}/score_train.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/BOX/val/*.box -c 0. &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/BOX/test/*.box -c 0. &> "${REPIC_OUT_DIR}/score_test.log"
  fi
  echo -e "\tBuilding consensus ... "
  # TMP gathers symlinks to the BOX files of all three pickers, one
  # sub-directory per picker, and is rebuilt for every subset
  TMP="${SUB_DIR}/${LABEL}/tmp"
  rm -rf "${TMP}"
  REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/clique_files"
  mkdir -p "${REPIC_OUT_DIR}"
  # train consensus
  mkdir -p "${TMP}"/{crYOLO,deepPicker,topaz}
  cp -s "${SUB_DIR}/${LABEL}"/cryolo/BOX/train/*.box "${TMP}/crYOLO/"
  cp -s "${SUB_DIR}/${LABEL}"/deep/BOX/train/*.box "${TMP}/deepPicker/"
  cp -s "${SUB_DIR}/${LABEL}"/topaz/BOX/train/*.box "${TMP}/topaz/"
  repic get_cliques "${TMP}" "${REPIC_OUT_DIR}/train" "${REPIC_BOX_SIZE}" &> "${REPIC_OUT_DIR}/clique_train.log"
  repic run_ilp "${REPIC_OUT_DIR}/train" "${REPIC_BOX_SIZE}" --num_particles "${REPIC_NUM_PARTICLES}" &> "${REPIC_OUT_DIR}/ilp_train.log"
  rm -rf "${TMP}"
  # val consensus
  mkdir -p "${TMP}"/{crYOLO,deepPicker,topaz}
  cp -s "${SUB_DIR}/${LABEL}"/cryolo/BOX/val/*.box "${TMP}/crYOLO/"
  cp -s "${SUB_DIR}/${LABEL}"/deep/BOX/val/*.box "${TMP}/deepPicker/"
  cp -s "${SUB_DIR}/${LABEL}"/topaz/BOX/val/*.box "${TMP}/topaz/"
  repic get_cliques "${TMP}" "${REPIC_OUT_DIR}/val" "${REPIC_BOX_SIZE}" &> "${REPIC_OUT_DIR}/clique_val.log"
  repic run_ilp "${REPIC_OUT_DIR}/val" "${REPIC_BOX_SIZE}" --num_particles "${REPIC_NUM_PARTICLES}" &> "${REPIC_OUT_DIR}/ilp_val.log"
  rm -rf "${TMP}"
  # test consensus
  mkdir -p "${TMP}"/{crYOLO,deepPicker,topaz}
  cp -s "${SUB_DIR}/${LABEL}"/cryolo/BOX/test/*.box "${TMP}/crYOLO/"
  cp -s "${SUB_DIR}/${LABEL}"/deep/BOX/test/*.box "${TMP}/deepPicker/"
  cp -s "${SUB_DIR}/${LABEL}"/topaz/BOX/test/*.box "${TMP}/topaz/"
  repic get_cliques "${TMP}" "${REPIC_OUT_DIR}/test" "${REPIC_BOX_SIZE}" &> "${REPIC_OUT_DIR}/clique_test.log"
  repic run_ilp "${REPIC_OUT_DIR}/test" "${REPIC_BOX_SIZE}" --num_particles "${REPIC_NUM_PARTICLES}" &> "${REPIC_OUT_DIR}/ilp_test.log"
  rm -rf "${TMP}"
  if ${GET_SCORE}; then
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/train/*.box &> "${REPIC_OUT_DIR}/score_train.log"
    # update Topaz pos-unlabeled mini-batch balancing with respect to training data
    # (mean of the last column of particle_set_comp.tsv, header line skipped)
    export TOPAZ_BALANCE="$(awk '(NR > 1){total += $NF}END{print total/(NR - 1)}' "${REPIC_OUT_DIR}/train/particle_set_comp.tsv")"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/val/*.box &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/test/*.box &> "${REPIC_OUT_DIR}/score_test.log"
  fi
else
  rm -rf "${SUB_DIR}/${LABEL}"/manual/{train,val}
  mkdir -p "${SUB_DIR}/${LABEL}"/manual/{train,val}
  # downsample and round (for Topaz) training labels
  REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/manual/train"
  downsample_labels "${REPIC_TRAIN_MRC}" "${REPIC_OUT_DIR}"
  # downsample and round (for Topaz) validation labels
  REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/manual/val"
  downsample_labels "${REPIC_VAL_MRC}" "${REPIC_OUT_DIR}"
  # small batch size for the first fit on the downsampled labels
  # (set back to 32 at the end of the first retraining round)
  export DEEP_BATCH_SIZE=4
fi
# #
# step 3 - iteratively retrain algorithms using consensus particles as training labels
# #
# Each pass reads its training labels from round_<i>, increments i, and writes
# the re-trained models, their predictions, and the new consensus to round_<i+1>.
# NOTE(review): '--round 0' passed to coord_converter.py is the same in every
# iteration - presumably a rounding option, not the iteration index; confirm.
i=0
while [ "${i}" -lt "${NUM_ROUNDS}" ]; do
  # set input coordinate and output directories
  COORD_DIR="${IN_DIR}/iterative_particle_picking/round_${i}"
  if ${USE_MANUAL_LABELS} && [ "${i}" -eq 0 ]; then
    echo -e "\nFitting models to downsampled manual train and val labels ... "
    export REPIC_TRAIN_COORD="${COORD_DIR}/${LABEL}/manual/train/"
    export REPIC_VAL_COORD="${COORD_DIR}/${LABEL}/manual/val/"
  else
    echo -e "\nFitting models to estimated train and val labels ... "
    export REPIC_TRAIN_COORD="${COORD_DIR}/${LABEL}/clique_files/train/"
    export REPIC_VAL_COORD="${COORD_DIR}/${LABEL}/clique_files/val/"
  fi
  # uncomment line below if continuing from iteration >1
  # export TOPAZ_BALANCE=$(awk '(NR > 1){total += $NF}END{print total/(NR - 1)}' ${COORD_DIR}/${LABEL}/clique_files/train/particle_set_comp.tsv)
  i=$(( i + 1 ))
  SUB_DIR="${IN_DIR}/iterative_particle_picking/round_${i}"
  echo "--- Round: ${i} ---"
  echo -e "--- Identifying particles in '${LABEL}' ---\n\tSPHIRE-crYOLO ... "
  rm -rf "${REPIC_TRAIN_COORD}/STAR" "${REPIC_VAL_COORD}/STAR" # precaution - to prevent CrYOLO from learning labels with 2x weight
  # SPHIRE-crYOLO
  # fit model
  export REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/cryolo"
  mkdir -p "${REPIC_OUT_DIR}"/BOX/{train,val,test}
  bash "${REPIC}/iterative_particle_picking/fit_cryolo.sh" &> "${REPIC_OUT_DIR}/iter_fit.log"
  # train set prediction (from here on the freshly fitted weights are used)
  export REPIC_MRC_DIR="${REPIC_TRAIN_MRC}"
  export CRYOLO_MODEL="${REPIC_OUT_DIR}/learned_weights.h5"
  rm -rf "${REPIC_OUT_DIR}"/{CBOX,STAR}/*
  bash "${REPIC}/iterative_particle_picking/run_cryolo.sh" &> "${REPIC_OUT_DIR}/iter_train.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/CBOX/*.cbox "${REPIC_OUT_DIR}/BOX/train/" -f cbox -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_train.log"
  # val set prediction
  export REPIC_MRC_DIR="${REPIC_VAL_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/{CBOX,STAR}/*
  bash "${REPIC}/iterative_particle_picking/run_cryolo.sh" &> "${REPIC_OUT_DIR}/iter_val.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/CBOX/*.cbox "${REPIC_OUT_DIR}/BOX/val/" -f cbox -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_val.log"
  # test set prediction
  export REPIC_MRC_DIR="${REPIC_TEST_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/{CBOX,STAR}/*
  bash "${REPIC}/iterative_particle_picking/run_cryolo.sh" &> "${REPIC_OUT_DIR}/iter_test.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/CBOX/*.cbox "${REPIC_OUT_DIR}/BOX/test/" -f cbox -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_test.log"
  if ${GET_SCORE}; then
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/BOX/train/*.box -c 0.3 &> "${REPIC_OUT_DIR}/score_train.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/BOX/val/*.box -c 0.3 &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/BOX/test/*.box -c 0.3 &> "${REPIC_OUT_DIR}/score_test.log"
  fi
  echo -e "\tDeepPicker ... "
  # DeepPicker
  # fit model
  export REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/deep"
  mkdir -p "${REPIC_OUT_DIR}"/BOX/{train,val,test}
  bash "${REPIC}/iterative_particle_picking/fit_deep.sh" &> "${REPIC_OUT_DIR}/iter_fit.log"
  # train set prediction (from here on the freshly fitted model is used)
  export REPIC_MRC_DIR="${REPIC_TRAIN_MRC}"
  export DEEP_MODEL="${REPIC_OUT_DIR}/model_demo_type3_refined"
  rm -rf "${REPIC_OUT_DIR}"/STAR/*.star
  bash "${REPIC}/iterative_particle_picking/run_deep.sh" &> "${REPIC_OUT_DIR}/iter_train.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/STAR/*.star "${REPIC_OUT_DIR}/BOX/train/" -f star -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_train.log"
  # val set prediction
  export REPIC_MRC_DIR="${REPIC_VAL_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/STAR/*.star
  bash "${REPIC}/iterative_particle_picking/run_deep.sh" &> "${REPIC_OUT_DIR}/iter_val.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/STAR/*.star "${REPIC_OUT_DIR}/BOX/val/" -f star -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_val.log"
  # test set prediction
  export REPIC_MRC_DIR="${REPIC_TEST_MRC}"
  rm -rf "${REPIC_OUT_DIR}"/STAR/*.star
  bash "${REPIC}/iterative_particle_picking/run_deep.sh" &> "${REPIC_OUT_DIR}/iter_test.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}"/STAR/*.star "${REPIC_OUT_DIR}/BOX/test/" -f star -t box -b "${REPIC_BOX_SIZE}" --round 0 --force &> "${REPIC_OUT_DIR}/convert_test.log"
  if ${GET_SCORE}; then
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/BOX/train/*.box -c 0.5 &> "${REPIC_OUT_DIR}/score_train.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/BOX/val/*.box -c 0.5 &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/BOX/test/*.box -c 0.5 &> "${REPIC_OUT_DIR}/score_test.log"
  fi
  echo -e "\tTopaz ... "
  # Topaz
  # fit model
  export REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/topaz"
  mkdir -p "${REPIC_OUT_DIR}"/BOX/{train,val,test}
  rm -f "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt" # in case of prev. instance (shouldn't be needed)
  bash "${REPIC}/iterative_particle_picking/fit_topaz.sh" &> "${REPIC_OUT_DIR}/iter_fit.log"
  # train set prediction (from here on the freshly fitted model is used)
  export REPIC_MRC_DIR="${REPIC_TRAIN_MRC}"
  export TOPAZ_MODEL="${REPIC_OUT_DIR}/model_epoch10.sav"
  rm -f "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt"
  bash "${REPIC}/iterative_particle_picking/run_topaz.sh" &> "${REPIC_OUT_DIR}/iter_train.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt" "${REPIC_OUT_DIR}/BOX/train" -f tsv -t box -b "${REPIC_BOX_SIZE}" -c 1 2 none none 3 0 --header --multi_out --round 0 --force &> "${REPIC_OUT_DIR}/convert_train.log"
  # val set prediction
  export REPIC_MRC_DIR="${REPIC_VAL_MRC}"
  rm -f "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt"
  bash "${REPIC}/iterative_particle_picking/run_topaz.sh" &> "${REPIC_OUT_DIR}/iter_val.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt" "${REPIC_OUT_DIR}/BOX/val" -f tsv -t box -b "${REPIC_BOX_SIZE}" -c 1 2 none none 3 0 --header --multi_out --round 0 --force &> "${REPIC_OUT_DIR}/convert_val.log"
  # test set prediction
  export REPIC_MRC_DIR="${REPIC_TEST_MRC}"
  rm -f "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt"
  bash "${REPIC}/iterative_particle_picking/run_topaz.sh" &> "${REPIC_OUT_DIR}/iter_test.log"
  python "${REPIC_UTILS}/coord_converter.py" "${REPIC_OUT_DIR}/predicted_particles_all_upsampled.txt" "${REPIC_OUT_DIR}/BOX/test" -f tsv -t box -b "${REPIC_BOX_SIZE}" -c 1 2 none none 3 0 --header --multi_out --round 0 --force &> "${REPIC_OUT_DIR}/convert_test.log"
  if ${GET_SCORE}; then
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/BOX/train/*.box -c 0. &> "${REPIC_OUT_DIR}/score_train.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/BOX/val/*.box -c 0. &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/BOX/test/*.box -c 0. &> "${REPIC_OUT_DIR}/score_test.log"
  fi
  echo -e "\tBuilding consensus ... "
  # TMP gathers symlinks to the BOX files of all three pickers, one
  # sub-directory per picker, and is rebuilt for every subset
  TMP="${SUB_DIR}/${LABEL}/tmp"
  rm -rf "${TMP}"
  # train consensus
  mkdir -p "${TMP}"/{crYOLO,deepPicker,topaz}
  cp -fs "${SUB_DIR}/${LABEL}"/cryolo/BOX/train/*.box "${TMP}/crYOLO/"
  cp -fs "${SUB_DIR}/${LABEL}"/deep/BOX/train/*.box "${TMP}/deepPicker/"
  cp -fs "${SUB_DIR}/${LABEL}"/topaz/BOX/train/*.box "${TMP}/topaz/"
  REPIC_OUT_DIR="${SUB_DIR}/${LABEL}/clique_files"
  mkdir -p "${REPIC_OUT_DIR}"
  repic get_cliques "${TMP}" "${REPIC_OUT_DIR}/train" "${REPIC_BOX_SIZE}" &> "${REPIC_OUT_DIR}/clique_train.log"
  repic run_ilp "${REPIC_OUT_DIR}/train" "${REPIC_BOX_SIZE}" --num_particles "${REPIC_NUM_PARTICLES}" &> "${REPIC_OUT_DIR}/ilp_train.log"
  rm -rf "${TMP}"
  # val consensus
  mkdir -p "${TMP}"/{crYOLO,deepPicker,topaz}
  cp -s "${SUB_DIR}/${LABEL}"/cryolo/BOX/val/*.box "${TMP}/crYOLO/"
  cp -s "${SUB_DIR}/${LABEL}"/deep/BOX/val/*.box "${TMP}/deepPicker/"
  cp -s "${SUB_DIR}/${LABEL}"/topaz/BOX/val/*.box "${TMP}/topaz/"
  repic get_cliques "${TMP}" "${REPIC_OUT_DIR}/val" "${REPIC_BOX_SIZE}" &> "${REPIC_OUT_DIR}/clique_val.log"
  repic run_ilp "${REPIC_OUT_DIR}/val" "${REPIC_BOX_SIZE}" --num_particles "${REPIC_NUM_PARTICLES}" &> "${REPIC_OUT_DIR}/ilp_val.log"
  rm -rf "${TMP}"
  # test consensus
  mkdir -p "${TMP}"/{crYOLO,deepPicker,topaz}
  cp -s "${SUB_DIR}/${LABEL}"/cryolo/BOX/test/*.box "${TMP}/crYOLO/"
  cp -s "${SUB_DIR}/${LABEL}"/deep/BOX/test/*.box "${TMP}/deepPicker/"
  cp -s "${SUB_DIR}/${LABEL}"/topaz/BOX/test/*.box "${TMP}/topaz/"
  repic get_cliques "${TMP}" "${REPIC_OUT_DIR}/test" "${REPIC_BOX_SIZE}" &> "${REPIC_OUT_DIR}/clique_test.log"
  repic run_ilp "${REPIC_OUT_DIR}/test" "${REPIC_BOX_SIZE}" --num_particles "${REPIC_NUM_PARTICLES}" &> "${REPIC_OUT_DIR}/ilp_test.log"
  rm -rf "${TMP}"
  if ${GET_SCORE}; then
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}/train/${LABEL}"/*.box -p "${REPIC_OUT_DIR}"/train/*.box &> "${REPIC_OUT_DIR}/score_train.log"
    # mean of the last column of particle_set_comp.tsv (header line skipped)
    export TOPAZ_BALANCE="$(awk '(NR > 1){total += $NF}END{print total/(NR - 1)}' "${REPIC_OUT_DIR}/train/particle_set_comp.tsv")"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/val/*.box -p "${REPIC_OUT_DIR}"/val/*.box &> "${REPIC_OUT_DIR}/score_val.log"
    python "${REPIC_UTILS}/score_detections.py" -g "${REPIC_COORD}"/test/*.box -p "${REPIC_OUT_DIR}"/test/*.box &> "${REPIC_OUT_DIR}/score_test.log"
  fi
  export DEEP_BATCH_SIZE=32 # in case manual particles are used with downsampling
done