-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathargument_parser_train.py
217 lines (189 loc) · 12.3 KB
/
argument_parser_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# Args Parser for training or evaluation (classification) of models
import argparse
def parse_args(argv=None):
    """Build the training/evaluation argument parser and parse the command line.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse falls back to ``sys.argv[1:]``, which is
            the behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace holding one attribute per option declared below.
        Options spelled with dashes (e.g. ``--bce-sum``) are exposed with
        underscores (``args.bce_sum``), as argparse does by default.

    Raises:
        SystemExit: raised by argparse on invalid arguments, a missing
            ``--data_path``, or ``-h/--help``.
    """
    parser = argparse.ArgumentParser(
        description='Model trainer for Image Classification'
    )
    parser.add_argument('--model_arch', default='resnet50', type=str,
                        help='pick model architecture')
    parser.add_argument('--use_torchvision_resnet_model', default=False, action='store_true')

    # Data
    parser.add_argument('--data_path',
                        help='directory that contains cub files', required=True)
    parser.add_argument('--image_sub_path_train', default='images',
                        help='subdirectory that contains training images')
    parser.add_argument('--image_sub_path_test', default='images',
                        help='subdirectory that contains test images')
    parser.add_argument('--dataset', default='cub', type=str)
    parser.add_argument('--train_split', default=0.9, type=float, help='fraction of training data to use')
    parser.add_argument('--eval_mode', default='val', choices=['train', 'val', 'test'], type=str,
                        help='which split to use for evaluation')
    parser.add_argument('--anno_path_train', default='', type=str, required=False)
    parser.add_argument('--anno_path_test', default='', type=str, required=False)
    parser.add_argument('--metadata_path', default='', type=str, required=False)
    parser.add_argument('--species_id_to_name_file', default='', type=str, required=False)

    # Training
    parser.add_argument('--snapshot_dir', type=str)
    parser.add_argument('--save_every_n_epochs', default=10, type=int)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--epochs', type=int, default=28)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--seed', default=42, type=int)
    parser.add_argument('--grad_accumulation_steps', default=1, type=int)

    # Attention map saving probability
    parser.add_argument('--amap_saving_prob', default=0.05, type=float)

    # * Misc training params
    parser.add_argument('--grad_norm_clip', default=2.0, type=float)
    parser.add_argument('--use_amp', action='store_true', default=False)

    # Evaluation params
    parser.add_argument('--eval_only', default=False, action='store_true',
                        help='Whether to only eval the model')
    parser.add_argument('--crop_pct', type=float, default=None)

    # * Mixup params
    parser.add_argument('--mixup', type=float, default=0.0,
                        help='mixup alpha, mixup enabled if > 0.')
    parser.add_argument('--cutmix', type=float, default=0.0,
                        help='cutmix alpha, cutmix enabled if > 0.')
    parser.add_argument('--cutmix_minmax', type=float, nargs='+', default=None,
                        help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)')
    parser.add_argument('--mixup_prob', type=float, default=1.0,
                        help='Probability of performing mixup or cutmix when either/both is enabled')
    parser.add_argument('--mixup_switch_prob', type=float, default=0.5,
                        help='Probability of switching to cutmix when both mixup and cutmix enabled')
    parser.add_argument('--mixup_mode', type=str, default='batch',
                        help='How to apply mixup/cutmix params. Per "batch", "pair", or "elem"')

    # Augmentation parameters
    parser.add_argument('--augmentations_to_use', type=str, default='cub_original',
                        choices=['timm', 'torchvision', 'cub_original'])
    parser.add_argument('--image_size', default=448, type=int)
    parser.add_argument('--color_jitter', type=float, default=0.4, metavar='PCT',
                        help='Color jitter factor (default: 0.4)')
    # The help text previously embedded a stray '" + "' left over from a
    # broken string concatenation, and the call ended with a dangling comma.
    parser.add_argument('--aa', type=str, default='rand-m9-mstd0.5-inc1', metavar='NAME',
                        help='Use AutoAugment policy. "v0" or "original" '
                             '(default: rand-m9-mstd0.5-inc1)')
    parser.add_argument('--train_interpolation', type=str, default='bicubic',
                        help='Training interpolation (random, bilinear, bicubic default: "bicubic")')
    # NOTE: store_false with default=True, so *passing* this flag sets the
    # value to False. Kept as-is for backward compatibility.
    parser.add_argument('--imagenet_default_mean_and_std', action='store_false', default=True,
                        help='Passing this flag sets the value to False (default: True)')
    parser.add_argument('--hflip', type=float, default=0.5, help='Horizontal flip probability')
    parser.add_argument('--vflip', type=float, default=0., help='Vertical flip probability')

    # Random Erase params
    # Help text now reports the real default (0.0) instead of 0.25.
    parser.add_argument('--reprob', type=float, default=0., metavar='PCT',
                        help='Random erase prob (default: 0.0)')
    parser.add_argument('--remode', type=str, default='pixel',
                        help='Random erase mode (default: "pixel")')
    parser.add_argument('--recount', type=int, default=1,
                        help='Random erase count (default: 1)')
    parser.add_argument('--resplit', action='store_true', default=False,
                        help='Do not random erase first (clean) augmentation split')

    # Model params
    parser.add_argument('--num_parts', help='number of parts to predict',
                        default=8, type=int)
    parser.add_argument('--pretrained_start_weights', default=False, action='store_true')
    parser.add_argument('--drop_path', type=float, default=0, metavar='PCT',
                        help='Drop path rate (default: 0.0)')
    parser.add_argument('--output_stride', type=int, default=32, help='stride of the model')
    parser.add_argument('--freeze_backbone', default=False, action='store_true')
    parser.add_argument('--freeze_params', default=False, action='store_true')

    # * Optimizer params
    parser.add_argument('--optimizer_type', default='adam', type=str)
    parser.add_argument('--weight_decay', default=0, type=float, help='normalized weight decay')
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--betas1', default=0.9, type=float)
    parser.add_argument('--betas2', default=0.999, type=float)
    parser.add_argument('--dampening', default=0.0, type=float)
    parser.add_argument('--trust_coeff', default=0.001, type=float)
    parser.add_argument('--always_adapt', action='store_true', default=False)
    parser.add_argument('--turn_off_grad_averaging', action='store_true', default=False)
    parser.add_argument('--max_grad_norm', default=1.0, type=float)

    # * Scheduler params
    parser.add_argument('--scheduler_type', default='cosine',
                        choices=['cosine', 'linearlr', 'steplr'],
                        type=str)
    parser.add_argument('--scheduler_warmup_epochs', default=0, type=int)
    parser.add_argument('--warmup_lr', type=float, default=0.0)
    parser.add_argument('--scheduler_restart_factor', default=1, type=int)
    parser.add_argument('--scheduler_gamma', default=0.1, type=float)
    parser.add_argument('--scheduler_step_size', default=10, type=int)
    parser.add_argument('--min_lr', type=float, default=0.0, metavar='LR',
                        help='lower lr bound for cyclic schedulers that hit 0')
    parser.add_argument('--cosine_cycle_limit', default=1, type=int)

    # * LR params for each param group
    parser.add_argument('--lr', default=1e-6, type=float)
    parser.add_argument('--scratch_lr_factor', default=1e4, type=float)
    parser.add_argument('--finer_lr_factor', default=1e3, type=float)
    parser.add_argument('--modulation_lr_factor', default=1e4, type=float)

    # Wandb params
    parser.add_argument('--wandb', action='store_true')
    parser.add_argument('--wandb_project', default='', type=str)
    parser.add_argument('--job_type', default='', type=str)
    parser.add_argument('--log_interval', default=10, type=int)
    parser.add_argument('--group', default='vit_base', type=str)
    parser.add_argument('--wandb_entity', default='', type=str)
    parser.add_argument('--wandb_mode', default='online', type=str, choices=['online', 'offline'])

    # * Resume training params
    parser.add_argument('--resume_training', action='store_true', default=False)
    parser.add_argument('--wandb_resume_id', default=None, type=str)

    # Loss hyperparameters
    parser.add_argument('--classification_loss', default=1.0, type=float)
    parser.add_argument('--presence_loss', default=1.0, type=float)
    parser.add_argument('--presence_loss_beta', default=0.1, type=float)
    parser.add_argument('--presence_loss_type', default="original",
                        choices=["original", "soft_constraint", "tanh", "soft_tanh"], type=str)
    parser.add_argument('--concentration_loss', default=0, type=float)
    parser.add_argument('--equivariance_loss', default=1.0, type=float)
    parser.add_argument('--orthogonality_loss_landmarks', default=1.0, type=float)
    parser.add_argument('--total_variation_loss', default=1.0, type=float)
    parser.add_argument('--enforced_presence_loss', default=2.0, type=float)
    parser.add_argument('--enforced_presence_loss_type', default="enforced_presence",
                        choices=["linear", "log", "mse", "enforced_presence"],
                        type=str)
    parser.add_argument('--pixel_wise_entropy_loss', default=1.0, type=float)

    # BCE Loss (for multi-class classification) from timm
    parser.add_argument('--use_bce_loss', default=False, action='store_true')
    parser.add_argument('--bce-sum', action='store_true', default=False,
                        help='Sum over classes when using BCE loss.')
    parser.add_argument('--bce-target-thresh', type=float, default=None,
                        help='Threshold for binarizing softened BCE targets (default: None, disabled).')
    parser.add_argument('--bce-pos-weight', type=float, default=None,
                        help='Positive weighting for BCE loss.')

    # Label Smoothing
    parser.add_argument('--smoothing', type=float, default=0.0,
                        help='Label smoothing (default: 0.0)')

    # Equivariance affine transform params
    parser.add_argument('--degrees', default=90, type=float)
    parser.add_argument('--translate_x', default=0.11, type=float)
    parser.add_argument('--translate_y', default=0.11, type=float)
    parser.add_argument('--scale_l', default=0.8, type=float)
    parser.add_argument('--scale_u', default=1.4, type=float)
    parser.add_argument('--shear_x', default=0.0, type=float)
    parser.add_argument('--shear_y', default=0.0, type=float)

    # Part Dropout
    parser.add_argument('--part_dropout', default=0.3, type=float)

    # Add noise to vit output features
    parser.add_argument('--noise_variance', default=0.0, type=float)

    # Gumbel Softmax
    parser.add_argument('--gumbel_softmax', default=False, action='store_true')
    parser.add_argument('--gumbel_softmax_temperature', default=1.0, type=float)
    parser.add_argument('--gumbel_softmax_hard', default=False, action='store_true')

    # Modulation
    parser.add_argument('--modulation_type', default="original",
                        choices=["original", "layer_norm", "parallel_mlp", "parallel_mlp_no_bias",
                                 "parallel_mlp_no_act", "parallel_mlp_no_act_no_bias", "none"],
                        type=str)
    parser.add_argument('--modulation_orth', default=False, action='store_true',
                        help='use orthogonality loss on modulated features')

    # Classifier type
    parser.add_argument('--classifier_type', default="linear",
                        choices=["linear", "independent_mlp"], type=str)

    # Array training job
    parser.add_argument('--array_training_job', default=False, action='store_true',
                        help='Whether to run as an array job (i.e. training with multiple random seeds on the same settings)')

    # Model Iterate Averaging Type
    group = parser.add_argument_group('Model iterate average parameters')
    group.add_argument('--averaging_type', default='', type=str, help='Type of model iterate averaging to use')
    group.add_argument('--model-ema-force-cpu', action='store_true', default=False,
                       help='Force ema to be tracked on CPU, rank=0 node only. Disables EMA validation.')
    group.add_argument('--model-ema-decay', type=float, default=0.9999,
                       help='Decay factor for model weights moving average (default in timm: 0.9999)')
    # Help previously read "Enable warmup", contradicting the flag name.
    # NOTE(review): wording assumes the consumer treats this flag as
    # "turn warmup off" -- confirm against the training script.
    group.add_argument('--no-model-ema-warmup', action='store_true',
                       help='Disable warmup for model EMA decay.')

    # argv=None makes argparse read sys.argv[1:], matching the old behaviour.
    args = parser.parse_args(argv)
    return args