Skip to content

Commit

Permalink
save results
Browse files Browse the repository at this point in the history
  • Loading branch information
windsong57 committed Mar 21, 2024
2 parents caa2cbe + cd06c96 commit b8d147e
Show file tree
Hide file tree
Showing 23 changed files with 1,448 additions and 228 deletions.
3 changes: 2 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"type": "python",
"request": "launch",
"module": "enter-your-module-name",
"justMyCode": true
"justMyCode": true,
"env": {"PL_TORCH_DISTRIBUTED_BACKEND":"gloo"}
}
]
}
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"cSpell.words": [
"denorm"
]
}
78 changes: 55 additions & 23 deletions Modeling eMNS/Generative_model_ETH_v0.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,16 @@
"print(min_Bfield.shape)\n",
"print(max_Bfield.shape)\n",
"\n",
"torch.save(min_current, \"./normalize_data/cnn_min_current_ETH.pt\")\n",
"torch.save(max_current, \"./normalize_data/cnn_max_current_ETH.pt\")\n",
"torch.save(min_Bfield, \"./normalize_data/cnn_min_Bfield_ETH.pt\")\n",
"torch.save(max_Bfield, \"./normalize_data/cnn_max_Bfield_ETH.pt\")"

"print(minB.shape)\n",
"print(maxB.shape)\n",
"current_norm_max, index = torch.max(Bfield_norm.transpose(0,1).reshape(3,-1), dim=1, keepdim=True)\n",
"print(current_norm_max)\n",
"# torch.save(min_current, \"./normalize_data/cnn_min_current_ETH.pt\")\n",
"# torch.save(max_current, \"./normalize_data/cnn_max_current_ETH.pt\")\n",
"# torch.save(min_Bfield, \"./normalize_data/cnn_min_Bfield_ETH.pt\")\n",
"# torch.save(max_Bfield, \"./normalize_data/cnn_max_Bfield_ETH.pt\")"

]
},
{
Expand All @@ -127,23 +133,35 @@
"metadata": {},
"outputs": [],
"source": [
"from Neural_network import Generative_net, Generative_net_test, ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset\n",
"from Training_loop import train_part_GM,get_mean_of_dataloader\n",
"from tqdm import tqdm\n",
"\n",
"from Neural_network import Generative_net,Generative_net_test ,ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset\n",
"###############################################\n",
"# Config the neural network\n",
"###############################################\n",
"num_input = 8\n",
"output_shape = (3,16,16,16)\n",
"SB_args = (64,64,4,1) # (Cin, Cout, num_repeat, num_block)\n",
"BB_args = (2,2) # (scale_factor, num_block)\n",

"SB_args = (64,64,1,4) # (Cin, Cout, num_repeat, num_block)\n",
"BB_args = (2,3) # (scale_factor, num_block)\n",

"SB_block = ResidualEMNSBlock_3d \n",
"BB_block = BigBlock\n",
"DF = False # whether using divergence free model\n",
"\n",
"Generative_network = Generative_net(SB_args, BB_args, SB_block, BB_block, num_input=num_input, output_shape= output_shape)\n",
"print(Generative_network)"
"Generative_network = Generative_net_test(SB_args, BB_args, SB_block, BB_block, num_input=num_input, output_shape= output_shape)\n",
"print(Generative_network)\n",
"\n",
"from torchviz import make_dot\n",
"import torch.nn.functional as F\n",
"from Training_loop import grad_loss_Jacobain\n",
"x = torch.randn(2,8)\n",
"y = Bfield[0:2]\n",
"preds = Generative_network(x)\n",
"print(preds.shape)\n",
"loss = F.l1_loss(preds,y)+grad_loss_Jacobain(preds,y)\n",
" # optimizer.zero_grad() #zero out all of gradient\n",
"loss.backward()\n",
"\n",
"make_dot(loss, params=dict(Generative_network.named_parameters()))\n"
]
},
{
Expand All @@ -152,7 +170,7 @@
"metadata": {},
"outputs": [],
"source": [
"from Neural_network import Generative_net, Generative_net_test, ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset\n",
"from Neural_network import Generative_net, ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset\n",
"from Training_loop import train_part_GM,get_mean_of_dataloader\n",
"from tqdm import tqdm\n",
"\n",
Expand All @@ -174,13 +192,13 @@
"DF = False # whether using divergence free model\n",
"\n",
"Generative_network = Generative_net(SB_args, BB_args, SB_block, BB_block, num_input=num_input, output_shape= output_shape)\n",
"epochs = 400\n",
"epochs = 350\n",
"learning_rate_decay = .5\n",
"learning_rates = [1e-4]\n",
"learning_rates = [1e-5]\n",
"RMSE_lr = []\n",
"schedule = []\n",
"linear_lr = False\n",
"weight_decays = [0]\n",
"weight_decays = [1e-3]\n",
"\n",
"train_percents = np.arange(1.0,1.01,0.1)\n",
"RMSE_history_end = np.zeros(len(train_percents))\n",
Expand Down Expand Up @@ -233,11 +251,16 @@
" mse_val_history_end[index] = mse_val_history[epoch_stop]\n",
" index=index+1\n",
" print('training stop at epoch:',epoch_stop)\n",
" print('training stop at epoch:',Rsquare)\n",
"torch.save(Generative_network, 'EMS_CNN.pt')\t# 这里会存储迄今最优模型的参数\n",
"print(RMSE_lr)\n",
"print(learning_rates)\n",
"print(RMSE_lr[0],learning_rates[0])\n"
" print('training stop at epoch:',Rsquare)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"torch.save(Generative_network, 'EMS_CNN_ETH.pt')\t# 这里会存储迄今最优模型的参数"
]
},
{
Expand All @@ -246,6 +269,12 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"print(RMSE_lr)\n",
"print(learning_rates)\n",
"print(RMSE_lr[0],learning_rates[0])\n",
"import matplotlib.pyplot as plt \n",
"plt.plot(learning_rates,RMSE_lr)\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"ave_site = 5\n",
Expand All @@ -259,6 +288,7 @@
"plt.legend(['loss','loss_conv'])\n",
"plt.xlabel('iterations')\n",
"plt.ylabel('loss')\n",
"plt.ylim([0,1])\n",
"plt.show()\n",
"\n",
"plt.title('Train and Val RMSE(sample_num=1000)')\n",
Expand All @@ -270,6 +300,7 @@
"plt.legend(['train CNN','val CNN'])\n",
"plt.xlabel('iterations')\n",
"plt.ylabel('RMSE(mT)')\n",
"plt.ylim([0,100])\n",
"plt.grid()\n",
"plt.show()\n",
"\n",
Expand All @@ -281,7 +312,8 @@
"plt.ylabel('mse(mT^2)')\n",
"plt.grid()\n",
"plt.show()\n",
"print(epoch_stop)\n"
"print(epoch_stop)\n",
"\n"
]
},
{
Expand Down Expand Up @@ -310,7 +342,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.1"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
90 changes: 65 additions & 25 deletions Modeling eMNS/Generative_model_ETH_v2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
"### Train ETH data to CNN generative network"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install -U \"ray[data,train,tune,serve]\""
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -95,7 +104,7 @@
"outputs": [],
"source": [
"from Neural_network import Generative_net, Generative_net_test, ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset\n",
"from Training_loop_v2 import train_GM\n",
"from Training_loop_v2 import train_GM, train_GM_ray\n",
"from functools import partial\n",
"from ray.train import RunConfig, ScalingConfig, CheckpointConfig\n",
"from ray.train.torch import TorchTrainer\n",
Expand All @@ -116,17 +125,17 @@
"extremes = dataset.train_norm(train_indices = train_set.indices)\n",
"\n",
"tune_schedule = ASHAScheduler(\n",
" metric=\"loss\", # metric to optimize. This metric should be reported with tune.report()\n",
" metric=\"rmse_val\", # metric to optimize. This metric should be reported with tune.report()\n",
" mode=\"min\",\n",
" max_t=10,\n",
" grace_period=1, # minimum stop epoch\n",
" max_t=350,\n",
" grace_period=10, # minimum stop epoch\n",
" reduction_factor=2,\n",
" )\n",
"param_space = {\n",
" \"scaling_config\": ScalingConfig(\n",
" num_workers = 1,\n",
" use_gpu = False,\n",
" #resource_per_worker = {\"CPU\":1, \"GPU\":1}\n",
" use_gpu = use_gpu,\n",
" resources_per_worker = {\"CPU\":4, \"GPU\":1}\n",
" ),\n",
" # You can even grid search various datasets in Tune.\n",
" # \"datasets\": {\n",
Expand All @@ -135,19 +144,19 @@
" # ),\n",
" # },\n",
" \"train_loop_config\": {\n",
" 'epochs': tune.choice([10]),\n",
" 'lr_max': tune.loguniform(1e-4,1e-2),\n",
" 'lr_min': tune.loguniform(1e-5,1e-7),\n",
" 'batch_size': tune.choice([4,8,16]),\n",
" 'L2_norm' : tune.choice([0]),\n",
" 'epochs': 350,\n",
" 'lr_max': 1e-4,\n",
" 'lr_min': 2.5e-6,\n",
" 'batch_size': 8,\n",
" 'L2_norm' : 0,\n",
" 'verbose': False,\n",
" 'DF' : tune.choice([True,False]),\n",
" 'DF' : False,\n",
" 'schedule': [],\n",
" 'grid_space': 16**3,\n",
" 'learning_rate_decay': 0.5,\n",
" 'skip_spacing': tune.choice([1,2,4]),\n",
" 'num_repeat' : tune.choice([1,2,4]),\n",
" 'num_block' : tune.choice([1,2,3]),\n",
" 'skip_spacing': tune.grid_search([1,2,4]),\n",
" 'num_repeat' : tune.grid_search([1,2,4]),\n",
" 'num_block' : tune.grid_search([1,2,3]),\n",
" 'maxB' : extremes[2],\n",
" 'minB' : extremes[3],\n",
" 'train_set' : train_set,\n",
Expand All @@ -170,7 +179,7 @@
"################################################\n",
"\n",
"train_loop_config = {\n",
" 'epochs': 10,\n",
" 'epochs': 350,\n",
" 'lr_max': 1e-4,\n",
" 'lr_min': 2.5e-6,\n",
" 'batch_size': 8,\n",
Expand All @@ -197,7 +206,7 @@
"scaling_config = ScalingConfig(\n",
" num_workers = 1,\n",
" use_gpu = use_gpu,\n",
" #resource_per_worker = {\"CPU\":1, \"GPU\":1}\n",
" resources_per_worker = {\"CPU\":8, \"GPU\":2}\n",
")\n",
"\n",
"run_config = RunConfig(checkpoint_config=CheckpointConfig(num_to_keep=1))\n",
Expand All @@ -212,18 +221,40 @@
" run_config = run_config,\n",
"\n",
")\n",
"\n",
"result = trainer.fit()\n",
"# tuner = tune.Tuner(\n",
"# trainer,\n",
"# param_space = param_space,\n",
"# tune_config =tune.TuneConfig(\n",
"# scheduler=tune_schedule,\n",
"# num_samples=10, # number of samples of hyperparameter space\n",
"# num_samples=1, # number of samples of hyperparameter space\n",
"# ),\n",
"# # run_config = RunConfig(storage_path=\"./results\", name=\"test_experiment\")\n",
"# )\n",
" \n",
"# tuner.fit()"
"# results = tuner.fit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(torch.device(type='cuda', index=0))\n",
"print(ray.train.torch.get_device())\n",
"print(torch.device('cuda:0'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"best_result = results.get_best_result(metric='rmse_val',mode='min')\n",
"print(best_result)"
]
},
{
Expand All @@ -233,7 +264,16 @@
"outputs": [],
"source": [
"from utils import plot_ray_results\n",
"plot_ray_results(result, metrics_names=['rmse_train','rmse_val'])"
"plot_ray_results(best_result, metrics_names=['rmse_train','rmse_val'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_ray_results(result, metrics_names=['rmse_train','rmse_val'],ylim=[0,25])"
]
},
{
Expand Down Expand Up @@ -272,9 +312,9 @@
" 'schedule': [],\n",
" 'grid_space': 16**3,\n",
" 'learning_rate_decay': 0.5,\n",
" 'skip_spacing': 1,\n",
" 'num_repeat' : 4,\n",
" 'num_block' : 2,\n",
" 'skip_spacing': 2,\n",
" 'num_repeat' : 2,\n",
" 'num_block' : 3,\n",
" 'device' : device,\n",
"}\n",
"train_percents = np.arange(1.0,1.01,0.1)\n",
Expand Down Expand Up @@ -359,7 +399,7 @@
"plt.legend(['loss','loss_conv'])\n",
"plt.xlabel('iterations')\n",
"plt.ylabel('loss')\n",
"# plt.ylim([0,10])\n",
"plt.ylim([0,10])\n",
"plt.show()\n",
"\n",
"plt.title('Train and Val RMSE(sample_num=1000)')\n",
Expand Down Expand Up @@ -404,7 +444,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.9.7"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit b8d147e

Please sign in to comment.