Commit: Updates and bug fixes
pratulsrinivasan authored Apr 19, 2018
1 parent 26c12a3 commit b68bf83
Showing 1 changed file with 105 additions and 74 deletions.
179 changes: 105 additions & 74 deletions Local_Light_Field_Synthesis.ipynb
@@ -3,9 +3,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
@@ -30,7 +28,7 @@
"#parameters\n",
"\n",
"lfsize = [372, 540, 8, 8] #dimensions of Lytro light fields\n",
"batchsize = 1\n",
"batchsize = 1 #modify based on user's GPU memory\n",
"patchsize = [192, 192] #spatial dimensions of training light fields\n",
"disp_mult = 4.0 #max disparity between adjacent veiws\n",
"num_crops = 4 #number of random spatial crops per light field for each input queue thread to push\n",
@@ -48,28 +46,53 @@
"source": [
"#functions for CNN layers\n",
"\n",
-"def weight_variable(w_shape, name):\n",
-"    return tf.get_variable(name, w_shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())\n",
+"def weight_variable(w_shape):\n",
+"    return tf.get_variable('weights', w_shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())\n",
"\n",
"def bias_variable(b_shape, init_bias=0.0):\n",
"    return tf.get_variable('bias', b_shape, initializer=tf.constant_initializer(init_bias))\n",
"\n",
-"#standard atrous layer\n",
-"def cnn_layer(input_tensor, w_shape, b_shape, layer_name, is_training, rate=1, padding_type='SAME'):\n",
-"    with tf.variable_scope(layer_name):\n",
-"        W = weight_variable(w_shape, '_weights')\n",
-"        h = tf.nn.atrous_conv2d(input_tensor, W, rate, padding=padding_type, name=layer_name + '_conv')\n",
-"        h = h + bias_variable(b_shape)\n",
-"        h = tf.nn.elu(h)\n",
-"        h = tf.contrib.layers.batch_norm(h, scale=True, updates_collections=None, \n",
-"                                         is_training=is_training, scope=layer_name + '_bn')\n",
-"        return h\n",
-"\n",
-"#layer with no normalization or activation\n",
-"def cnn_layer_no_bn(input_tensor, w_shape, b_shape, layer_name, rate=1, padding_type='SAME'):\n",
-"    with tf.variable_scope(layer_name):\n",
-"        W = weight_variable(w_shape, '_weights')\n",
-"        h = tf.nn.atrous_conv2d(input_tensor, W, rate, padding=padding_type, name=layer_name + '_conv')\n",
-"        h = h + bias_variable(b_shape)\n",
-"        return h"
+"def cnn_layer(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds], padding='VALID', dilation_rate=[rate, rate], name=layer_name + '_conv')\n",
+"        h = tf.contrib.layers.instance_norm(h + bias_variable(b_shape))\n",
+"        h = tf.nn.leaky_relu(h)\n",
+"        return h\n",
+"    \n",
+"def cnn_layer_plain(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds], padding='VALID', dilation_rate=[rate, rate], name=layer_name + '_conv')\n",
+"        h = h + bias_variable(b_shape)\n",
+"        return h\n",
+"    \n",
+"def cnn_layer_3D(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        pad_amt_2 = rate * (w_shape[2] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[pad_amt_2,pad_amt_2],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds, ds], padding='VALID', dilation_rate=[rate, rate, rate], name=layer_name + '_conv')\n",
+"        h = tf.contrib.layers.instance_norm(h + bias_variable(b_shape))\n",
+"        h = tf.nn.leaky_relu(h)\n",
+"        return h\n",
+"    \n",
+"def cnn_layer_3D_plain(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        pad_amt_2 = rate * (w_shape[2] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[pad_amt_2,pad_amt_2],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds, ds], padding='VALID', dilation_rate=[rate, rate, rate], name=layer_name + '_conv')\n",
+"        h = h + bias_variable(b_shape)\n",
+"        return h"
]
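The new layers pad symmetrically by `rate*(k-1)//2` per side and then convolve with 'VALID', which keeps every `cnn_layer` size-preserving at stride 1; and because instance normalization uses per-example statistics, it behaves the same at train and test time, which is why the `is_training` flag disappears from the signatures above. A quick sanity check of the padding arithmetic (illustrative, not part of the notebook):

```python
# For a 1-D 'VALID' dilated conv at stride 1: n_out = n + 2*pad - rate*(k - 1).
# With pad = rate*(k - 1)//2 per side and odd k, n_out == n.
def valid_dilated_out_len(n, k, rate, pad):
    return n + 2 * pad - rate * (k - 1)

for rate in [1, 2, 4, 8, 16]:           # dilation rates used by depth_network
    pad = rate * (3 - 1) // 2
    assert valid_dilated_out_len(192, 3, rate, pad) == 192
print("pad = rate*(k-1)//2 keeps 'VALID' dilated convs size-preserving")
```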
@@ -78,13 +101,13 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
"#network to predict ray depths from input image\n",
"\n",
"def depth_network(x, lfsize, disp_mult, is_training, name):\n",
"def depth_network(x, lfsize, disp_mult, name):\n",
" with tf.variable_scope(name):\n",
" \n",
" b_sz = tf.shape(x)[0]\n",
@@ -93,16 +116,17 @@
" v_sz = lfsize[2]\n",
" u_sz = lfsize[3]\n",
" \n",
" c1 = cnn_layer(x, [3, 3, 3, 16], [16], 'c1', is_training)\n",
" c2 = cnn_layer(c1, [3, 3, 16, 64], [64], 'c2', is_training)\n",
" c3 = cnn_layer(c2, [3, 3, 64, 128], [128], 'c3', is_training)\n",
" c4 = cnn_layer(c3, [3, 3, 128, 128], [128], 'c4', is_training, rate=2)\n",
" c5 = cnn_layer(c4, [3, 3, 128, 128], [128], 'c5', is_training, rate=4)\n",
" c6 = cnn_layer(c5, [3, 3, 128, 128], [128], 'c6', is_training, rate=8)\n",
" c7 = cnn_layer(c6, [3, 3, 128, 128], [128], 'c7', is_training, rate=16)\n",
" c8 = cnn_layer(c7, [3, 3, 128, 128], [128], 'c8', is_training)\n",
" c9 = cnn_layer(c8, [3, 3, 128, lfsize[2]*lfsize[3]], [lfsize[2]*lfsize[3]], 'c9', is_training)\n",
" c10 = disp_mult*tf.tanh(cnn_layer_no_bn(c9, [3, 3, lfsize[2]*lfsize[3], lfsize[2]*lfsize[3]], [lfsize[2]*lfsize[3]], 'c10'))\n",
" c1 = cnn_layer(x, [3, 3, 3, 16], [16], 'c1')\n",
" c2 = cnn_layer(c1, [3, 3, 16, 64], [64], 'c2')\n",
" c3 = cnn_layer(c2, [3, 3, 64, 128], [128], 'c3')\n",
" c4 = cnn_layer(c3, [3, 3, 128, 128], [128], 'c4', rate=2)\n",
" c5 = cnn_layer(c4, [3, 3, 128, 128], [128], 'c5', rate=4)\n",
" c6 = cnn_layer(c5, [3, 3, 128, 128], [128], 'c6', rate=8)\n",
" c7 = cnn_layer(c6, [3, 3, 128, 128], [128], 'c7', rate=16)\n",
" c8 = cnn_layer(c7, [3, 3, 128, 128], [128], 'c8')\n",
" c9 = cnn_layer(c8, [3, 3, 128, lfsize[2]*lfsize[3]], [lfsize[2]*lfsize[3]], 'c9')\n",
" c10 = disp_mult*tf.tanh(cnn_layer_plain(c9, [3, 3, lfsize[2]*lfsize[3], lfsize[2]*lfsize[3]], \\\n",
" [lfsize[2]*lfsize[3]], 'c10'))\n",
" \n",
" return tf.reshape(c10, [b_sz, y_sz, x_sz, v_sz, u_sz])"
]
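The dilation schedule above (rates 1, 1, 1, 2, 4, 8, 16, 1, 1, 1 across c1-c10) grows the receptive field without any downsampling: at stride 1, each 3x3 layer adds `rate*(k-1)` pixels. A small illustrative helper (not from the notebook):

```python
# Receptive field of a stack of 3x3, stride-1 dilated convs.
def receptive_field(rates, k=3):
    rf = 1
    for r in rates:
        rf += r * (k - 1)   # each layer widens the field by rate*(k-1)
    return rf

print(receptive_field([1, 1, 1, 2, 4, 8, 16, 1, 1, 1]))  # 73 pixels
```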
@@ -117,7 +141,7 @@
"source": [
"#network for refining Lambertian light field (predict occluded rays and non-Lambertian effects)\n",
"\n",
"def occlusions_network(x, shear, lfsize, is_training, name):\n",
"def occlusions_network(x, shear, lfsize, name):\n",
" with tf.variable_scope(name):\n",
" \n",
" b_sz = tf.shape(x)[0]\n",
@@ -126,15 +150,19 @@
" v_sz = lfsize[2]\n",
" u_sz = lfsize[3]\n",
" \n",
" x = tf.reshape(x, [b_sz, y_sz, x_sz, v_sz*u_sz*4])\n",
" shear = tf.reshape(shear, [b_sz, y_sz, x_sz, v_sz*u_sz*3])\n",
" c1 = cnn_layer(x, [3, 3, v_sz*u_sz*4, 128], [128], 'c1', is_training)\n",
" c2 = cnn_layer(c1, [3, 3, 128, 128], [128], 'c2', is_training)\n",
" c3 = cnn_layer(c2, [3, 3, 128, 128], [128], 'c3', is_training)\n",
" c4 = cnn_layer(c3, [3, 3, 128, v_sz*u_sz*3], [v_sz*u_sz*3], 'c4', is_training)\n",
" c5 = tf.tanh(cnn_layer_no_bn(c4, [3, 3, v_sz*u_sz*3, v_sz*u_sz*3], [v_sz*u_sz*3], 'c5') + shear)\n",
" x = tf.transpose(tf.reshape(tf.transpose(x, perm=[0, 5, 1, 2, 3, 4]), \\\n",
" [b_sz, 4, y_sz, x_sz, u_sz*v_sz]), perm=[0, 4, 2, 3, 1])\n",
" \n",
" c1 = cnn_layer_3D(x, [3, 3, 3, 4, 8], [8], 'c1')\n",
" c2 = cnn_layer_3D(c1, [3, 3, 3, 8, 8], [8], 'c2')\n",
" c3 = cnn_layer_3D(c2, [3, 3, 3, 8, 8], [8], 'c3')\n",
" c4 = cnn_layer_3D(c3, [3, 3, 3, 8, 8], [8], 'c4')\n",
" c5 = tf.tanh(cnn_layer_3D_plain(c4, [3, 3, 3, 8, 3], [3], 'c5'))\n",
" \n",
" return tf.reshape(c5, [b_sz, y_sz, x_sz, v_sz, u_sz, 3])"
" output = tf.transpose(tf.reshape(tf.transpose(c5, perm=[0, 4, 2, 3, 1]), \\\n",
" [b_sz, 3, y_sz, x_sz, v_sz, u_sz]), perm=[0, 2, 3, 4, 5, 1]) + shear\n",
" \n",
" return output"
]
},
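The refinement network now applies 3D convolutions over (angular, y, x) rather than 2D convolutions over `v_sz*u_sz*4` stacked channels: the transpose/reshape pair flattens the v-by-u angular grid into a single depth axis and moves the 4 per-ray features (RGB plus the stopped-gradient depth) into the channel slot. A NumPy sketch of that axis shuffle with toy dimensions (chosen here only for illustration):

```python
import numpy as np

# (b, y, x, v, u, c) -> (b, v*u, y, x, c): the angular grid becomes the
# "depth" axis of a 3D conv and the per-ray features become channels.
b, y, x, v, u, c = 1, 5, 7, 2, 3, 4
lf = np.random.rand(b, y, x, v, u, c)
t = np.transpose(np.reshape(np.transpose(lf, (0, 5, 1, 2, 3, 4)),
                            (b, c, y, x, u * v)), (0, 4, 2, 3, 1))
print(t.shape)  # (1, 6, 5, 7, 4) == (b, v*u, y, x, c)
```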
{
@@ -147,26 +175,26 @@
"source": [
"#full forward model\n",
"\n",
"def forward_model(x, lfsize, disp_mult, is_training):\n",
"def forward_model(x, lfsize, disp_mult):\n",
" with tf.variable_scope('forward_model') as scope:\n",
" #predict ray depths from input image\n",
" ray_depths = depth_network(x, lfsize, disp_mult, is_training, 'ray_depths')\n",
" ray_depths = depth_network(x, lfsize, disp_mult, 'ray_depths')\n",
" #shear input image by predicted ray depths to render Lambertian light field\n",
" lf_shear_r = depth_rendering(x[:, :, :, 0], ray_depths, lfsize)\n",
" lf_shear_g = depth_rendering(x[:, :, :, 1], ray_depths, lfsize)\n",
" lf_shear_b = depth_rendering(x[:, :, :, 2], ray_depths, lfsize)\n",
" lf_shear = tf.stack([lf_shear_r, lf_shear_g, lf_shear_b], axis=5)\n",
" #occlusion/non-Lambertian prediction network\n",
" shear_and_depth = tf.stack([lf_shear_r, lf_shear_g, lf_shear_b, tf.stop_gradient(ray_depths)], axis=5)\n",
" y = occlusions_network(shear_and_depth, lf_shear, lfsize, is_training, 'occlusions')\n",
" y = occlusions_network(shear_and_depth, lf_shear, lfsize, 'occlusions')\n",
" return ray_depths, lf_shear, y"
]
},
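Note that `tf.stop_gradient` on the depth channel (unchanged by this commit) keeps the refinement loss from backpropagating into `depth_network` along that path, so ray depths are supervised only through the Lambertian rendering. A minimal sketch of the behavior, assuming TensorFlow 1.x as the notebook does and using a toy graph separate from the notebook's:

```python
import tensorflow as tf

x = tf.constant(2.0)
y = x * tf.stop_gradient(x)   # the second factor is treated as a constant
g = tf.gradients(y, x)[0]
with tf.Session() as sess:
    print(sess.run(g))        # 2.0 rather than the full derivative 4.0
```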
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@@ -353,10 +381,19 @@
"source": [
"#spatial TV loss (l1 of spatial derivatives)\n",
"\n",
"def image_derivs(x, nc):\n",
" dy = tf.nn.depthwise_conv2d(x, tf.tile(tf.expand_dims(tf.expand_dims([[1.0, 2.0, 1.0], [0.0, 0.0, 0.0], [-1.0, -2.0, -1.0]], 2), 3), [1, 1, nc, 1]), strides=[1, 1, 1, 1], padding='VALID')\n",
" dx = tf.nn.depthwise_conv2d(x, tf.tile(tf.expand_dims(tf.expand_dims([[1.0, 0.0, -1.0], [2.0, 0.0, -2.0], [1.0, 0.0, -1.0]], 2), 3), [1, 1, nc, 1]), strides=[1, 1, 1, 1], padding='VALID')\n",
" return dy, dx\n",
"\n",
"def tv_loss(x):\n",
" temp = x[:,0:lfsize[2]-1,0:lfsize[3]-1,:,:]\n",
" dy = (x[:,1:lfsize[2],0:lfsize[3]-1,:,:] - temp)\n",
" dx = (x[:,0:lfsize[2]-1,1:lfsize[3],:,:] - temp)\n",
" b_sz = tf.shape(x)[0]\n",
" y_sz = tf.shape(x)[1]\n",
" x_sz = tf.shape(x)[2]\n",
" u_sz = lfsize[2]\n",
" v_sz = lfsize[3]\n",
" temp = tf.reshape(x, [b_sz, y_sz, x_sz, u_sz*v_sz])\n",
" dy, dx = image_derivs(temp, u_sz*v_sz)\n",
" l1 = tf.reduce_mean(tf.abs(dy)+tf.abs(dx))\n",
" return l1"
]
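This rewrite fixes the old `tv_loss`, which sliced the spatial axes with the angular sizes `lfsize[2]`/`lfsize[3]` (so the loss saw only a tiny corner of each view), and replaces one-pixel forward differences with 3x3 Sobel kernels applied per angular view via `depthwise_conv2d`. A sketch of the y-derivative kernel on a toy ramp, assuming SciPy is available (the notebook itself does not use SciPy):

```python
import numpy as np
from scipy.signal import convolve2d

sobel_y = np.array([[1.0, 2.0, 1.0],
                    [0.0, 0.0, 0.0],
                    [-1.0, -2.0, -1.0]])
ramp = np.arange(6.0)[:, None] * np.ones((1, 6))  # brightness grows down rows
dy = convolve2d(ramp, sobel_y, mode='valid')
print(dy)  # every entry is 8.0: 8x the unit slope, and 0 on flat regions
```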
@@ -386,10 +423,11 @@
"#input pipeline\n",
"\n",
"def process_lf(lf, num_crops, lfsize, patchsize):\n",
" lf = normalize_lf(tf.image.adjust_gamma(tf.to_float(lf[:lfsize[0]*14, :lfsize[1]*14, :])/255.0, gamma=0.4))\n",
" gamma_val = tf.random_uniform(shape=[], minval=0.4, maxval=1.0) #random gamma for data augmentation (change at test time, I suggest 0.4-0.5)\n",
" lf = normalize_lf(tf.image.adjust_gamma(tf.to_float(lf[:lfsize[0]*14, :lfsize[1]*14, :])/255.0, gamma=gamma_val))\n",
" lf = tf.transpose(tf.reshape(lf, [lfsize[0], 14, lfsize[1], 14, 3]), [0, 2, 1, 3, 4])\n",
" lf = lf[:, :, (14/2)-(lfsize[2]/2):(14/2)+(lfsize[2]/2), (14/2)-(lfsize[3]/2):(14/2)+(lfsize[3]/2), :]\n",
" aif = lf[:, :, lfsize[2]/2, lfsize[3]/2, :]\n",
" lf = lf[:, :, (14//2)-(lfsize[2]//2):(14//2)+(lfsize[2]//2), (14//2)-(lfsize[3]//2):(14//2)+(lfsize[3]//2), :]\n",
" aif = lf[:, :, lfsize[2]//2, lfsize[3]//2, :]\n",
" aif_list = []\n",
" lf_list = []\n",
" for i in range(num_crops):\n",
@@ -420,28 +458,23 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"train_path = '/Users/pratul/Dropbox/LF_Flowers' #path to training examples\n",
"train_path = '/Users/pratulsrinivasan/Dropbox/LF_Flowers' #path to training examples\n",
"train_filenames = [os.path.join(train_path, f) for f in os.listdir(train_path) if not f.startswith('.')]\n",
"\n",
"aif_batch, lf_batch = input_pipeline(train_filenames, lfsize, patchsize, batchsize, num_crops)\n",
"is_training = tf.placeholder(tf.bool, [])"
"aif_batch, lf_batch = input_pipeline(train_filenames, lfsize, patchsize, batchsize, num_crops)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"#forward model\n",
"ray_depths, lf_shear, y = forward_model(aif_batch, lfsize, disp_mult, is_training)\n",
"ray_depths, lf_shear, y = forward_model(aif_batch, lfsize, disp_mult)\n",
"\n",
"#training losses to minimize\n",
"lam_tv = 0.01\n",
@@ -461,7 +494,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@@ -488,9 +521,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"logdir = 'logs/train/' #path to store logs\n",
@@ -499,18 +530,18 @@
"with tf.Session() as sess:\n",
" train_writer = tf.summary.FileWriter(logdir, sess.graph)\n",
" saver = tf.train.Saver()\n",
" sess.run(tf.global_variables_initializer()) #initialize variables (comment out if restoring from trained model)\n",
" #saver.restore(sess, 'checkpoints/model.ckpt-123999') #restore trained model\n",
" sess.run(tf.global_variables_initializer()) #initialize variables \n",
" \n",
" coord = tf.train.Coordinator() #coordinator for input queue threads\n",
" threads = tf.train.start_queue_runners(sess=sess, coord=coord) #start input queue threads\n",
" \n",
" for i in range(train_iters):\n",
" #training training step\n",
" _ = sess.run(train_step, feed_dict={is_training:True})\n",
" _ = sess.run(train_step)\n",
" #save training summaries\n",
" if (i+1) % 1 == 0: #can change the frequency of writing summaries for faster training\n",
" trainsummary = sess.run(merged, feed_dict={is_training:True})\n",
" if (i+1) % 1 == 0: #can change the frequency of writing summaries if desired\n",
" print('training step: ', i)\n",
" trainsummary = sess.run(merged)\n",
" train_writer.add_summary(trainsummary, i) \n",
" #save checkpoint\n",
" if (i+1) % 4000 == 0:\n",
Expand All @@ -534,23 +565,23 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}
