Commit: Updates and bug fixes
pratulsrinivasan authored Apr 19, 2018
1 parent 26c12a3 commit b68bf83
Showing 1 changed file with 105 additions and 74 deletions.
179 changes: 105 additions & 74 deletions Local_Light_Field_Synthesis.ipynb
@@ -3,9 +3,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
@@ -30,7 +28,7 @@
"#parameters\n",
"\n",
"lfsize = [372, 540, 8, 8] #dimensions of Lytro light fields\n",
"batchsize = 1\n",
"batchsize = 1 #modify based on user's GPU memory\n",
"patchsize = [192, 192] #spatial dimensions of training light fields\n",
"disp_mult = 4.0 #max disparity between adjacent veiws\n",
"num_crops = 4 #number of random spatial crops per light field for each input queue thread to push\n",
@@ -48,28 +46,53 @@
"source": [
"#functions for CNN layers\n",
"\n",
-"def weight_variable(w_shape, name):\n",
-"    return tf.get_variable(name, w_shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())\n",
+"def weight_variable(w_shape):\n",
+"    return tf.get_variable('weights', w_shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())\n",
"\n",
"def bias_variable(b_shape, init_bias=0.0):\n",
"    return tf.get_variable('bias', b_shape, initializer=tf.constant_initializer(init_bias))\n",
"\n",
-"#standard atrous layer\n",
-"def cnn_layer(input_tensor, w_shape, b_shape, layer_name, is_training, rate=1, padding_type='SAME'):\n",
-"    with tf.variable_scope(layer_name):\n",
-"        W = weight_variable(w_shape, '_weights')\n",
-"        h = tf.nn.atrous_conv2d(input_tensor, W, rate, padding=padding_type, name=layer_name + '_conv')\n",
-"        h = h + bias_variable(b_shape)\n",
-"        h = tf.nn.elu(h)\n",
-"        h = tf.contrib.layers.batch_norm(h, scale=True, updates_collections=None, \n",
-"                                         is_training=is_training, scope=layer_name + '_bn')\n",
-"        return h\n",
-"\n",
-"#layer with no normalization or activation\n",
-"def cnn_layer_no_bn(input_tensor, w_shape, b_shape, layer_name, rate=1, padding_type='SAME'):\n",
-"    with tf.variable_scope(layer_name):\n",
-"        W = weight_variable(w_shape, '_weights')\n",
-"        h = tf.nn.atrous_conv2d(input_tensor, W, rate, padding=padding_type, name=layer_name + '_conv')\n",
-"        h = h + bias_variable(b_shape)\n",
-"        return h"
+"def cnn_layer(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds], padding='VALID', dilation_rate=[rate, rate], name=layer_name + '_conv')\n",
+"        h = tf.contrib.layers.instance_norm(h + bias_variable(b_shape))\n",
+"        h = tf.nn.leaky_relu(h)\n",
+"        return h\n",
+"    \n",
+"def cnn_layer_plain(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds], padding='VALID', dilation_rate=[rate, rate], name=layer_name + '_conv')\n",
+"        h = h + bias_variable(b_shape)\n",
+"        return h\n",
+"    \n",
+"def cnn_layer_3D(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        pad_amt_2 = rate * (w_shape[2] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[pad_amt_2,pad_amt_2],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds, ds], padding='VALID', dilation_rate=[rate, rate, rate], name=layer_name + '_conv')\n",
+"        h = tf.contrib.layers.instance_norm(h + bias_variable(b_shape))\n",
+"        h = tf.nn.leaky_relu(h)\n",
+"        return h\n",
+"    \n",
+"def cnn_layer_3D_plain(input_tensor, w_shape, b_shape, layer_name, rate=1, ds=1):\n",
+"    with tf.variable_scope(layer_name):\n",
+"        W = weight_variable(w_shape)\n",
+"        pad_amt_0 = rate * (w_shape[0] - 1)//2\n",
+"        pad_amt_1 = rate * (w_shape[1] - 1)//2\n",
+"        pad_amt_2 = rate * (w_shape[2] - 1)//2\n",
+"        input_tensor = tf.pad(input_tensor, [[0,0],[pad_amt_0,pad_amt_0],[pad_amt_1,pad_amt_1],[pad_amt_2,pad_amt_2],[0,0]], mode='SYMMETRIC')\n",
+"        h = tf.nn.convolution(input_tensor, W, strides=[ds, ds, ds], padding='VALID', dilation_rate=[rate, rate, rate], name=layer_name + '_conv')\n",
+"        h = h + bias_variable(b_shape)\n",
+"        return h"
]
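The new layers pad symmetrically by `rate*(k-1)//2` per side and then convolve with 'VALID', which keeps every `cnn_layer` size-preserving at stride 1; and because instance normalization uses per-example statistics, it behaves the same at train and test time, which is why the `is_training` flag disappears from the signatures above. A quick sanity check of the padding arithmetic (illustrative, not part of the notebook):

```python
# For a 1-D 'VALID' dilated conv at stride 1: n_out = n + 2*pad - rate*(k - 1).
# With pad = rate*(k - 1)//2 per side and odd k, n_out == n.
def valid_dilated_out_len(n, k, rate, pad):
    return n + 2 * pad - rate * (k - 1)

for rate in [1, 2, 4, 8, 16]:           # dilation rates used by depth_network
    pad = rate * (3 - 1) // 2
    assert valid_dilated_out_len(192, 3, rate, pad) == 192
print("pad = rate*(k-1)//2 keeps 'VALID' dilated convs size-preserving")
```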
@@ -78,13 +101,13 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
"#network to predict ray depths from input image\n",
"\n",
"def depth_network(x, lfsize, disp_mult, is_training, name):\n",
"def depth_network(x, lfsize, disp_mult, name):\n",
" with tf.variable_scope(name):\n",
" \n",
" b_sz = tf.shape(x)[0]\n",
@@ -93,16 +116,17 @@
" v_sz = lfsize[2]\n",
" u_sz = lfsize[3]\n",
" \n",
" c1 = cnn_layer(x, [3, 3, 3, 16], [16], 'c1', is_training)\n",
" c2 = cnn_layer(c1, [3, 3, 16, 64], [64], 'c2', is_training)\n",
" c3 = cnn_layer(c2, [3, 3, 64, 128], [128], 'c3', is_training)\n",
" c4 = cnn_layer(c3, [3, 3, 128, 128], [128], 'c4', is_training, rate=2)\n",
" c5 = cnn_layer(c4, [3, 3, 128, 128], [128], 'c5', is_training, rate=4)\n",
" c6 = cnn_layer(c5, [3, 3, 128, 128], [128], 'c6', is_training, rate=8)\n",
" c7 = cnn_layer(c6, [3, 3, 128, 128], [128], 'c7', is_training, rate=16)\n",
" c8 = cnn_layer(c7, [3, 3, 128, 128], [128], 'c8', is_training)\n",
" c9 = cnn_layer(c8, [3, 3, 128, lfsize[2]*lfsize[3]], [lfsize[2]*lfsize[3]], 'c9', is_training)\n",
" c10 = disp_mult*tf.tanh(cnn_layer_no_bn(c9, [3, 3, lfsize[2]*lfsize[3], lfsize[2]*lfsize[3]], [lfsize[2]*lfsize[3]], 'c10'))\n",
" c1 = cnn_layer(x, [3, 3, 3, 16], [16], 'c1')\n",
" c2 = cnn_layer(c1, [3, 3, 16, 64], [64], 'c2')\n",
" c3 = cnn_layer(c2, [3, 3, 64, 128], [128], 'c3')\n",
" c4 = cnn_layer(c3, [3, 3, 128, 128], [128], 'c4', rate=2)\n",
" c5 = cnn_layer(c4, [3, 3, 128, 128], [128], 'c5', rate=4)\n",
" c6 = cnn_layer(c5, [3, 3, 128, 128], [128], 'c6', rate=8)\n",
" c7 = cnn_layer(c6, [3, 3, 128, 128], [128], 'c7', rate=16)\n",
" c8 = cnn_layer(c7, [3, 3, 128, 128], [128], 'c8')\n",
" c9 = cnn_layer(c8, [3, 3, 128, lfsize[2]*lfsize[3]], [lfsize[2]*lfsize[3]], 'c9')\n",
" c10 = disp_mult*tf.tanh(cnn_layer_plain(c9, [3, 3, lfsize[2]*lfsize[3], lfsize[2]*lfsize[3]], \\\n",
" [lfsize[2]*lfsize[3]], 'c10'))\n",
" \n",
" return tf.reshape(c10, [b_sz, y_sz, x_sz, v_sz, u_sz])"
]
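The dilation schedule above (rates 1, 1, 1, 2, 4, 8, 16, 1, 1, 1 across c1-c10) grows the receptive field without any downsampling: at stride 1, each 3x3 layer adds `rate*(k-1)` pixels. A small illustrative helper (not from the notebook):

```python
# Receptive field of a stack of 3x3, stride-1 dilated convs.
def receptive_field(rates, k=3):
    rf = 1
    for r in rates:
        rf += r * (k - 1)   # each layer widens the field by rate*(k-1)
    return rf

print(receptive_field([1, 1, 1, 2, 4, 8, 16, 1, 1, 1]))  # 73 pixels
```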
@@ -117,7 +141,7 @@
"source": [
"#network for refining Lambertian light field (predict occluded rays and non-Lambertian effects)\n",
"\n",
"def occlusions_network(x, shear, lfsize, is_training, name):\n",
"def occlusions_network(x, shear, lfsize, name):\n",
" with tf.variable_scope(name):\n",
" \n",
" b_sz = tf.shape(x)[0]\n",
@@ -126,15 +150,19 @@
" v_sz = lfsize[2]\n",
" u_sz = lfsize[3]\n",
" \n",
" x = tf.reshape(x, [b_sz, y_sz, x_sz, v_sz*u_sz*4])\n",
" shear = tf.reshape(shear, [b_sz, y_sz, x_sz, v_sz*u_sz*3])\n",
" c1 = cnn_layer(x, [3, 3, v_sz*u_sz*4, 128], [128], 'c1', is_training)\n",
" c2 = cnn_layer(c1, [3, 3, 128, 128], [128], 'c2', is_training)\n",
" c3 = cnn_layer(c2, [3, 3, 128, 128], [128], 'c3', is_training)\n",
" c4 = cnn_layer(c3, [3, 3, 128, v_sz*u_sz*3], [v_sz*u_sz*3], 'c4', is_training)\n",
" c5 = tf.tanh(cnn_layer_no_bn(c4, [3, 3, v_sz*u_sz*3, v_sz*u_sz*3], [v_sz*u_sz*3], 'c5') + shear)\n",
" x = tf.transpose(tf.reshape(tf.transpose(x, perm=[0, 5, 1, 2, 3, 4]), \\\n",
" [b_sz, 4, y_sz, x_sz, u_sz*v_sz]), perm=[0, 4, 2, 3, 1])\n",
" \n",
" c1 = cnn_layer_3D(x, [3, 3, 3, 4, 8], [8], 'c1')\n",
" c2 = cnn_layer_3D(c1, [3, 3, 3, 8, 8], [8], 'c2')\n",
" c3 = cnn_layer_3D(c2, [3, 3, 3, 8, 8], [8], 'c3')\n",
" c4 = cnn_layer_3D(c3, [3, 3, 3, 8, 8], [8], 'c4')\n",
" c5 = tf.tanh(cnn_layer_3D_plain(c4, [3, 3, 3, 8, 3], [3], 'c5'))\n",
" \n",
" return tf.reshape(c5, [b_sz, y_sz, x_sz, v_sz, u_sz, 3])"
" output = tf.transpose(tf.reshape(tf.transpose(c5, perm=[0, 4, 2, 3, 1]), \\\n",
" [b_sz, 3, y_sz, x_sz, v_sz, u_sz]), perm=[0, 2, 3, 4, 5, 1]) + shear\n",
" \n",
" return output"
]
},
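The refinement network now applies 3D convolutions over (angular, y, x) rather than 2D convolutions over `v_sz*u_sz*4` stacked channels: the transpose/reshape pair flattens the v-by-u angular grid into a single depth axis and moves the 4 per-ray features (RGB plus the stopped-gradient depth) into the channel slot. A NumPy sketch of that axis shuffle with toy dimensions (chosen here only for illustration):

```python
import numpy as np

# (b, y, x, v, u, c) -> (b, v*u, y, x, c): the angular grid becomes the
# "depth" axis of a 3D conv and the per-ray features become channels.
b, y, x, v, u, c = 1, 5, 7, 2, 3, 4
lf = np.random.rand(b, y, x, v, u, c)
t = np.transpose(np.reshape(np.transpose(lf, (0, 5, 1, 2, 3, 4)),
                            (b, c, y, x, u * v)), (0, 4, 2, 3, 1))
print(t.shape)  # (1, 6, 5, 7, 4) == (b, v*u, y, x, c)
```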
{
@@ -147,26 +175,26 @@
"source": [
"#full forward model\n",
"\n",
"def forward_model(x, lfsize, disp_mult, is_training):\n",
"def forward_model(x, lfsize, disp_mult):\n",
" with tf.variable_scope('forward_model') as scope:\n",
" #predict ray depths from input image\n",
" ray_depths = depth_network(x, lfsize, disp_mult, is_training, 'ray_depths')\n",
" ray_depths = depth_network(x, lfsize, disp_mult, 'ray_depths')\n",
" #shear input image by predicted ray depths to render Lambertian light field\n",
" lf_shear_r = depth_rendering(x[:, :, :, 0], ray_depths, lfsize)\n",
" lf_shear_g = depth_rendering(x[:, :, :, 1], ray_depths, lfsize)\n",
" lf_shear_b = depth_rendering(x[:, :, :, 2], ray_depths, lfsize)\n",
" lf_shear = tf.stack([lf_shear_r, lf_shear_g, lf_shear_b], axis=5)\n",
" #occlusion/non-Lambertian prediction network\n",
" shear_and_depth = tf.stack([lf_shear_r, lf_shear_g, lf_shear_b, tf.stop_gradient(ray_depths)], axis=5)\n",
" y = occlusions_network(shear_and_depth, lf_shear, lfsize, is_training, 'occlusions')\n",
" y = occlusions_network(shear_and_depth, lf_shear, lfsize, 'occlusions')\n",
" return ray_depths, lf_shear, y"
]
},
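Note that `tf.stop_gradient` on the depth channel (unchanged by this commit) keeps the refinement loss from backpropagating into `depth_network` along that path, so ray depths are supervised only through the Lambertian rendering. A minimal sketch of the behavior, assuming TensorFlow 1.x as the notebook does and using a toy graph separate from the notebook's:

```python
import tensorflow as tf

x = tf.constant(2.0)
y = x * tf.stop_gradient(x)   # the second factor is treated as a constant
g = tf.gradients(y, x)[0]
with tf.Session() as sess:
    print(sess.run(g))        # 2.0 rather than the full derivative 4.0
```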
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@@ -353,10 +381,19 @@
"source": [
"#spatial TV loss (l1 of spatial derivatives)\n",
"\n",
"def image_derivs(x, nc):\n",
" dy = tf.nn.depthwise_conv2d(x, tf.tile(tf.expand_dims(tf.expand_dims([[1.0, 2.0, 1.0], [0.0, 0.0, 0.0], [-1.0, -2.0, -1.0]], 2), 3), [1, 1, nc, 1]), strides=[1, 1, 1, 1], padding='VALID')\n",
" dx = tf.nn.depthwise_conv2d(x, tf.tile(tf.expand_dims(tf.expand_dims([[1.0, 0.0, -1.0], [2.0, 0.0, -2.0], [1.0, 0.0, -1.0]], 2), 3), [1, 1, nc, 1]), strides=[1, 1, 1, 1], padding='VALID')\n",
" return dy, dx\n",
"\n",
"def tv_loss(x):\n",
" temp = x[:,0:lfsize[2]-1,0:lfsize[3]-1,:,:]\n",
" dy = (x[:,1:lfsize[2],0:lfsize[3]-1,:,:] - temp)\n",
" dx = (x[:,0:lfsize[2]-1,1:lfsize[3],:,:] - temp)\n",
" b_sz = tf.shape(x)[0]\n",
" y_sz = tf.shape(x)[1]\n",
" x_sz = tf.shape(x)[2]\n",
" u_sz = lfsize[2]\n",
" v_sz = lfsize[3]\n",
" temp = tf.reshape(x, [b_sz, y_sz, x_sz, u_sz*v_sz])\n",
" dy, dx = image_derivs(temp, u_sz*v_sz)\n",
" l1 = tf.reduce_mean(tf.abs(dy)+tf.abs(dx))\n",
" return l1"
]
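This rewrite fixes the old `tv_loss`, which sliced the spatial axes with the angular sizes `lfsize[2]`/`lfsize[3]` (so the loss saw only a tiny corner of each view), and replaces one-pixel forward differences with 3x3 Sobel kernels applied per angular view via `depthwise_conv2d`. A sketch of the y-derivative kernel on a toy ramp, assuming SciPy is available (the notebook itself does not use SciPy):

```python
import numpy as np
from scipy.signal import convolve2d

sobel_y = np.array([[1.0, 2.0, 1.0],
                    [0.0, 0.0, 0.0],
                    [-1.0, -2.0, -1.0]])
ramp = np.arange(6.0)[:, None] * np.ones((1, 6))  # brightness grows down rows
dy = convolve2d(ramp, sobel_y, mode='valid')
print(dy)  # every entry is 8.0: 8x the unit slope, and 0 on flat regions
```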
@@ -386,10 +423,11 @@
"#input pipeline\n",
"\n",
"def process_lf(lf, num_crops, lfsize, patchsize):\n",
" lf = normalize_lf(tf.image.adjust_gamma(tf.to_float(lf[:lfsize[0]*14, :lfsize[1]*14, :])/255.0, gamma=0.4))\n",
" gamma_val = tf.random_uniform(shape=[], minval=0.4, maxval=1.0) #random gamma for data augmentation (change at test time, I suggest 0.4-0.5)\n",
" lf = normalize_lf(tf.image.adjust_gamma(tf.to_float(lf[:lfsize[0]*14, :lfsize[1]*14, :])/255.0, gamma=gamma_val))\n",
" lf = tf.transpose(tf.reshape(lf, [lfsize[0], 14, lfsize[1], 14, 3]), [0, 2, 1, 3, 4])\n",
" lf = lf[:, :, (14/2)-(lfsize[2]/2):(14/2)+(lfsize[2]/2), (14/2)-(lfsize[3]/2):(14/2)+(lfsize[3]/2), :]\n",
" aif = lf[:, :, lfsize[2]/2, lfsize[3]/2, :]\n",
" lf = lf[:, :, (14//2)-(lfsize[2]//2):(14//2)+(lfsize[2]//2), (14//2)-(lfsize[3]//2):(14//2)+(lfsize[3]//2), :]\n",
" aif = lf[:, :, lfsize[2]//2, lfsize[3]//2, :]\n",
" aif_list = []\n",
" lf_list = []\n",
" for i in range(num_crops):\n",
@@ -420,28 +458,23 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"train_path = '/Users/pratul/Dropbox/LF_Flowers' #path to training examples\n",
"train_path = '/Users/pratulsrinivasan/Dropbox/LF_Flowers' #path to training examples\n",
"train_filenames = [os.path.join(train_path, f) for f in os.listdir(train_path) if not f.startswith('.')]\n",
"\n",
"aif_batch, lf_batch = input_pipeline(train_filenames, lfsize, patchsize, batchsize, num_crops)\n",
"is_training = tf.placeholder(tf.bool, [])"
"aif_batch, lf_batch = input_pipeline(train_filenames, lfsize, patchsize, batchsize, num_crops)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"#forward model\n",
"ray_depths, lf_shear, y = forward_model(aif_batch, lfsize, disp_mult, is_training)\n",
"ray_depths, lf_shear, y = forward_model(aif_batch, lfsize, disp_mult)\n",
"\n",
"#training losses to minimize\n",
"lam_tv = 0.01\n",
@@ -461,7 +494,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": true
},
"outputs": [],
"source": [
@@ -488,9 +521,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"logdir = 'logs/train/' #path to store logs\n",
@@ -499,18 +530,18 @@
"with tf.Session() as sess:\n",
" train_writer = tf.summary.FileWriter(logdir, sess.graph)\n",
" saver = tf.train.Saver()\n",
" sess.run(tf.global_variables_initializer()) #initialize variables (comment out if restoring from trained model)\n",
" #saver.restore(sess, 'checkpoints/model.ckpt-123999') #restore trained model\n",
" sess.run(tf.global_variables_initializer()) #initialize variables \n",
" \n",
" coord = tf.train.Coordinator() #coordinator for input queue threads\n",
" threads = tf.train.start_queue_runners(sess=sess, coord=coord) #start input queue threads\n",
" \n",
" for i in range(train_iters):\n",
" #training training step\n",
" _ = sess.run(train_step, feed_dict={is_training:True})\n",
" _ = sess.run(train_step)\n",
" #save training summaries\n",
" if (i+1) % 1 == 0: #can change the frequency of writing summaries for faster training\n",
" trainsummary = sess.run(merged, feed_dict={is_training:True})\n",
" if (i+1) % 1 == 0: #can change the frequency of writing summaries if desired\n",
" print('training step: ', i)\n",
" trainsummary = sess.run(merged)\n",
" train_writer.add_summary(trainsummary, i) \n",
" #save checkpoint\n",
" if (i+1) % 4000 == 0:\n",
Expand All @@ -534,23 +565,23 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}
