Friday, January 4, 2019

[TensorFlow] How to write op with gradient in python?

Recently I had reason to study Domain-Adversarial Training of Neural Networks (DANN); the paper can be downloaded from http://jmlr.org/papers/volume17/15-239/15-239.pdf

The key point of this paper is the "Gradient Reversal Layer" (GRL), which connects the domain discriminator to the feature extractor: it behaves as the identity in the forward pass but negates (and scales) the gradient in the backward pass. I found source code that implements it by overriding the Identity op's gradient function, as follows:



from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.framework import ops


class FlipGradientBuilder(object):
    def __init__(self):
        self.num_calls = 0

    def __call__(self, x, l=1.0):
        # Register a new gradient function on every call, so each
        # reversal layer can carry its own scaling factor l.
        grad_name = "FlipGradient%d" % self.num_calls

        @ops.RegisterGradient(grad_name)
        def _flip_gradients(op, grad):
            # Backward pass: negate the incoming gradient and scale it by l.
            return [tf.negative(grad) * l]

        # Forward pass: a plain identity, but with its gradient
        # temporarily overridden by the function registered above.
        g = tf.get_default_graph()
        with g.gradient_override_map({"Identity": grad_name}):
            y = tf.identity(x)

        self.num_calls += 1
        return y


flip_gradient = FlipGradientBuilder()
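
Before wiring this layer into a model, a quick sanity check (a minimal sketch of my own, not part of the original source; the constant input is just for illustration) shows what it does: the forward pass is the identity, while the gradient flowing back through the layer is negated and scaled by l.

x = tf.constant([1.0, 2.0, 3.0])
y = flip_gradient(x, l=0.5)
dy_dx = tf.gradients(y, x)[0]

with tf.Session() as sess:
    print(sess.run(y))       # [1. 2. 3.]        -> identity in the forward pass
    print(sess.run(dy_dx))   # [-0.5 -0.5 -0.5]  -> reversed, scaled gradient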

So, how do we use this Gradient Reversal Layer? The key line is the call to flip_gradient, marked with a comment in the domain predictor below:
(The code is from https://github.com/pumpikano/tf-dann/blob/master/Blobs-DANN.ipynb )

import tensorflow as tf
from utils import *  # provides weight_variable() and bias_variable()
from flip_gradient import flip_gradient

shallow_domain_classifier = False
batch_size = 16

X = tf.placeholder(tf.float32, [None, 2], name='X') # Input data
Y_ind = tf.placeholder(tf.int32, [None], name='Y_ind')  # Class index
D_ind = tf.placeholder(tf.int32, [None], name='D_ind')  # Domain index
train = tf.placeholder(tf.bool, [], name='train')       # Switch for routing data to class predictor
l = tf.placeholder(tf.float32, [], name='l')        # Gradient reversal scaler

Y = tf.one_hot(Y_ind, 2)
D = tf.one_hot(D_ind, 2)

# Feature extractor - single layer
W0 = weight_variable([2, 15])
b0 = bias_variable([15])
F = tf.nn.relu(tf.matmul(X, W0) + b0, name='feature')

# Label predictor - single layer
f = tf.cond(train, lambda: tf.slice(F, [0, 0], [batch_size // 2, -1]), lambda: F)
y = tf.cond(train, lambda: tf.slice(Y, [0, 0], [batch_size // 2, -1]), lambda: Y)

W1 = weight_variable([15, 2])
b1 = bias_variable([2])
p_logit = tf.matmul(f, W1) + b1
p = tf.nn.softmax(p_logit)
p_loss = tf.nn.softmax_cross_entropy_with_logits(logits=p_logit, labels=y)

# Domain predictor
f_ = flip_gradient(F, l)  # <-- the Gradient Reversal Layer between the feature extractor and the domain classifier

if shallow_domain_classifier:
    W2 = weight_variable([15, 2])
    b2 = bias_variable([2])
    d_logit = tf.matmul(f_, W2) + b2
    d = tf.nn.softmax(d_logit)
    d_loss = tf.nn.softmax_cross_entropy_with_logits(logits=d_logit, labels=D)

else:
    W2 = weight_variable([15, 8])
    b2 = bias_variable([8])
    h2 = tf.nn.relu(tf.matmul(f_, W2) + b2)

    W3 = weight_variable([8, 2])
    b3 = bias_variable([2])
    d_logit = tf.matmul(h2, W3) + b3
    d = tf.nn.softmax(d_logit)
    d_loss = tf.nn.softmax_cross_entropy_with_logits(logits=d_logit, labels=D)


# Optimization
pred_loss = tf.reduce_sum(p_loss, name='pred_loss')
domain_loss = tf.reduce_sum(d_loss, name='domain_loss')
total_loss = tf.add(pred_loss, domain_loss, name='total_loss')

pred_train_op = tf.train.AdamOptimizer().minimize(pred_loss, name='pred_train_op')
domain_train_op = tf.train.AdamOptimizer().minimize(domain_loss, name='domain_train_op')
dann_train_op = tf.train.AdamOptimizer().minimize(total_loss, name='dann_train_op')

# Evaluation
p_acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(p, 1)), tf.float32), name='p_acc')
d_acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(D, 1), tf.argmax(d, 1)), tf.float32), name='d_acc')
I also captured the computation graph in TensorBoard to confirm the override:
You can see that the Identity op's gradient function has been replaced by our custom "FlipGradient0".
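
Finally, to train the model, the reversal scaler l is usually ramped up from 0 to 1 over training using the schedule 2 / (1 + exp(-10 * p)) - 1 from the paper, where p is the training progress. Below is a rough training-loop sketch under that assumption; next_batch is a hypothetical helper standing in for your own data pipeline, returning a batch Xb with class indices Yb and domain indices Db.

import numpy as np

sess = tf.Session()
sess.run(tf.global_variables_initializer())

num_steps = 2000
for step in range(num_steps):
    # Hypothetical data loader: replace with your own batching code.
    # The label predictor only sees the first half of the batch when
    # train=True, so the source samples should go in the first half.
    Xb, Yb, Db = next_batch(batch_size)

    # DANN schedule: l grows from 0 to 1 so the domain loss kicks in gradually.
    p = float(step) / num_steps
    lam = 2. / (1. + np.exp(-10. * p)) - 1.

    _, pl, dl = sess.run([dann_train_op, pred_loss, domain_loss],
                         feed_dict={X: Xb, Y_ind: Yb, D_ind: Db,
                                    train: True, l: lam})
    if step % 200 == 0:
        print('step %d  pred_loss %.4f  domain_loss %.4f' % (step, pl, dl))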


P.S.: For more details, see
https://stackoverflow.com/questions/39048984/tensorflow-how-to-write-op-with-gradient-in-python
