tf.variable_scope - HAN ValueError Analysis

to be continued

tf.variable_scope | TensorFlow

Resolving ValueError: Trying to share variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel

2018.2.13

Comparing RNNCell in API r1.0 and in API r1.6

This module provides a number of basic commonly used RNN cells, such as LSTM (Long Short Term Memory) or GRU (Gated Recurrent Unit), and a number of operators that allow adding dropouts, projections, or embeddings for inputs. Constructing multi-layer cells is supported by the class MultiRNNCell, or by calling the rnn ops several times. Every RNNCell must have the properties below and implement __call__ with the following signature.

tf.contrib.rnn.RNNCell - API r1.0
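
For orientation, a minimal sketch of the r1.0-style usage described above, stacking cells with MultiRNNCell and running them with dynamic_rnn (the unit count, layer count, and feature width are made-up illustration values):

import tensorflow as tf

num_units = 100                                           # hypothetical hidden size
# one fresh BasicLSTMCell per layer; MultiRNNCell stacks them
cells = [tf.contrib.rnn.BasicLSTMCell(num_units) for _ in range(2)]
multi_cell = tf.contrib.rnn.MultiRNNCell(cells)

inputs = tf.placeholder(tf.float32, [None, None, 50])     # [batch, time, features], width 50 assumed
lengths = tf.placeholder(tf.int32, [None])
outputs, state = tf.nn.dynamic_rnn(multi_cell, inputs,
                                   sequence_length=lengths, dtype=tf.float32)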

Every RNNCell must have the properties below and implement call with the signature (output, next_state) = call(input, state). The optional third input argument, scope, is allowed for backwards compatibility purposes; but should be left off for new subclasses.

tf.contrib.rnn.RNNCell - API r1.6

  • I guess I should define a new call method to replace the old __call__ method and return (output, next_state), i.e. new_h and (new_c, new_h), according to API r1.6 (a minimal sketch follows this list):
    • x → input
    • new_h → output
    • c, new_c → state
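
A minimal sketch of that idea, assuming a plain (non-batch-normalized) LSTM body just to show the r1.6-style call signature; the class name MinimalLSTMCell and all sizes are hypothetical:

import tensorflow as tf

class MinimalLSTMCell(tf.contrib.rnn.RNNCell):
    """Hypothetical r1.6-style cell: implements call() instead of __call__()."""
    def __init__(self, num_units):
        super(MinimalLSTMCell, self).__init__()
        self._num_units = num_units

    @property
    def state_size(self):
        return (self._num_units, self._num_units)

    @property
    def output_size(self):
        return self._num_units

    def call(self, inputs, state):                     # (output, next_state) = call(input, state)
        c, h = state                                   # state carries (c, h); inputs plays the role of x
        x_size = inputs.get_shape().as_list()[1]
        W_xh = tf.get_variable('W_xh', [x_size, 4 * self._num_units])
        W_hh = tf.get_variable('W_hh', [self._num_units, 4 * self._num_units])
        bias = tf.get_variable('bias', [4 * self._num_units],
                               initializer=tf.zeros_initializer())
        hidden = tf.matmul(inputs, W_xh) + tf.matmul(h, W_hh) + bias
        i, j, f, o = tf.split(hidden, 4, axis=1)
        new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)         # new_h is the output
        return new_h, (new_c, new_h)                   # output, next_state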

ValueError:

ValueError: Trying to share variable tcm/word/fw/multi_rnn_cell/cell_0/bn_lstm/W_xh, but specified shape (100, 320) and found shape (200, 320).

Traceback:

C:\Python36\python.exe D:/hierarchical-attention-networks-master/worker.py --mode=train --device=/gpu:0 --batch-size=30
epoch 1
data loaded
2018-03-12 21:59:50.508320: I C:\tf_jenkins\workspace\rel-win\M\windows\PY\36\tensorflow\core\platform\cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
Traceback (most recent call last):
File "D:/hierarchical-attention-networks-master/worker.py", line 207, in <module>
main()
File "D:/hierarchical-attention-networks-master/worker.py", line 202, in main
train()
File "D:/hierarchical-attention-networks-master/worker.py", line 153, in train
model, saver = model_fn(s)
File "D:/hierarchical-attention-networks-master/worker.py", line 86, in HAN_model_1
is_training=is_training,
File "D:\hierarchical-attention-networks-master\HAN_model.py", line 67, in __init__
self._init_body(scope)
File "D:\hierarchical-attention-networks-master\HAN_model.py", line 140, in _init_body
self.sentence_cell, self.sentence_cell, sentence_inputs, self.sentence_lengths, scope=scope)
File "D:\hierarchical-attention-networks-master\model_components.py", line 24, in bidirectional_rnn
scope=scope))
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 416, in bidirectional_dynamic_rnn
time_major=time_major, scope=fw_scope)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 632, in dynamic_rnn
dtype=dtype)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 829, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3096, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2874, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2814, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3075, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 798, in _time_step
skip_conditionals=True)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 249, in _rnn_step
new_output, new_state = call_cell()
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn.py", line 786, in <lambda>
call_cell = lambda: cell(input_t, state)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 190, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 696, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1234, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "D:\hierarchical-attention-networks-master\bn_lstm.py", line 74, in __call__
initializer=orthogonal_initializer())
File "C:\Python36\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1297, in get_variable
constraint=constraint)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1093, in get_variable
constraint=constraint)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 431, in get_variable
return custom_getter(**custom_getter_kwargs)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 193, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 408, in _true_getter
use_resource=use_resource, constraint=constraint)
File "C:\Python36\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 752, in _get_single_variable
found_var.get_shape()))
ValueError: Trying to share variable tcm/word/fw/multi_rnn_cell/cell_0/bn_lstm/W_xh, but specified shape (100, 320) and found shape (200, 320).
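
The error itself comes from tf.get_variable finding an existing variable whose shape does not match the newly requested one. A toy reproduction of the same class of error (shapes copied from the message above, scope name made up):

import tensorflow as tf

with tf.variable_scope('demo') as scope:
    tf.get_variable('W_xh', shape=[200, 320])    # first creation -> "found shape (200, 320)"
    scope.reuse_variables()
    # requesting a different shape while reusing raises the same ValueError
    tf.get_variable('W_xh', shape=[100, 320])    # -> "specified shape (100, 320)"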

In HAN_model.py:

class HANClassifierModel():
    """ Implementation of document classification model described in
    `Hierarchical Attention Networks for Document Classification (Yang et al., 2016)`
    (https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf)"""

    def __init__(self,
                 vocab_size,
                 embedding_size,
                 classes,
                 word_cell,
                 sentence_cell,
                 word_output_size,
                 sentence_output_size,
                 max_grad_norm,
                 dropout_keep_proba,
                 is_training=None,
                 learning_rate=1e-4,
                 device='/cpu:0',
                 scope=None):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.classes = classes
        self.word_cell = word_cell
        self.word_output_size = word_output_size
        self.sentence_cell = sentence_cell
        self.sentence_output_size = sentence_output_size
        self.max_grad_norm = max_grad_norm
        self.dropout_keep_proba = dropout_keep_proba

        with tf.variable_scope(scope or 'tcm') as scope:
            self.global_step = tf.Variable(0, name='global_step', trainable=False)

            if is_training is not None:
                self.is_training = is_training
            else:
                self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')

            self.sample_weights = tf.placeholder(shape=(None,), dtype=tf.float32, name='sample_weights')

            # [document x sentence x word]
            self.inputs = tf.placeholder(shape=(None, None, None), dtype=tf.int32, name='inputs')

            # [document x sentence]
            self.word_lengths = tf.placeholder(shape=(None, None), dtype=tf.int32, name='word_lengths')

            # [document]
            self.sentence_lengths = tf.placeholder(shape=(None,), dtype=tf.int32, name='sentence_lengths')

            # [document]
            self.labels = tf.placeholder(shape=(None,), dtype=tf.int32, name='labels')

            (self.document_size,
             self.sentence_size,
             self.word_size) = tf.unstack(tf.shape(self.inputs))

            self._init_embedding(scope)

            # embeddings cannot be placed on GPU
            with tf.device(device):
                self._init_body(scope)

        with tf.variable_scope('train'):
            self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=self.logits)

            self.loss = tf.reduce_mean(tf.multiply(self.cross_entropy, self.sample_weights))
            tf.summary.scalar('loss', self.loss)

            self.accuracy = tf.reduce_mean(tf.cast(tf.nn.in_top_k(self.logits, self.labels, 1), tf.float32))
            tf.summary.scalar('accuracy', self.accuracy)

            tvars = tf.trainable_variables()

            grads, global_norm = tf.clip_by_global_norm(
                tf.gradients(self.loss, tvars),
                self.max_grad_norm)
            tf.summary.scalar('global_grad_norm', global_norm)

            opt = tf.train.AdamOptimizer(learning_rate)

            self.train_op = opt.apply_gradients(
                zip(grads, tvars), name='train_op',
                global_step=self.global_step)

            self.summary_op = tf.summary.merge_all()
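
For orientation, the failing variable's full name is just the concatenation of nested variable scopes: 'tcm' from the model scope above, 'word' and 'fw' presumably from the word-level bidirectional RNN in _init_body, 'multi_rnn_cell/cell_0' from MultiRNNCell, and 'bn_lstm' from BNLSTMCell.__call__ shown next. A sketch of how those scopes compose:

import tensorflow as tf

with tf.variable_scope('tcm'):
    with tf.variable_scope('word'):
        with tf.variable_scope('fw'):
            with tf.variable_scope('multi_rnn_cell'):
                with tf.variable_scope('cell_0'):
                    with tf.variable_scope('bn_lstm'):
                        v = tf.get_variable('W_xh', [200, 320])
print(v.name)    # tcm/word/fw/multi_rnn_cell/cell_0/bn_lstm/W_xh:0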

In bn_lstm.py:

class BNLSTMCell(RNNCell):
    """Batch normalized LSTM as described in http://arxiv.org/abs/1603.09025"""
    def __init__(self, num_units, training):
        self.num_units = num_units
        self.training = training

    @property
    def state_size(self):
        return (self.num_units, self.num_units)

    @property
    def output_size(self):
        return self.num_units

    def __call__(self, x, state, scope=None):
        with tf.variable_scope(scope or 'bn_lstm'):
            c, h = state

            x_size = x.get_shape().as_list()[1]
            W_xh = tf.get_variable('W_xh',  # ValueError!!!
                                   [x_size, 4 * self.num_units],
                                   initializer=orthogonal_initializer())
            W_hh = tf.get_variable('W_hh',
                                   [self.num_units, 4 * self.num_units],
                                   initializer=bn_lstm_identity_initializer(0.95))
            bias = tf.get_variable('bias', [4 * self.num_units])

            xh = tf.matmul(x, W_xh)
            hh = tf.matmul(h, W_hh)

            bn_xh = batch_norm(xh, 'xh', self.training)
            bn_hh = batch_norm(hh, 'hh', self.training)

            hidden = bn_xh + bn_hh + bias

            i, j, f, o = tf.split(hidden, 4, axis=1)

            new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)
            bn_new_c = batch_norm(new_c, 'c', self.training)

            new_h = tf.tanh(bn_new_c) * tf.sigmoid(o)

            return new_h, (new_c, new_h)

I guess the variable tcm/word/fw/multi_rnn_cell/cell_0/bn_lstm/W_xh already exists in that scope with the found shape (200, 320), and a later get_variable call under reuse specifies shape (100, 320), which is what the "specified shape" in the error refers to.
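
If that guess is right, one common way out of this class of error is to stop sharing a single cell object between RNNs whose inputs have different widths, and instead build a fresh cell (or MultiRNNCell stack) for each of the word-level and sentence-level RNNs. A hedged sketch, with a hypothetical cell_factory helper and made-up sizes:

import tensorflow as tf
from bn_lstm import BNLSTMCell    # the cell shown above (assuming bn_lstm.py exports it)

def cell_factory(num_units, num_layers, is_training):
    # a fresh BNLSTMCell per layer, never shared between RNNs
    return tf.contrib.rnn.MultiRNNCell(
        [BNLSTMCell(num_units, is_training) for _ in range(num_layers)])

# hypothetical construction site, e.g. where worker.py builds the model:
# word_cell = cell_factory(80, 1, is_training)         # used only for the word level
# sentence_cell = cell_factory(80, 1, is_training)     # a separate instance for the sentence level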