with tf.Session() as sess: # 區塊1,模型初始化 # Create model. print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size)) # 透過 create_model() 方法創建一個 seq2seq_model model = create_model(sess, False) # 區塊2,讀入資料 # Read data into buckets and compute their sizes. print ("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size) # read_data 函數讀取 train, dev 的路徑, dev_set = read_data(from_dev, to_dev) train_set = read_data(from_train, to_train, FLAGS.max_train_data_size) train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size for i in xrange(len(train_bucket_sizes))] whileTrue: # 區塊3,建立 batch # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([i for i in xrange(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01]) # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) # 區塊4,訓練 _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False)
defget_batch(self, data, bucket_id): # 根據傳進來的 bucket_id 決定這次的 encoder, deocder size,例如 5, 10 encoder_size, decoder_size = self.buckets[bucket_id] encoder_inputs, decoder_inputs = [], [] # Get a random batch of encoder and decoder inputs from data, # pad them if needed, reverse encoder inputs and add GO to decoder. for _ in xrange(self.batch_size): # 前面提過 data 是一個長度為4的list,data[i] 存放長度符合 bucket[i] 的資料 encoder_input, decoder_input = random.choice(data[bucket_id]) # Encoder inputs are padded and then reversed. encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input)) encoder_inputs.append(list(reversed(encoder_input + encoder_pad))) # Decoder inputs get an extra "GO" symbol, and are padded then. decoder_pad_size = decoder_size - len(decoder_input) - 1 decoder_inputs.append([data_utils.GO_ID] + decoder_input + [data_utils.PAD_ID] * decoder_pad_size) # Now we create batch-major vectors from the data selected above. batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], [] # Batch encoder inputs are just re-indexed encoder_inputs. for length_idx in xrange(encoder_size): batch_encoder_inputs.append( np.array([encoder_inputs[batch_idx][length_idx] for batch_idx in xrange(self.batch_size)], dtype=np.int32)) # Batch decoder inputs are re-indexed decoder_inputs, we create weights. for length_idx in xrange(decoder_size): batch_decoder_inputs.append( np.array([decoder_inputs[batch_idx][length_idx] for batch_idx in xrange(self.batch_size)], dtype=np.int32)) # Create target_weights to be 0 for targets that are padding. # 這個 weights 是給模型訓練用的,有目標值的地方為1,其他為0 # 有目標值的地方,指的是 decoder_input 平移1格的結果 batch_weight = np.ones(self.batch_size, dtype=np.float32) for batch_idx in xrange(self.batch_size): # We set weight to 0 if the corresponding target is a PAD symbol. # The corresponding target is decoder_input shifted by 1 forward. if length_idx < decoder_size - 1: target = decoder_inputs[batch_idx][length_idx + 1] if length_idx == decoder_size - 1or target == data_utils.PAD_ID: batch_weight[batch_idx] = 0.0 batch_weights.append(batch_weight) return batch_encoder_inputs, batch_decoder_inputs, batch_weights