import numpy as np

def remove_conflicting_examples(data):
    # data: list of [correct_word, incorrect_word] pairs.
    correct_words, incorrect_words = np.array(data)[:, 0], np.array(data)[:, 1]
    correct_vocab, incorrect_vocab = set(correct_words), set(incorrect_words)
    # Drop any pair whose correct word also appears somewhere as an incorrect
    # word, or whose incorrect word also appears somewhere as a correct word.
    i = 0
    while i < len(data):
        if data[i][0] in incorrect_vocab or data[i][1] in correct_vocab:
            del data[i]
        else:
            i += 1
    return data
Remove the mutually conflicting examples and tidy up the data.
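A minimal usage sketch of the function above; the (correct, incorrect) spelling pairs are made up for illustration:

# Toy data: each example is a [correct_word, incorrect_word] pair.
pairs = [["hello", "helo"], ["world", "wrold"], ["helo", "hel"], ["cat", "kat"]]

# "helo" shows up both as an incorrect spelling of "hello" and as a correct
# word in its own example, so both of those conflicting pairs are dropped.
cleaned = remove_conflicting_examples(pairs)
print(cleaned)  # [["world", "wrold"], ["cat", "kat"]]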
def left_pad(list_):
    # Left-align the sequence in a fixed-length vector of 20,
    # zero-padding (or truncating) on the right.
    max_seq_len = 20
    ans = np.zeros((max_seq_len,), dtype=int)
    trimmed = list_[:max_seq_len]
    ans[:len(trimmed)] = np.array(trimmed)
    return ans
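For example (token ids made up), a short sequence gets zero-padded out to length 20:

print(left_pad([4, 8, 15]))
# [ 4  8 15  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]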
Why does everyone need mobile devs? Because the web is slowly dying. I have friends — well, probably ex-friends now — in just about every org at Google, who used to point me at their gloomy graphs, and it doesn’t matter how you slice it, the web’s in a steady decline as the whole world moves to mobile.
At Google, most engineers are too snooty to do mobile or web programming. “I don’t do frontend”, they proclaim with maximal snootiness.
There’s a phenomenon there that I like to call the “DAG of Disdain”, wherein DAG means Directed Acyclic Graph, which is a bit like a flowchart.
I am talking about full-scale replacements for Google’s entire Android development stack. Microsoft has Xamarin, Adobe has Cordova, Facebook has React Native; I mean, it’s crazy town. Google has its own new entry, Flutter.
To most folks, they probably appear to be comfortably in the driver’s seat. But consider: If all mobile developers were to start using a particular cross-platform framework X, then literally any other hardware/OS manufacturer or consortium could come along with their own competing hardware/OS platform (like, say, Windows) that supports that framework X directly, and all the apps would run on it (probably faster, to boot), which would cut Google out entirely.
Abandoning native programming in favor of fast-cycle cross-platform frameworks like React Native is a winning strategy.
with tf.Session() as sess:
    # Block 1: model initialization
    # Create model.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    # create_model() builds and returns a seq2seq_model.
    model = create_model(sess, False)

    # Block 2: read the data
    # Read data into buckets and compute their sizes.
    print("Reading development and training data (limit: %d)."
          % FLAGS.max_train_data_size)
    # read_data() takes the train / dev file paths.
    dev_set = read_data(from_dev, to_dev)
    train_set = read_data(from_train, to_train, FLAGS.max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    while True:
        # Block 3: build a batch
        # Choose a bucket according to data distribution. We pick a random number
        # in [0, 1] and use the corresponding interval in train_buckets_scale.
        random_number_01 = np.random.random_sample()
        bucket_id = min([i for i in xrange(len(train_buckets_scale))
                         if train_buckets_scale[i] > random_number_01])

        # Get a batch and make a step.
        start_time = time.time()
        encoder_inputs, decoder_inputs, target_weights = model.get_batch(
            train_set, bucket_id)

        # Block 4: train one step
        _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, bucket_id, False)
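To make the bucket-selection step concrete, here is a small sketch (the bucket sizes are made-up numbers) of how train_buckets_scale turns a uniform random draw into a bucket_id, weighted by how much data each bucket holds:

# Hypothetical bucket sizes: bucket 0 has 100 pairs, bucket 1 has 300, etc.
train_bucket_sizes = [100, 300, 400, 200]
train_total_size = float(sum(train_bucket_sizes))          # 1000.0
train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                       for i in range(len(train_bucket_sizes))]
print(train_buckets_scale)  # [0.1, 0.4, 0.8, 1.0]

# A draw of 0.55 falls into the (0.4, 0.8] interval, so bucket_id is 2;
# bigger buckets own wider intervals and therefore get sampled more often.
random_number_01 = 0.55
bucket_id = min(i for i in range(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01)
print(bucket_id)  # 2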
def get_batch(self, data, bucket_id):
    # The bucket_id passed in fixes the encoder and decoder sizes for this
    # batch, e.g. 5 and 10.
    encoder_size, decoder_size = self.buckets[bucket_id]
    encoder_inputs, decoder_inputs = [], []

    # Get a random batch of encoder and decoder inputs from data,
    # pad them if needed, reverse encoder inputs and add GO to decoder.
    for _ in xrange(self.batch_size):
        # As noted earlier, data is a list of length 4; data[i] holds the
        # examples whose lengths fit bucket[i].
        encoder_input, decoder_input = random.choice(data[bucket_id])

        # Encoder inputs are padded and then reversed.
        encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input))
        encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))

        # Decoder inputs get an extra "GO" symbol, and are padded then.
        decoder_pad_size = decoder_size - len(decoder_input) - 1
        decoder_inputs.append([data_utils.GO_ID] + decoder_input +
                              [data_utils.PAD_ID] * decoder_pad_size)

    # Now we create batch-major vectors from the data selected above.
    batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []

    # Batch encoder inputs are just re-indexed encoder_inputs.
    for length_idx in xrange(encoder_size):
        batch_encoder_inputs.append(
            np.array([encoder_inputs[batch_idx][length_idx]
                      for batch_idx in xrange(self.batch_size)], dtype=np.int32))

    # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
    for length_idx in xrange(decoder_size):
        batch_decoder_inputs.append(
            np.array([decoder_inputs[batch_idx][length_idx]
                      for batch_idx in xrange(self.batch_size)], dtype=np.int32))

        # Create target_weights to be 0 for targets that are padding.
        # These weights feed the training loss: positions that hold a real
        # target get 1, everything else gets 0. "Real target" means the
        # decoder_input shifted forward by one position.
        batch_weight = np.ones(self.batch_size, dtype=np.float32)
        for batch_idx in xrange(self.batch_size):
            # We set weight to 0 if the corresponding target is a PAD symbol.
            # The corresponding target is decoder_input shifted by 1 forward.
            if length_idx < decoder_size - 1:
                target = decoder_inputs[batch_idx][length_idx + 1]
            if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
                batch_weight[batch_idx] = 0.0
        batch_weights.append(batch_weight)
    return batch_encoder_inputs, batch_decoder_inputs, batch_weights
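As a worked illustration of the padding step inside get_batch, here is what a single example looks like for a (5, 10) bucket, using made-up token ids and the same PAD_ID = 0 / GO_ID = 1 values that data_utils defines:

PAD_ID, GO_ID = 0, 1                 # same ids data_utils uses
encoder_size, decoder_size = 5, 10   # the (5, 10) bucket

encoder_input = [23, 45, 8]          # made-up source token ids
decoder_input = [67, 12, 9, 2]       # made-up target ids, ending with EOS

# Encoder: pad up to length 5, then reverse the whole sequence.
encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input))
print(list(reversed(encoder_input + encoder_pad)))
# [0, 0, 8, 45, 23]

# Decoder: prepend GO, then pad out to length 10.
decoder_pad_size = decoder_size - len(decoder_input) - 1
print([GO_ID] + decoder_input + [PAD_ID] * decoder_pad_size)
# [1, 67, 12, 9, 2, 0, 0, 0, 0, 0]

# The target at position t is decoder_input[t + 1], so target_weights is 1.0
# wherever that shifted position holds a real token and 0.0 where it is PAD.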