def remove_conflicting_examples(data):
    """Remove pairs whose words occur on both sides of the data set.

    Each element of ``data`` is indexed as ``pair[0]`` (the "correct" word)
    and ``pair[1]`` (the "incorrect" word). A pair is dropped when its
    correct word also occurs somewhere as an incorrect word, or its
    incorrect word also occurs somewhere as a correct word. Both
    vocabularies are computed from the full input before anything is
    removed, so the outcome does not depend on the order of the pairs.

    Args:
        data: Mutable list of two-item sequences of hashable words.

    Returns:
        The same list object, filtered in place.
    """
    correct_vocab = {pair[0] for pair in data}
    incorrect_vocab = {pair[1] for pair in data}

    # Slice assignment keeps the caller's list object, so code relying on
    # the in-place mutation keeps working, while avoiding index juggling
    # around repeated ``del`` calls.
    data[:] = [
        pair
        for pair in data
        if pair[0] not in incorrect_vocab and pair[1] not in correct_vocab
    ]
    return data
去掉一些相互冲突的 example(即同一个词既出现在正确词一侧、又出现在错误词一侧的样本),以清理相关的数据。
1 2 3 4 5
def left_pad(list_, max_seq_len=20):
    """Return ``list_`` as a fixed-length integer array, zero-filled.

    The values are written starting at index 0 and the remaining positions
    are left as zeros, so despite the function's name the padding ends up
    on the right-hand side. Inputs longer than ``max_seq_len`` are
    truncated to their first ``max_seq_len`` items.

    Args:
        list_: Sliceable sequence of integer values.
        max_seq_len: Length of the returned array. Defaults to 20, the
            value that used to be hard-coded.

    Returns:
        A 1-D NumPy array of dtype ``int`` with ``max_seq_len`` elements.
    """
    padded = np.zeros(max_seq_len, dtype=int)
    clipped = list_[:max_seq_len]
    padded[:len(clipped)] = clipped
    return padded