Jezia committed
Commit a0f8996 · 1 Parent(s): 64f3f34

Update app.py

Files changed (1):
  app.py +6 -4
app.py CHANGED
@@ -17,7 +17,7 @@ model = from_pretrained_keras("keras-io/text-generation-miniature-gpt")
 a = []
 word_to_index = {}
 
-a_file = open("imdb.vocab")
+a_file = open("imdb.vocab") # get vocab tokens
 for line in a_file:
     a.append(line.strip())
 print(len(a))
@@ -25,9 +25,11 @@ print(len(a))
 for index, word in enumerate(a):
     word_to_index[index] = word
 
-def text_process_pipeline(text): #pipeline
-    tokenizer = Tokenizer(num_words=80, split=' ')
-    tokenizer.fit_on_texts(word_to_index.values())
+tokenizer = Tokenizer(num_words=80, split=' ')
+tokenizer.fit_on_texts(word_to_index.values()) # fit tokenizer on vocab tokens
+
+def text_process_pipeline(start_prompt): #pipeline
+
     processed_text = tokenizer.texts_to_sequences(start_prompt)
     processed_text = pad_sequences(processed_text, maxlen=80, padding='post')
     return processed_text
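
For context, a minimal sketch of how the revised preprocessing flow reads after this commit: the tokenizer is now fitted once on the vocabulary tokens at module level, and text_process_pipeline only converts and pads the prompt. The imports (Tokenizer from keras.preprocessing.text, pad_sequences from keras.preprocessing.sequence, from_pretrained_keras from huggingface_hub) and the list-wrapping of start_prompt are assumptions for illustration, not the exact contents of app.py.

# Sketch only; assumed imports and a local imdb.vocab file with one token per line
from huggingface_hub import from_pretrained_keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

model = from_pretrained_keras("keras-io/text-generation-miniature-gpt")

# Build the vocabulary list and index->word map from imdb.vocab
a = []
word_to_index = {}
with open("imdb.vocab") as a_file:  # get vocab tokens
    for line in a_file:
        a.append(line.strip())
print(len(a))

for index, word in enumerate(a):
    word_to_index[index] = word

# Fit the tokenizer once on the vocabulary tokens (moved out of the pipeline function)
tokenizer = Tokenizer(num_words=80, split=' ')
tokenizer.fit_on_texts(word_to_index.values())

def text_process_pipeline(start_prompt):  # pipeline
    # Wrapping the prompt in a list is an assumption: texts_to_sequences expects a list of texts
    processed_text = tokenizer.texts_to_sequences([start_prompt])
    processed_text = pad_sequences(processed_text, maxlen=80, padding='post')
    return processed_text

# Hypothetical usage
print(text_process_pipeline("this movie is"))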