Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,7 @@ model = from_pretrained_keras("keras-io/text-generation-miniature-gpt")
|
|
17 |
a = []
|
18 |
word_to_index = {}
|
19 |
|
20 |
-
a_file = open("imdb.vocab")
|
21 |
for line in a_file:
|
22 |
a.append(line.strip())
|
23 |
print(len(a))
|
@@ -25,9 +25,11 @@ print(len(a))
|
|
25 |
for index, word in enumerate(a):
|
26 |
word_to_index[index] = word
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
31 |
processed_text = tokenizer.texts_to_sequences(start_prompt)
|
32 |
processed_text = pad_sequences(processed_text, maxlen=80, padding='post')
|
33 |
return processed_text
|
|
|
# --- Vocabulary loading and tokenizer setup (module level) ---

# Read the IMDB vocabulary file, one token per line.
# `with` guarantees the file handle is closed (the original left it open).
with open("imdb.vocab") as a_file:  # get vocab tokens
    a = [line.strip() for line in a_file]
print(len(a))

# NOTE(review): despite the name, this maps index -> word, not word -> index.
# Renaming could break external references to this module-level name — confirm
# callers before renaming.
word_to_index = dict(enumerate(a))

# Fit a Keras tokenizer on the vocabulary tokens.
# num_words=80 keeps only the 80 most frequent tokens — presumably chosen to
# match the 80-token sequence length used below; TODO confirm this is intended
# (num_words limits vocabulary size, not sequence length).
tokenizer = Tokenizer(num_words=80, split=' ')
tokenizer.fit_on_texts(word_to_index.values())  # fit tokenizer on vocab tokens


def text_process_pipeline(start_prompt):  # pipeline
    """Convert a start prompt into fixed-length, post-padded token sequences.

    Parameters
    ----------
    start_prompt : iterable of str
        Text(s) to tokenize with the module-level ``tokenizer``.

    Returns
    -------
    numpy.ndarray
        Token-id sequences post-padded/truncated to length 80.
    """
    processed_text = tokenizer.texts_to_sequences(start_prompt)
    processed_text = pad_sequences(processed_text, maxlen=80, padding='post')
    return processed_text