File size: 2,327 Bytes
e8cb9c9
3c4c2ab
b34ecb0
 
 
dcdaaec
f306d19
557899a
8e5beee
d46dad1
b34ecb0
b30b5d8
c4556a0
ebb0ce9
39f1739
f306d19
6f18088
 
 
a0f8996
6f18088
 
 
 
 
 
 
a0f8996
 
 
 
 
b34ecb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b30b5d8
3c363fa
0f3065b
5826562
 
9f584e5
b34ecb0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import gradio as gr
from gradio import mix
import numpy as np
import torch
from keras.utils.data_utils import pad_sequences


from huggingface_hub import from_pretrained_keras

model = from_pretrained_keras("keras-io/text-generation-miniature-gpt")

# Load the IMDB vocabulary, one token per line.
a = []
with open("imdb.vocab") as a_file:  # context manager: file is closed reliably
  for line in a_file:
    a.append(line.strip())
print(len(a))

# BUG FIX: the original stored index -> word under the name `word_to_index`;
# generate_answers looks tokens up by *word* (word_to_index.get(word, 1)),
# so the mapping must be word -> index.
word_to_index = {}
for index, word in enumerate(a):
  word_to_index[word] = index

tokenizer = Tokenizer(num_words=80, split=' ')
tokenizer.fit_on_texts(a)  # fit tokenizer on the vocab tokens themselves
  
def text_process_pipeline(start_prompt):
  """Tokenize the prompt and pad it to the model's 80-token input window."""
  sequences = tokenizer.texts_to_sequences(start_prompt)
  padded = pad_sequences(sequences, maxlen=80, padding='post')
  return padded
  
def sample_from(logits):
  """Sample a token id from the top-k (k=10) entries of `logits`.

  Softmax is applied over the top-k logits only, and one token index is
  drawn from that renormalised distribution.
  """
  top_logits, top_indices = tf.math.top_k(logits, k=10, sorted=True)
  indices = np.asarray(top_indices).astype("int32")
  preds = keras.activations.softmax(tf.expand_dims(top_logits, 0))[0]
  preds = np.asarray(preds).astype("float32")
  # BUG FIX: sample from the materialised numpy `indices` rather than the
  # raw tf tensor — `indices` was computed and then left unused.
  return np.random.choice(indices, p=preds)

def generate_answers(start_prompt):
  """Generate a 41-token continuation of `start_prompt` and return it as text."""
  sample_index = len(start_prompt) - 1

  text_out = text_process_pipeline(start_prompt)
  # BUG FIX: the original called `mode.predict`, which raises NameError —
  # the loaded model is bound to the module-level name `model`.
  predictions, _ = model.predict(text_out)

  tokens_generated = []
  # NOTE(review): predictions are never recomputed inside the loop, so every
  # draw samples from the same distribution — behaviour kept as written.
  while len(tokens_generated) <= 40:
    sample_token = sample_from(predictions[0][sample_index])
    tokens_generated.append(sample_token)

  text_out = tokenizer.sequences_to_texts([tokens_generated])
  return text_out[0]
    
# Gradio wiring: a single text box in, the generated continuation out.
examples = [["The movie was nice, "], ["It was showing nothing special to "]]
title = "Text Generation with Miniature GPT"
description = "Gradio Demo for a miniature with GPT. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."

iface = gr.Interface(
    fn=generate_answers,
    title=title,
    description=description,
    inputs=["text"],
    outputs=["text"],
    examples=examples,
)
iface.launch(debug=True)