File size: 2,327 Bytes
e8cb9c9
3c4c2ab
b34ecb0
 
 
dcdaaec
f306d19
557899a
8e5beee
d46dad1
b34ecb0
b30b5d8
c4556a0
ebb0ce9
39f1739
f306d19
6f18088
 
 
a0f8996
6f18088
 
 
 
 
 
 
a0f8996
 
 
 
 
b34ecb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b30b5d8
3c363fa
0f3065b
5826562
 
9f584e5
b34ecb0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import gradio as gr
from gradio import mix
import numpy as np
import torch
from keras.utils.data_utils import pad_sequences


from huggingface_hub import from_pretrained_keras

model = from_pretrained_keras("keras-io/text-generation-miniature-gpt")

# Load the IMDB vocabulary, one token per line.
a = []
with open("imdb.vocab") as a_file:  # context manager: file is closed reliably
  for line in a_file:
    a.append(line.strip())
print(len(a))

# BUG FIX: the original stored index -> word under the name `word_to_index`;
# generate_answers looks tokens up by *word* (word_to_index.get(word, 1)),
# so the mapping must be word -> index.
word_to_index = {}
for index, word in enumerate(a):
  word_to_index[word] = index

tokenizer = Tokenizer(num_words=80, split=' ')
tokenizer.fit_on_texts(a)  # fit tokenizer on the vocab tokens themselves
  
def text_process_pipeline(start_prompt):
  """Tokenize the prompt and pad it to the model's 80-token input window."""
  sequences = tokenizer.texts_to_sequences(start_prompt)
  padded = pad_sequences(sequences, maxlen=80, padding='post')
  return padded
  
def sample_from(logits):
  """Sample a token id from the top-k (k=10) entries of `logits`.

  Softmax is applied over the top-k logits only, and one token index is
  drawn from that renormalised distribution.
  """
  top_logits, top_indices = tf.math.top_k(logits, k=10, sorted=True)
  indices = np.asarray(top_indices).astype("int32")
  preds = keras.activations.softmax(tf.expand_dims(top_logits, 0))[0]
  preds = np.asarray(preds).astype("float32")
  # BUG FIX: sample from the materialised numpy `indices` rather than the
  # raw tf tensor — `indices` was computed and then left unused.
  return np.random.choice(indices, p=preds)

def generate_answers(start_prompt):
  """Generate a 41-token continuation of `start_prompt` and return it as text."""
  sample_index = len(start_prompt) - 1

  text_out = text_process_pipeline(start_prompt)
  # BUG FIX: the original called `mode.predict`, which raises NameError —
  # the loaded model is bound to the module-level name `model`.
  predictions, _ = model.predict(text_out)

  tokens_generated = []
  # NOTE(review): predictions are never recomputed inside the loop, so every
  # draw samples from the same distribution — behaviour kept as written.
  while len(tokens_generated) <= 40:
    sample_token = sample_from(predictions[0][sample_index])
    tokens_generated.append(sample_token)

  text_out = tokenizer.sequences_to_texts([tokens_generated])
  return text_out[0]
    
# Gradio wiring: a single text box in, the generated continuation out.
examples = [["The movie was nice, "], ["It was showing nothing special to "]]
title = "Text Generation with Miniature GPT"
description = "Gradio Demo for a miniature with GPT. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."

iface = gr.Interface(
    fn=generate_answers,
    title=title,
    description=description,
    inputs=["text"],
    outputs=["text"],
    examples=examples,
)
iface.launch(debug=True)