Upload 2 files
- app.py +151 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,151 @@
import gradio as gr
from transformers import pipeline
from config import ModelArgs
from inference import remove_prefix
from model import Llama
import torch
from inference_sft import topk_sampling
import os
import subprocess
import re
from tokenizer import Tokenizer
import torch.nn.functional as F
import shutil

# Define model paths (checkpoint hosted at
# https://huggingface.co/YuvrajSingh9886/StoryLlama/blob/main/snapshot_4650.pt)
model_paths = {
    "Pretrained": "weights/pretrained/snapshot_4650.pt"
}

# ACCESS_TOKEN = os.getenv("GDRIVE_ACCESS_TOKEN")

# Download each checkpoint via the helper script before loading it
for i in model_paths:
    subprocess.run(["python", "download_model_weight.py", "--model_type", i.lower()], check=True)

tk = Tokenizer()
tk = tk.ready_tokenizer()


def beam_search(model, prompt, device, max_length=50, beam_width=5, top_k=50, temperature=1.0):
    input_ids = tk.encode(prompt, return_tensors='pt').to(device)

    # Initialize beams with the input repeated beam_width times
    beams = input_ids.repeat(beam_width, 1)
    beam_scores = torch.zeros(beam_width).to(device)  # Cumulative log-probabilities

    for _ in range(max_length):
        with torch.no_grad():
            outputs = model(beams)
            logits = outputs[:, -1, :]  # Last-token logits for each beam

        # Apply temperature scaling
        scaled_logits = logits / temperature

        # Calculate log probabilities
        log_probs = F.log_softmax(scaled_logits, dim=-1)

        # Get the top k candidate tokens for each beam
        topk_log_probs, topk_indices = torch.topk(log_probs, top_k, dim=-1)

        # Generate all beam_width * top_k candidate sequences
        expanded_beams = beams.repeat_interleave(top_k, dim=0)
        new_tokens = topk_indices.view(-1, 1)
        candidate_beams = torch.cat([expanded_beams, new_tokens], dim=1)

        # Calculate new scores for all candidates
        expanded_scores = beam_scores.repeat_interleave(top_k)
        candidate_scores = expanded_scores + topk_log_probs.view(-1)

        # Keep the top beam_width candidates
        top_scores, top_indices = candidate_scores.topk(beam_width)
        beams = candidate_beams[top_indices]
        beam_scores = top_scores

    # Select the best beam
    best_idx = beam_scores.argmax()
    best_sequence = beams[best_idx]
    return tk.decode(best_sequence, skip_special_tokens=True)

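# Note (caveat): all beams start as identical copies of the prompt, so the
# first top-k selection scores duplicate candidates identically, which may be
# why the interface below keeps the beam-search option commented out and
# defaults to top-k sampling.
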
# Function to load the selected model
def load_model(model_type):
    model_path = model_paths[model_type]

    # Check if the model exists; if not, download it
    # if not os.path.exists(model_path):
    #     shutil.rmtree(model_path)
    #     os.mkdir(model_path)
    #     print(f"{model_type} Model not found! Downloading...")
    #     subprocess.run(["python", "download_model_weight.py", f"--{model_type.lower()}"], check=True)
    # else:
    #     print(f"{model_type} Model found, skipping download.")

    # Build the model from the configured hyperparameters
    model = Llama(
        device=ModelArgs.device,
        embeddings_dims=ModelArgs.embeddings_dims,
        no_of_decoder_layers=ModelArgs.no_of_decoder_layers,
        block_size=ModelArgs.block_size,
        vocab_size=ModelArgs.vocab_size,
        dropout=ModelArgs.dropout
    )
    model = model.to(ModelArgs.device)

    # Load the checkpoint and strip the torch.compile '_orig_mod.' prefix
    # from the state-dict keys before restoring
    dict_model = torch.load(model_path, weights_only=False)
    dict_model['MODEL_STATE'] = remove_prefix(dict_model['MODEL_STATE'], '_orig_mod.')
    model.load_state_dict(dict_model['MODEL_STATE'])
    model.eval()

    return model

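# Note: torch.load with weights_only=False can unpickle arbitrary Python
# objects, so checkpoints should only come from the trusted repository above.
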
# Track the loaded model and its type so we only reload when the selection changes
current_model_type = "Pretrained"
current_model = load_model(current_model_type)


def answer_question(model_type, prompt, temperature, top_k, max_length):
    global current_model, current_model_type
    if model_type == "Base (Pretrained)":
        model_type = "Pretrained"
    # Reload the model only if the selected type differs from the loaded one
    if model_type != current_model_type:
        current_model = load_model(model_type)
        current_model_type = model_type

    # formatted_prompt = f"### Instruction: Answer the following query. \n\n ### Input: {prompt}.\n\n ### Response: "

    with torch.no_grad():
        # if decoding_method == "Beam Search":
        #     generated_text = beam_search(current_model, formatted_prompt, device=ModelArgs.device,
        #                                  max_length=max_length, beam_width=5, top_k=top_k, temperature=temperature)
        # else:
        generated_text = topk_sampling(current_model, prompt, max_length=max_length,
                                       top_k=top_k, temperature=temperature, device=ModelArgs.device)
    return generated_text

iface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Dropdown(choices=["Base (Pretrained)"], value="Base (Pretrained)", label="Select Model"),
        # gr.Dropdown(choices=["Top-K Sampling", "Beam Search"], value="Top-K Sampling", label="Decoding Method"),
        gr.Textbox(label="Prompt", lines=5),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.1, label="Temperature"),
        gr.Slider(minimum=50, maximum=ModelArgs.vocab_size, value=50, step=1, label="Top-k"),
        gr.Slider(minimum=10, maximum=ModelArgs.block_size, value=256, step=1, label="Max Length")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="StoryLlama",
    description="Enter a prompt, select a model (Pretrained), and the model will generate a story!"
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
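topk_sampling is imported from inference_sft, which is not part of this commit. As a rough sketch of what such a sampler typically looks like, assuming only the call signature used in answer_question above (the body is illustrative, not the repository's implementation):

def topk_sampling_sketch(model, prompt, device, max_length=50, top_k=50, temperature=1.0):
    # Encode the prompt and generate tokens autoregressively
    input_ids = tk.encode(prompt, return_tensors='pt').to(device)
    for _ in range(max_length):
        with torch.no_grad():
            logits = model(input_ids)[:, -1, :] / temperature
        # Keep the top_k most likely tokens, renormalize, and sample one
        top_values, top_indices = torch.topk(logits, top_k, dim=-1)
        probs = F.softmax(top_values, dim=-1)
        next_token = top_indices.gather(-1, torch.multinomial(probs, num_samples=1))
        input_ids = torch.cat([input_ids, next_token], dim=1)
    return tk.decode(input_ids[0], skip_special_tokens=True)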
requirements.txt
ADDED
@@ -0,0 +1,4 @@
torch
transformers
gdown
huggingface_hub
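The dependencies above are unpinned, so each Space build resolves the latest releases. For reproducible builds, a pinned variant could look like the following; the version numbers are illustrative assumptions, not part of the commit:

torch==2.1.0
transformers==4.38.0
gdown==5.1.0
huggingface_hub==0.21.0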