MekkCyber committed on
Commit
db371b0
·
1 Parent(s): ae3cfae
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -35,8 +35,10 @@ def run_inference(model_name, input_text, num_tokens=6):
35
  try:
36
  # Call the `run_inference.py` script with the model and input
37
  start_time = time.time()
 
 
38
  result = subprocess.run(
39
- f"python run_inference.py -m models/Llama3-8B-1.58-100B-tokens/ggml-model-i2_s.gguf -p \"{input_text}\" -n {num_tokens} -temp 0",
40
  shell=True,
41
  cwd=BITNET_REPO_PATH,
42
  capture_output=True,
@@ -59,7 +61,8 @@ def run_transformers(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken
59
  # Load the model and tokenizer dynamically if needed (commented out for performance)
60
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=oauth_token.token)
61
  model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=oauth_token.token)
62
-
 
63
  # Encode the input text
64
  input_ids = tokenizer.encode(input_text, return_tensors="pt")
65
 
@@ -115,8 +118,8 @@ def interface():
115
  with gr.Row():
116
  transformer_model_dropdown = gr.Dropdown(
117
  label="Select Transformers Model",
118
- choices=["meta-llama/Llama-3.1-8B", "meta-llama/Llama-3.2-3B", "meta-llama/Llama-3.2-1B"], # Replace with actual models
119
- value="meta-llama/Llama-3.1-8B",
120
  interactive=True
121
  )
122
  compare_button = gr.Button("Run Transformers Inference", elem_id="compare-button")
 
35
  try:
36
  # Call the `run_inference.py` script with the model and input
37
  start_time = time.time()
38
+ if input_text is None :
39
+ return "Please provide an input text for the model"
40
  result = subprocess.run(
41
+ f"python run_inference.py -m models/{model_name}/ggml-model-i2_s.gguf -p \"{input_text}\" -n {num_tokens} -temp 0",
42
  shell=True,
43
  cwd=BITNET_REPO_PATH,
44
  capture_output=True,
 
61
  # Load the model and tokenizer dynamically if needed (commented out for performance)
62
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=oauth_token.token)
63
  model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=oauth_token.token)
64
+ if input_text is None :
65
+ return "Please provide an input text for the model", None
66
  # Encode the input text
67
  input_ids = tokenizer.encode(input_text, return_tensors="pt")
68
 
 
118
  with gr.Row():
119
  transformer_model_dropdown = gr.Dropdown(
120
  label="Select Transformers Model",
121
+ choices=["TinyLlama/TinyLlama-1.1B-Chat-v1.0"], # Replace with actual models
122
+ value="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
123
  interactive=True
124
  )
125
  compare_button = gr.Button("Run Transformers Inference", elem_id="compare-button")