pentarosarium committed
Commit 3d82a40 · verified · 1 Parent(s): 03c1eed

Upload 2 files

Files changed (1)
  1. app.py +8 -3
app.py CHANGED
@@ -1,16 +1,18 @@
 import streamlit as st
 import fitz # PyMuPDF for PDF processing
 import pandas as pd
-from transformers import pipeline
 import os
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
 
 # Get the Hugging Face token from the environment variables
 hf_token = os.getenv("HF_API_TOKEN")
 
+
 # Load the model (Meta-Llama 3.1 8B)
 @st.cache_resource
 def load_model():
-    model = pipeline("text2text-generation", model="meta-llama/Meta-Llama-3.1-8B-Instruct", use_auth_token=hf_token)
+    model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct", use_auth_token=hf_token)
     return model
 
 model = load_model()
@@ -65,7 +67,10 @@ def generate_extraction_prompt(chunk):
 # Function to query Meta-Llama for each chunk
 def extract_financial_metrics_from_chunk(chunk):
     prompt = generate_extraction_prompt(chunk)
-    response = model(prompt)
+    model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct", use_auth_token=hf_token)
+    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct", use_auth_token=hf_token)
+    nlp = pipeline("text-generation", model=model, tokenizer=tokenizer)
+    response = nlp(prompt)
     return response[0]['generated_text']
 
 # Process the PDF text through the model
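Note on this change: as committed, extract_financial_metrics_from_chunk reloads the 8B model and tokenizer from the Hub on every chunk, which is very slow and memory-hungry, while the @st.cache_resource-decorated load_model still returns a bare model that is never used. A common pattern is to build the text-generation pipeline once inside the cached loader and reuse it per call. A minimal sketch of that variant (not part of this commit; load_pipeline is a hypothetical name, generate_extraction_prompt comes from the rest of app.py, and token= is the current replacement for the deprecated use_auth_token= in recent transformers releases):

import os
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

hf_token = os.getenv("HF_API_TOKEN")

@st.cache_resource
def load_pipeline():
    # Build model, tokenizer, and pipeline once; Streamlit caches the
    # returned object across reruns, so the 8B weights load a single time.
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Meta-Llama-3.1-8B-Instruct", token=hf_token
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Meta-Llama-3.1-8B-Instruct", token=hf_token
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

nlp = load_pipeline()

def extract_financial_metrics_from_chunk(chunk):
    prompt = generate_extraction_prompt(chunk)
    # Reuse the cached pipeline instead of reloading the model per chunk
    response = nlp(prompt)
    return response[0]["generated_text"]

Under this assumption the per-chunk cost drops to a single forward pass, and the earlier switch from "text2text-generation" to "text-generation" is kept, since Llama 3.1 is a causal (decoder-only) model rather than an encoder-decoder one.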