Spaces:

pentarosarium
/

rdtest

Sleeping

App Files Community

pentarosarium committed on Sep 4, 2024

Commit

6567c4a

verified ·

1 Parent(s): 65937bf

Upload 2 files

Browse files

Files changed (2) hide show

app.py +183 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import streamlit as st
+import fitz  # PyMuPDF for PDF processing
+import pandas as pd
+from transformers import pipeline
+# Load the model (Meta-Llama 3.1 8B)
+@st.cache_resource
+def load_model():
+    """Build the Meta-Llama text-generation pipeline.
+
+    The function is wrapped in ``st.cache_resource`` so the pipeline is
+    created once and reused instead of being rebuilt on every script rerun.
+
+    Returns:
+        A ``transformers`` pipeline. Calling it with a prompt yields a list
+        of dicts, each carrying a ``'generated_text'`` entry.
+    """
+    # Llama is a decoder-only (causal) model, so it has to be served through
+    # the "text-generation" task; "text2text-generation" only loads
+    # encoder-decoder models. return_full_text=False keeps the prompt out of
+    # 'generated_text', which the rest of this file parses and displays.
+    return pipeline(
+        "text-generation",
+        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+        return_full_text=False,
+    )
+model = load_model()
+# Function to extract text from PDF
+def extract_pdf_text(file):
+    """Return the plain text of every page of a PDF, concatenated in page order.
+
+    Args:
+        file: Binary file-like object holding the PDF; it is consumed with
+            ``file.read()``.
+
+    Returns:
+        One string containing the text of all pages.
+    """
+    # Open the document as a context manager so it is closed even when a
+    # page fails to extract; the original never closed it.
+    with fitz.open(stream=file.read(), filetype="pdf") as doc:
+        return "".join(page.get_text("text") for page in doc)
+# Function to chunk text into smaller sections
+def chunk_text(text, max_tokens=1000):
+    """Split text into chunks of whole sentences of at most ``max_tokens`` words.
+
+    Sentences are delimited by ``.`` and their size is approximated by the
+    number of whitespace-separated words. A single sentence longer than
+    ``max_tokens`` is kept whole in a chunk of its own.
+
+    Args:
+        text: The text to split.
+        max_tokens: Word budget per chunk.
+
+    Returns:
+        A list of non-empty, whitespace-stripped chunks. Every period of
+        ``text`` is preserved and none is added.
+    """
+    sentences = text.split('.')
+    # Every piece except the last was followed by a period in the input; the
+    # last piece is whatever trails the final period (often empty) and must
+    # not receive one.
+    last_index = len(sentences) - 1
+    chunks = []
+    current_pieces = []
+    current_token_count = 0
+    for index, sentence in enumerate(sentences):
+        piece = sentence if index == last_index else sentence + "."
+        token_count = len(sentence.split())
+        # Only flush when something has been collected, so an oversized first
+        # sentence does not produce an empty chunk.
+        if current_pieces and current_token_count + token_count > max_tokens:
+            chunks.append("".join(current_pieces).strip())
+            current_pieces = []
+            current_token_count = 0
+        current_pieces.append(piece)
+        current_token_count += token_count
+    remainder = "".join(current_pieces).strip()
+    if remainder:
+        chunks.append(remainder)
+    return chunks
+# Prompt generation for extracting financial data
+def generate_extraction_prompt(chunk):
+    """Build the metric-extraction prompt for one chunk of report text.
+
+    Args:
+        chunk: The report text to embed after the ``Text:`` label.
+
+    Returns:
+        The prompt string: an instruction line, one bullet per requested
+        metric, the 'Not Available' fallback instruction and the chunk itself.
+    """
+    indent = "    "
+    requested_metrics = (
+        "Revenue",
+        "Net Income",
+        "Total Assets",
+        "Total Liabilities",
+        "Shareholders' Equity",
+        "Current Assets",
+        "Current Liabilities",
+    )
+    prompt_lines = [
+        # The prompt opens with a newline and closes with a bare indent.
+        "",
+        indent + "From the following text, please extract the following financial metrics in IFRS format:",
+        *(indent + "- " + metric for metric in requested_metrics),
+        indent + "If the information is not found in the text, return 'Not Available'.",
+        indent + "Text: " + f"{chunk}",
+        indent,
+    ]
+    return "\n".join(prompt_lines)
+# Function to query Meta-Llama for each chunk
+def extract_financial_metrics_from_chunk(chunk):
+    """Run the extraction prompt for one chunk through the module-level model.
+
+    Args:
+        chunk: Report text to embed in the extraction prompt.
+
+    Returns:
+        The ``'generated_text'`` entry of the model's first result.
+    """
+    model_outputs = model(generate_extraction_prompt(chunk))
+    first_output = model_outputs[0]
+    return first_output['generated_text']
+# Process the PDF text through the model
+def process_pdf_text_for_metrics(text):
+    """Chunk the report text and collect the model's response for each chunk.
+
+    Args:
+        text: Full text of the report.
+
+    Returns:
+        A list with one model response per chunk, in chunk order.
+    """
+    return [
+        extract_financial_metrics_from_chunk(section)
+        for section in chunk_text(text)
+    ]
+# Function to parse the metrics from the model response
+import re
+# Labels are tried in this order and the first one found in a line claims it.
+_METRIC_LABELS = (
+    "Revenue",
+    "Net Income",
+    "Total Assets",
+    "Total Liabilities",
+    "Shareholders' Equity",
+    "Current Assets",
+    "Current Liabilities",
+)
+# A number is either digit groups joined by thousands separators
+# ("1,234,567") or a plain run of digits. Matching only bare digit runs
+# would read "1,234,567" as three separate numbers.
+_NUMBER_PATTERN = re.compile(r'\d{1,3}(?:,\d{3})+(?!\d)|\d+')
+def parse_metrics(extracted_text):
+    """Pull the numbers reported for each known metric out of a model response.
+
+    The text is scanned line by line. A line is assigned to the first metric
+    label it contains, and a later line for the same metric replaces an
+    earlier one.
+
+    Args:
+        extracted_text: Model response, one metric per line.
+
+    Returns:
+        A dict mapping each metric label found to the list of numbers on its
+        line, as digit-only strings with thousands separators removed. The
+        list is empty when the line holds no number. Labels that never
+        appear are absent from the dict.
+    """
+    metrics = {}
+    for line in extracted_text.split("\n"):
+        for label in _METRIC_LABELS:
+            if label in line:
+                metrics[label] = [
+                    number.replace(",", "")
+                    for number in _NUMBER_PATTERN.findall(line)
+                ]
+                break
+    return metrics
+# Function to aggregate metrics from all chunks
+def aggregate_metrics(extracted_metrics):
+    """Merge the metrics parsed from each model response into one dict.
+
+    For every metric, the first non-empty value found across the responses
+    is kept; later responses only fill metrics that are still empty.
+
+    Args:
+        extracted_metrics: Iterable of model response strings.
+
+    Returns:
+        A dict with one entry per known metric, ``None`` where no response
+        mentioned it.
+    """
+    metric_names = (
+        "Revenue",
+        "Net Income",
+        "Total Assets",
+        "Total Liabilities",
+        "Shareholders' Equity",
+        "Current Assets",
+        "Current Liabilities",
+    )
+    combined = dict.fromkeys(metric_names)
+    for response_text in extracted_metrics:
+        for name, numbers in parse_metrics(response_text).items():
+            if combined[name]:
+                continue
+            combined[name] = numbers
+    return combined
+# Function to calculate financial ratios
+def calculate_financial_ratios(metrics):
+    """Compute liquidity, leverage and profitability ratios from parsed metrics.
+
+    Args:
+        metrics: Dict mapping metric labels to a list of numeric strings;
+            only the first entry of each list is used.
+
+    Returns:
+        A dict with 'Current Ratio', 'Debt to Equity',
+        'Return on Assets (ROA)' and 'Return on Equity (ROE)', or an
+        explanatory message string when a metric is missing, empty,
+        non-numeric, or a denominator is zero.
+    """
+    def first_value(label):
+        # float() rather than int() so decimal figures are accepted too.
+        return float(metrics[label][0])
+    try:
+        current_assets = first_value('Current Assets')
+        current_liabilities = first_value('Current Liabilities')
+        total_liabilities = first_value('Total Liabilities')
+        equity = first_value('Shareholders\' Equity')
+        net_income = first_value('Net Income')
+        total_assets = first_value('Total Assets')
+        return {
+            'Current Ratio': current_assets / current_liabilities,
+            'Debt to Equity': total_liabilities / equity,
+            'Return on Assets (ROA)': net_income / total_assets,
+            'Return on Equity (ROE)': net_income / equity
+        }
+    # ValueError (unparsable number) and ZeroDivisionError (zero denominator)
+    # are reported like a missing metric instead of crashing the app.
+    except (TypeError, KeyError, IndexError, ValueError, ZeroDivisionError):
+        return "Some metrics were not extracted properly or are missing."
+# Streamlit UI
+st.title("Financial Ratio Extractor from IFRS Reports")
+st.write("""
+    Upload an IFRS financial report (PDF), and this app will automatically extract key financial metrics such as Revenue,
+    Net Income, Total Assets, and calculate important financial ratios like ROA, ROE, and Debt-to-Equity Ratio.
+    You can also ask questions about the financial data using Meta-Llama.
+""")
+# File uploader for PDF
+uploaded_file = st.file_uploader("Upload your IFRS report (PDF)", type=["pdf"])
+# Results for the current upload; they stay None until a report is analysed.
+aggregated_metrics = None
+financial_ratios = None
+# If a PDF is uploaded
+if uploaded_file:
+    # Streamlit re-executes this script on every widget interaction. Keep the
+    # analysis in session state, keyed by the upload's name and size, so that
+    # typing a question or pressing the button does not push the whole report
+    # through the model again.
+    # NOTE(review): two different files with identical name and size would
+    # share a cache entry; confirm this is acceptable.
+    upload_key = (uploaded_file.name, uploaded_file.size)
+    if st.session_state.get("analysis_key") != upload_key:
+        st.write("Processing your document, please wait...")
+        # Extract text from PDF
+        pdf_text = extract_pdf_text(uploaded_file)
+        # Process the text through Meta-Llama for metrics extraction
+        extracted_metrics = process_pdf_text_for_metrics(pdf_text)
+        # Aggregate extracted metrics and calculate financial ratios
+        fresh_metrics = aggregate_metrics(extracted_metrics)
+        st.session_state["analysis"] = (fresh_metrics, calculate_financial_ratios(fresh_metrics))
+        st.session_state["analysis_key"] = upload_key
+    aggregated_metrics, financial_ratios = st.session_state["analysis"]
+    # Display extracted financial ratios
+    st.subheader("Extracted Financial Ratios:")
+    if isinstance(financial_ratios, dict):
+        st.table(pd.DataFrame(financial_ratios.items(), columns=["Ratio", "Value"]))
+    else:
+        st.write(financial_ratios)
+# Asking questions to Meta-Llama
+st.subheader("Ask Meta-Llama about the extracted financial data:")
+question = st.text_input("Enter your question here")
+if st.button("Ask Meta-Llama"):
+    if question:
+        prompt = question
+        # Give the model the extracted figures; without them it cannot answer
+        # questions about the uploaded report.
+        if aggregated_metrics is not None:
+            prompt = (
+                f"Extracted financial metrics: {aggregated_metrics}\n"
+                f"Financial ratios: {financial_ratios}\n"
+                f"Question: {question}"
+            )
+        response = model(prompt)
+        st.write("Meta-Llama's Response:")
+        st.write(response[0]['generated_text'])

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit==1.18.0
+pymupdf==1.22.5
+transformers==4.28.0
+pandas==1.3.3