pentarosarium committed on
Commit
6567c4a
·
verified ·
1 Parent(s): 65937bf

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +183 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF for PDF processing
3
+ import pandas as pd
4
+ from transformers import pipeline
5
+
6
+ # Load the model (Meta-Llama 3.1 8B)
7
@st.cache_resource
def load_model():
    """Build the Meta-Llama 3.1 8B Instruct generation pipeline.

    Llama is a decoder-only (causal) language model, so it has to be served
    through the ``text-generation`` task. ``text2text-generation`` targets
    encoder-decoder models and cannot load this checkpoint.

    ``return_full_text=False`` makes the ``generated_text`` field contain only
    the newly generated completion. Without it the text-generation pipeline
    echoes the prompt in front of the answer, which would feed the prompt's own
    metric labels into the downstream parser.

    The function is wrapped in ``st.cache_resource`` so the pipeline is built
    once and shared instead of being recreated on each script run.

    NOTE(review): Llama 3.1 checkpoints are documented as needing a recent
    transformers release and a deep-learning backend such as PyTorch -- confirm
    that the versions pinned in requirements.txt satisfy this.

    Returns:
        The callable Hugging Face pipeline object.
    """
    return pipeline(
        "text-generation",
        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
        return_full_text=False,
    )
11
+
12
# Module-level handle to the generation pipeline used by the helpers below.
# load_model is decorated with st.cache_resource, so this call is served from
# Streamlit's resource cache after the first execution.
model = load_model()
13
+
14
+ # Function to extract text from PDF
15
def extract_pdf_text(file):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        file: Binary file-like object whose ``read()`` returns the raw PDF
            bytes (for example the object returned by ``st.file_uploader``).

    Returns:
        A single string holding the text of all pages, with no separator
        inserted between pages.
    """
    # Opening the document as a context manager guarantees it is closed again,
    # even when a page fails to parse halfway through.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return "".join(page.get_text("text") for page in doc)
21
+
22
+ # Function to chunk text into smaller sections
23
def chunk_text(text, max_tokens=1000):
    """Split ``text`` into chunks of whole sentences within a word budget.

    Sentences are delimited by ``.``. The "token" count is an approximation:
    it is the number of whitespace-separated words, not model tokens.

    Every period removed by the split is restored, so joining the pieces gives
    back the original text (including decimals such as ``1.5``). No period is
    invented after the final piece if the text did not end with one.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk. A single sentence longer
            than this is kept intact as its own chunk rather than being cut.

    Returns:
        A list of non-empty, whitespace-stripped chunk strings. Empty or
        whitespace-only input yields an empty list.
    """
    pieces = text.split('.')
    final_index = len(pieces) - 1

    chunks = []
    current_parts = []
    current_word_count = 0

    for index, piece in enumerate(pieces):
        # split() swallowed the delimiter; put it back everywhere except after
        # the last piece, which was not followed by a period in the input.
        sentence = piece if index == final_index else piece + "."
        if not sentence:
            continue

        word_count = len(piece.split())

        # Start a new chunk only when there is something to close; this avoids
        # emitting an empty chunk when the very first sentence is oversized.
        if current_parts and current_word_count + word_count > max_tokens:
            finished = "".join(current_parts).strip()
            if finished:
                chunks.append(finished)
            current_parts = []
            current_word_count = 0

        current_parts.append(sentence)
        current_word_count += word_count

    remainder = "".join(current_parts).strip()
    if remainder:
        chunks.append(remainder)

    return chunks
43
+
44
+ # Prompt generation for extracting financial data
45
def generate_extraction_prompt(chunk):
    """Compose the instruction prompt asking the model to pull metrics from ``chunk``.

    The prompt lists the seven requested metrics as a bullet list, tells the
    model to answer 'Not Available' for anything it cannot find, and ends with
    the chunk itself after a ``Text:`` label.

    Args:
        chunk: The piece of report text to embed in the prompt.

    Returns:
        The prompt string; it begins and ends with a newline.
    """
    requested_metrics = (
        "Revenue",
        "Net Income",
        "Total Assets",
        "Total Liabilities",
        "Shareholders' Equity",
        "Current Assets",
        "Current Liabilities",
    )

    prompt_lines = [
        "",
        "From the following text, please extract the following financial metrics in IFRS format:",
    ]
    prompt_lines.extend("- " + metric for metric in requested_metrics)
    prompt_lines.extend([
        "",
        "If the information is not found in the text, return 'Not Available'.",
        "",
        f"Text: {chunk}",
        "",
    ])
    return "\n".join(prompt_lines)
60
+
61
+ # Function to query Meta-Llama for each chunk
62
def extract_financial_metrics_from_chunk(chunk):
    """Run the extraction prompt for one chunk through the model.

    Args:
        chunk: A piece of report text.

    Returns:
        The ``generated_text`` field of the first result returned by the
        module-level ``model`` pipeline.
    """
    outputs = model(generate_extraction_prompt(chunk))
    first_result = outputs[0]
    return first_result['generated_text']
66
+
67
+ # Process the PDF text through the model
68
def process_pdf_text_for_metrics(text):
    """Chunk the report text and collect the model's answer for every chunk.

    Args:
        text: The full text extracted from the report.

    Returns:
        A list with one raw model response string per chunk, in chunk order.
    """
    return [
        extract_financial_metrics_from_chunk(piece)
        for piece in chunk_text(text)
    ]
77
+
78
+ # Function to parse the metrics from the model response
79
+ import re
80
+
81
# Metric labels in matching priority: the first label found in a line wins,
# so at most one metric is read from any given line.
_METRIC_LABELS = (
    "Revenue",
    "Net Income",
    "Total Assets",
    "Total Liabilities",
    "Shareholders' Equity",
    "Current Assets",
    "Current Liabilities",
)

# An integer, optionally written with comma thousands separators ("1,234,567").
_NUMBER_PATTERN = re.compile(r'\d{1,3}(?:,\d{3})+|\d+')


def parse_metrics(extracted_text):
    """Pull the numeric values for each known metric out of a model response.

    The response is scanned line by line. A line is attributed to the first
    label from ``_METRIC_LABELS`` that it contains, and every integer on that
    line is collected.

    Numbers written with thousands separators are kept together and returned
    without the commas (``"1,234,567"`` -> ``"1234567"``), so callers can pass
    them straight to ``int()``. Decimal points are not interpreted: ``12.5`` is
    still reported as ``"12"`` and ``"5"``.

    A line that mentions a metric but carries no digits (for example
    ``"Revenue: Not Available"``) is ignored, so it cannot erase a value found
    on an earlier line. When several lines carry numbers for the same metric,
    the last one wins.

    Args:
        extracted_text: Raw text returned by the model.

    Returns:
        A dict mapping metric label to a non-empty list of digit strings.
        Metrics for which no number was found are absent from the dict.
    """
    metrics = {}
    for line in extracted_text.split("\n"):
        label = next((name for name in _METRIC_LABELS if name in line), None)
        if label is None:
            continue

        numbers = [
            match.replace(",", "") for match in _NUMBER_PATTERN.findall(line)
        ]
        if numbers:
            metrics[label] = numbers

    return metrics
100
+
101
+ # Function to aggregate metrics from all chunks
102
def aggregate_metrics(extracted_metrics):
    """Merge the per-chunk model responses into one metrics dictionary.

    Each response is parsed with ``parse_metrics``. For every metric, the first
    truthy value encountered across the responses is kept and later ones are
    ignored.

    Args:
        extracted_metrics: Iterable of raw model response strings.

    Returns:
        A dict with one entry per known metric; entries for which nothing was
        found keep the value ``None`` (or whatever falsy value was parsed last).
    """
    aggregated_metrics = dict.fromkeys([
        "Revenue",
        "Net Income",
        "Total Assets",
        "Total Liabilities",
        "Shareholders' Equity",
        "Current Assets",
        "Current Liabilities",
    ])

    for response_text in extracted_metrics:
        for name, values in parse_metrics(response_text).items():
            if aggregated_metrics[name]:
                # A value from an earlier chunk is already recorded.
                continue
            aggregated_metrics[name] = values

    return aggregated_metrics
120
+
121
+ # Function to calculate financial ratios
122
def calculate_financial_ratios(metrics):
    """Compute liquidity, leverage and profitability ratios from parsed metrics.

    Only the first value stored for each metric is used, converted with
    ``int()``.

    Args:
        metrics: Dict mapping metric label to a list of numeric strings (or
            ``None`` when the metric was not found).

    Returns:
        A dict with the keys 'Current Ratio', 'Debt to Equity',
        'Return on Assets (ROA)' and 'Return on Equity (ROE)' on success.
        If any required metric is missing, empty, not an integer, or a
        denominator is zero, the explanatory message string is returned
        instead so the caller can display it.
    """
    try:
        current_assets = int(metrics['Current Assets'][0])
        current_liabilities = int(metrics['Current Liabilities'][0])
        total_liabilities = int(metrics['Total Liabilities'][0])
        total_assets = int(metrics['Total Assets'][0])
        equity = int(metrics['Shareholders\' Equity'][0])
        net_income = int(metrics['Net Income'][0])

        return {
            'Current Ratio': current_assets / current_liabilities,
            'Debt to Equity': total_liabilities / equity,
            'Return on Assets (ROA)': net_income / total_assets,
            'Return on Equity (ROE)': net_income / equity,
        }
    except (TypeError, KeyError, IndexError, ValueError, ZeroDivisionError):
        # ValueError: a value that is not an integer string.
        # ZeroDivisionError: a reported denominator of 0 must not crash the app.
        return "Some metrics were not extracted properly or are missing."
137
+
138
+ # Streamlit UI
139
# ---------------------------------------------------------------------------
# Streamlit page: upload a report, show the computed ratios, then offer a
# free-form question box answered by the model.
# ---------------------------------------------------------------------------
st.title("Financial Ratio Extractor from IFRS Reports")

st.write("""
Upload an IFRS financial report (PDF), and this app will automatically extract key financial metrics such as Revenue,
Net Income, Total Assets, and calculate important financial ratios like ROA, ROE, and Debt-to-Equity Ratio.
You can also ask questions about the financial data using Meta-Llama.
""")

# File uploader for PDF
uploaded_file = st.file_uploader("Upload your IFRS report (PDF)", type=["pdf"])

# Results for the current upload; they stay None until a document is processed.
aggregated_metrics = None
financial_ratios = None

if uploaded_file:
    # Streamlit re-executes this script on every widget interaction. Keep the
    # results in session state, keyed by the upload, so that typing a question
    # or pressing the button does not push the whole document through the
    # model again.
    # NOTE(review): name + size is assumed to identify an upload well enough;
    # two different files sharing both would reuse the cached result.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("processed_file_key") != file_key:
        with st.spinner("Processing your document, please wait..."):
            # Extract text from PDF
            pdf_text = extract_pdf_text(uploaded_file)

            # Process the text through Meta-Llama for metrics extraction
            extracted_metrics = process_pdf_text_for_metrics(pdf_text)

            # Aggregate extracted metrics and derive the ratios
            fresh_metrics = aggregate_metrics(extracted_metrics)
            st.session_state["aggregated_metrics"] = fresh_metrics
            st.session_state["financial_ratios"] = calculate_financial_ratios(fresh_metrics)

        # Only mark the file as processed once every step has succeeded.
        st.session_state["processed_file_key"] = file_key

    aggregated_metrics = st.session_state["aggregated_metrics"]
    financial_ratios = st.session_state["financial_ratios"]

    # Display extracted financial ratios
    st.subheader("Extracted Financial Ratios:")

    if isinstance(financial_ratios, dict):
        st.table(pd.DataFrame(list(financial_ratios.items()), columns=["Ratio", "Value"]))
    else:
        # calculate_financial_ratios returned its explanatory message.
        st.write(financial_ratios)

# Asking questions to Meta-Llama
st.subheader("Ask Meta-Llama about the extracted financial data:")

question = st.text_input("Enter your question here")

if st.button("Ask Meta-Llama"):
    if question:
        if aggregated_metrics is None:
            # No document processed yet: the model only gets the question.
            prompt = question
        else:
            # Give the model the extracted figures; on its own it knows
            # nothing about the uploaded report.
            prompt = (
                f"Extracted financial metrics: {aggregated_metrics}\n"
                f"Calculated financial ratios: {financial_ratios}\n\n"
                f"Question: {question}"
            )
        response = model(prompt)
        st.write("Meta-Llama's Response:")
        st.write(response[0]['generated_text'])
    else:
        st.warning("Please enter a question first.")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit==1.18.0
2
+ pymupdf==1.22.5
3
+ transformers==4.28.0
4
+ pandas==1.3.3