Manoj779944 committed on
Commit
49ca866
·
verified ·
1 Parent(s): 96c153e

Update virtualhealth.py

Browse files
Files changed (1) hide show
  1. virtualhealth.py +33 -339
virtualhealth.py CHANGED
@@ -1,108 +1,22 @@
1
- # -*- coding: utf-8 -*-
2
- """VirtualHealth.ipynb
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1yVSYiPz-WUlO7U0uAKe9LmfMXHd5eyAA
8
- """
9
- !pip install streamlit
10
-
11
  import xgboost as xgb
12
  import pickle
13
  import numpy as np
14
  import pandas as pd
15
-
16
- # Load the trained model
17
- model = xgb.XGBClassifier()
18
- model.load_model("symptom_disease_model.json")
19
-
20
- # Load the label encoder
21
- label_encoder = pickle.load(open("label_encoder.pkl", "rb"))
22
-
23
- # Load symptom names (from preprocessed training data)
24
- X_train = pd.read_csv("X_train.csv") # Get feature names
25
- symptom_list = X_train.columns.tolist()
26
-
27
- # Function to Predict Disease
28
- def predict_disease(user_symptoms):
29
- # Convert user symptoms into one-hot encoded format
30
- input_vector = np.zeros(len(symptom_list))
31
-
32
- for symptom in user_symptoms:
33
- if symptom in symptom_list:
34
- input_vector[symptom_list.index(symptom)] = 1
35
-
36
- input_vector = input_vector.reshape(1, -1) # Reshape for model
37
-
38
- # Predict disease (returns a numerical class)
39
- predicted_class = model.predict(input_vector)[0]
40
-
41
- # Convert number to disease name
42
- predicted_disease = label_encoder.inverse_transform([predicted_class])[0]
43
-
44
- return predicted_disease
45
-
46
- # Example Usage
47
- user_symptoms = ["itching", "skin_rash", "nodal_skin_eruptions"]
48
- predicted_disease = predict_disease(user_symptoms)
49
- print(f"Predicted Disease: {predicted_disease}")
50
-
51
- !pip install zipfile36
52
- import sys
53
- if sys.version_info >= (3, 6):
54
- import zipfile
55
- else:
56
- import zipfile36 as zipfile
57
- import os
58
-
59
- zip_file_path = '/content/disease symptom.zip' # Update with your path
60
- extracted_dir = '/content' # Where to extract the files
61
-
62
- with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
63
- zip_ref.extractall(extracted_dir)
64
-
65
- # Load the precaution dataset
66
- precaution_df = pd.read_csv("Disease precaution.csv")
67
-
68
- # Convert to dictionary for fast lookup
69
- precaution_dict = {}
70
- for _, row in precaution_df.iterrows():
71
- disease = row["Disease"].strip().lower()
72
- precautions = [row[f"Precaution_{i}"] for i in range(1, 5) if pd.notna(row[f"Precaution_{i}"])]
73
- precaution_dict[disease] = precautions
74
-
75
- # Function to Get Precautions
76
- def get_precautions(disease_name):
77
- disease_name = disease_name.strip().lower()
78
- return precaution_dict.get(disease_name, ["No precautions found"])
79
-
80
- # Example Usage
81
- precautions = get_precautions(predicted_disease)
82
- print(f"Precautions for {predicted_disease}: {precautions}")
83
-
84
- !pip install nltk
85
-
86
- import re
87
  import nltk
88
- from nltk.corpus import stopwords
89
  from nltk.tokenize import word_tokenize
 
 
90
 
91
- # Download stopwords if not already downloaded
92
  nltk.download("stopwords")
93
  nltk.download("punkt")
 
94
 
95
  # Load English stopwords
96
  stop_words = set(stopwords.words("english"))
97
- nltk.download('punkt_tab')
98
-
99
- import xgboost as xgb
100
- import pickle
101
- import numpy as np
102
- import pandas as pd
103
- import torch
104
- from transformers import AutoTokenizer, AutoModelForQuestionAnswering
105
- import re # Import regex module for better input processing
106
 
107
  # ============================
108
  # 🔹 1. Load Pretrained Medical Q&A Model
@@ -112,18 +26,12 @@ tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
112
  qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
113
 
114
  # ============================
115
- # 🔹 2. Load Symptom Checker Model & Label Encoder (Fixed)
116
  # ============================
117
- # Load trained XGBoost model from JSON
118
  model = xgb.XGBClassifier()
119
- model.load_model("symptom_disease_model.json")
120
- common_symptoms = ["fever", "cough", "headache", "pain", "vomiting", "fatigue", "nausea", "rash", "chills", "dizziness", "sore throat", "diarrhea"]
121
-
122
- # Load Corrected Label Encoder
123
- label_encoder = pickle.load(open("label_encoder.pkl", "rb"))
124
-
125
- # Load symptom names from training data
126
- X_train = pd.read_csv("X_train.csv") # Get feature names
127
  symptom_list = X_train.columns.tolist()
128
 
129
  # ============================
@@ -145,34 +53,27 @@ def load_medical_context():
145
  medical_context = load_medical_context()
146
 
147
  # ============================
148
- # 🔹 5. Doctor Database (For Appointments)
149
  # ============================
150
  doctor_database = {
151
  "malaria": [{"name": "Dr. Rajesh Kumar", "specialty": "Infectious Diseases", "location": "Apollo Hospital", "contact": "9876543210"}],
152
  "diabetes": [{"name": "Dr. Anil Mehta", "specialty": "Endocrinologist", "location": "AIIMS Delhi", "contact": "9876543233"}],
153
  "heart attack": [{"name": "Dr. Vikram Singh", "specialty": "Cardiologist", "location": "Medanta Hospital", "contact": "9876543255"}],
154
- "hepatitis e": [{"name": "Dr. Sunil Agarwal", "specialty": "Hepatologist", "location": "Fortis Hospital", "contact": "9876543266"}],
155
- "pneumonia": [{"name": "Dr. Priya Sharma", "specialty": "Pulmonologist", "location": "Max Healthcare", "contact": "9876543277"}],
156
- "heartattack": [{"name": "Dr. Vikram Singh", "specialty": "Cardiologist", "location": "Medanta Hospital", "contact": "9876543255"}],
157
  }
158
 
159
  # ============================
160
- # 🔹 6. Predict Disease from Symptoms (Fully Fixed)
161
  # ============================
162
  def predict_disease(user_symptoms):
163
- """Predicts the disease based on user symptoms using the trained XGBoost model."""
164
  input_vector = np.zeros(len(symptom_list))
165
 
166
  for symptom in user_symptoms:
167
  if symptom in symptom_list:
168
- input_vector[symptom_list.index(symptom)] = 1 # One-hot encoding
169
 
170
  input_vector = input_vector.reshape(1, -1) # Reshape for model input
171
-
172
- # Predict disease (returns a numerical class)
173
- predicted_class = model.predict(input_vector)[0]
174
-
175
- # Convert number to disease name
176
  predicted_disease = label_encoder.inverse_transform([predicted_class])[0]
177
 
178
  return predicted_disease
@@ -218,237 +119,30 @@ def book_appointment(disease):
218
  # ============================
219
  # 🔹 10. Handle User Queries
220
  # ============================
221
- def extract_treatment_from_context(disease):
222
- """Extracts treatment details for a given disease from `medical_context.txt`."""
223
- with open("medical_context.txt", "r", encoding="utf-8") as file:
224
- lines = file.readlines()
225
-
226
- treatment_section = []
227
- found_disease = False
228
- found_treatment = False
229
-
230
- for line in lines:
231
- line = line.strip()
232
-
233
- # Check if we found the disease name
234
- if f"## {disease.lower()}" in line.lower():
235
- found_disease = True
236
-
237
- # If we found the disease, now look for "Treatment"
238
- if found_disease and "**Treatment**" in line:
239
- found_treatment = True
240
- continue # Skip the "**Treatment**:" line itself
241
-
242
- # If found, keep extracting treatment details
243
- if found_treatment:
244
- # Stop at blank line or the next section (## New Disease Name)
245
- if line == "" or line.startswith("## "):
246
- break
247
- treatment_section.append(line)
248
-
249
- return "\n".join(treatment_section) if treatment_section else None
250
-
251
-
252
- def extract_disease_name(user_query):
253
- """Extracts the disease name by removing unnecessary words, but keeps medical terms."""
254
- user_query_cleaned = re.sub(r"[^\w\s]", "", user_query.lower()) # Remove punctuation
255
- words = word_tokenize(user_query_cleaned)
256
-
257
- # Remove stopwords but keep diseases/symptoms
258
- filtered_words = [word for word in words if word not in stop_words or word in common_symptoms]
259
-
260
- return " ".join(filtered_words).strip()
261
-
262
- def find_best_match(query, database):
263
- """Finds the best matching disease from the database based on query words."""
264
- query_words = query.split() # Split query into words
265
-
266
- # Check for exact match first
267
- if query in database:
268
- return query # Exact match found
269
-
270
- # Check if any word in query exists in database keys
271
- for disease in database:
272
- for word in query_words:
273
- if word in disease: # Partial match found
274
- return disease
275
-
276
- return None # No match found
277
-
278
-
279
  def handle_user_query(user_query):
280
  """Handles user queries related to symptoms, diseases, and doctor appointments."""
281
-
282
  user_query = user_query.lower().strip()
283
 
284
- # Skip Cleaning for "I have..." and "experiencing..." Cases
285
- if "i have" in user_query or "experiencing" in user_query:
286
- symptoms = user_query.replace("I have", "").replace("experiencing", "").strip()
287
- disease = predict_disease(symptoms.split(", ")) # Convert to list
288
- precautions = get_precautions(disease)
289
- return f"**Predicted Disease:** {disease}\n**Precautions:** {', '.join(precautions)}\n{book_appointment(disease)}"
290
-
291
- # Extract Disease Name for Queries
292
- user_query_cleaned = extract_disease_name(user_query)
293
-
294
- # Handle "Who should I see for..." Queries (Improved with Partial Matching)
295
- if "who should i see " in user_query:
296
- disease_query = user_query.replace("who should i see", "").strip()
297
- disease = find_best_match(disease_query, doctor_database) # Get best match
298
-
299
- if disease:
300
- doctor = doctor_database[disease][0]
301
- return f"You should see a **{doctor['specialty']}** for {disease}.\nExample: {doctor['name']} at {doctor['location']}."
302
- else:
303
- return "I'm not sure. Please consult a general physician for more guidance."
304
-
305
- # Book Appointment (Improved with Partial Matching)
306
- elif "book appointment" in user_query_cleaned:
307
- disease_query = user_query_cleaned.replace("book appointment", "").strip()
308
- disease = find_best_match(disease_query, doctor_database)
309
- return book_appointment(disease) if disease else "Sorry, no matching doctor found."
310
-
311
- # Symptoms Query
312
- elif "symptoms" in user_query_cleaned or "signs" in user_query_cleaned:
313
- disease = user_query_cleaned.replace("symptoms", "").replace("signs", "").strip()
314
  return get_medical_answer(f"What are the symptoms of {disease}?")
315
 
316
- # Precautions Query
317
- elif "precautions" in user_query_cleaned or "prevent" in user_query_cleaned:
318
- disease = user_query_cleaned.replace("precautions", "").replace("prevent", "").strip()
319
- return ", ".join(get_precautions(disease))
320
-
321
- # Treatment Query
322
- if "treatment" in user_query_cleaned or "treat" in user_query_cleaned:
323
- disease = user_query_cleaned.replace("treatment", "").replace("treat", "").strip()
324
 
325
- # 🔹 First, try to extract treatment from `medical_context.txt`
326
- treatment_answer = extract_treatment_from_context(disease)
327
- if treatment_answer:
328
- return treatment_answer # Use direct extraction first
329
 
330
- # 🔹 If no treatment info found, use the Q&A Model
331
- model_answer = get_medical_answer(f"What is the treatment for {disease}?")
332
- if model_answer in ["<s>", "", "No reliable answer found."]:
333
- return f"I'm not sure, but common treatments for {disease} include medication, therapy, or consulting a specialist."
334
- return model_answer
335
 
336
- # General Medical Questions (Fallback)
337
  else:
338
- response = get_medical_answer(user_query)
339
- if response in ["<s>", "", "No reliable answer found."]:
340
- return "I'm not sure, but you may consult a specialist for better guidance."
341
- return response
342
-
343
- # ============================
344
- # 🔹 11. Test Cases (Run Examples)
345
- # ============================
346
- print(handle_user_query("I have fever, chills, and muscle aches")) # Should predict disease & precautions
347
- print(handle_user_query("What are the symptoms of pneumonia?")) # Should return pneumonia symptoms
348
- print(handle_user_query("Book an appointment for diabetes")) # Should book a diabetes specialist
349
- print(handle_user_query("Who should I see for heart attack")) # Should return "Cardiologist"
350
- print(handle_user_query("what is the treatment for tuberculosis")) # Should return correct treatment
351
-
352
- print(handle_user_query("What is the treatment for tuberculosis?")) # Should return correct treatment
353
- print(handle_user_query("What is the treatment for malaria?")) # Should also work
354
- print(handle_user_query("What is the treatment for cancer?")) # Should return something useful
355
-
356
- print(handle_user_query("What is the treatment for tuberculosis?")) # Should return correct treatment
357
- print(handle_user_query("What is the treatment for malaria?")) # Should also work
358
- print(handle_user_query("What is the treatment for cancer?")) # Should return something useful
359
- print(handle_user_query("How to treat diabetes?")) # Should return proper treatment
360
- print(handle_user_query("Tell me the cure for pneumonia?")) # Should return treatment
361
- print(handle_user_query("Treatment for typhoid?")) # Should extract treatment
362
-
363
- print(handle_user_query("What are the symptoms of pneumonia?")) # Should return correct symptoms
364
- print(handle_user_query("Signs of heart attack?")) # Should return expected symptoms
365
- print(handle_user_query("How do I know if I have typhoid?")) # Should return typhoid symptoms
366
- print(handle_user_query("What symptoms should I check for tuberculosis?")) # Should work
367
- print(handle_user_query("Symptoms of dengue?")) # Should return symptoms of dengue
368
-
369
- print(handle_user_query("Who should I see for a heart attack?")) # Should return "Cardiologist"
370
- print(handle_user_query("Which doctor should I visit for diabetes?")) # Should return "Endocrinologist"
371
- print(handle_user_query("Who should I consult for a skin rash?")) # Should return "Dermatologist"
372
- print(handle_user_query("What kind of doctor treats pneumonia?")) # Should return "Pulmonologist"
373
- print(handle_user_query("Who specializes in treating migraines?")) # Should return "Neurologist"
374
-
375
- print(handle_user_query("Book an appointment for malaria")) # Should book doctor for malaria
376
- print(handle_user_query("I need a doctor for high blood pressure")) # Should book doctor for hypertension
377
- print(handle_user_query("Schedule a consultation for fever")) # Should book general physician
378
- print(handle_user_query("Find a doctor for diabetes treatment")) # Should book endocrinologist
379
- print(handle_user_query("Book an appointment for pneumonia treatment")) # Should book pulmonologist
380
-
381
- print(handle_user_query("I have fever, cough, and chills")) # Should predict disease correctly
382
- print(handle_user_query("Experiencing blurry vision and excessive thirst")) # Should return "Diabetes"
383
- print(handle_user_query("I am experiencing severe chest pain and difficulty breathing")) # Should return "Heart Attack"
384
- print(handle_user_query("Feeling tired, cold, and gaining weight")) # Should return "Hypothyroidism"
385
- print(handle_user_query("I have rash, joint pain, and headache")) # Should return "Dengue"
386
-
387
- print(handle_user_query("What does a doctor do?")) # Should return general doctor description
388
- print(handle_user_query("What are antibiotics?")) # Should explain antibiotics
389
- print(handle_user_query("How does the immune system work?")) # Should explain immunity
390
- print(handle_user_query("What is the function of the liver?")) # Should explain liver function
391
- print(handle_user_query("Explain how blood pressure works?")) # Should provide useful explanation
392
-
393
-
394
-
395
-
396
- # Commented out IPython magic to ensure Python compatibility.
397
- # %%writefile app.py
398
- # import streamlit as st
399
- # import requests
400
- #
401
- # st.set_page_config(page_title="AI Health Assistant", page_icon="🤖")
402
- #
403
- # st.title("🩺 AI Health Assistant")
404
- # st.write("Ask any medical-related questions:")
405
- #
406
- # # User Input
407
- # user_input = st.text_input("Your Question:")
408
- #
409
- # # Button to Send Query
410
- # if st.button("Ask"):
411
- # response = requests.post("https://b7da-35-232-247-117.ngrok-free.app/query/", json={"user_input": user_input})
412
- # bot_response = response.json().get("response", "Error fetching response")
413
- #
414
- # st.markdown(f"**🤖 Bot:** {bot_response}")
415
- #
416
-
417
- """✅ Steps to Deploy on Hugging Face Spaces
418
- 📌 Step 1: Create a Hugging Face Space
419
- 1️⃣ Go to Hugging Face Spaces
420
- 2️⃣ Click "New Space"
421
- 3️⃣ Name the Space (e.g., AI-Health-Assistant)
422
- 4️⃣ Select "Streamlit" as the SDK
423
- 5️⃣ Click "Create Space" ✅
424
-
425
- 📌 Step 2: Clone the Repository Locally
426
- After creating the Space, clone it to your local machine or Google Colab:
427
-
428
- ```bash
429
- git clone https://huggingface.co/spaces/YOUR_USERNAME/AI-Health-Assistant
430
- cd AI-Health-Assistant
431
- ```
432
- Replace YOUR_USERNAME with your Hugging Face username!
433
-
434
- 📌 Step 3: Add app.py (Your Streamlit Chatbot)
435
- Inside the cloned folder, create app.py and paste the following:
436
-
437
- 📌 Step 4: Create requirements.txt
438
- Create a new file requirements.txt inside the same folder and add:
439
- ```bash
440
- streamlit
441
- requests
442
- ```
443
-
444
- 📌 Step 5: Push Your Code to Hugging Face
445
- Run these commands to push the code:
446
-
447
- ```bash
448
- git add .
449
- git commit -m "Initial commit"
450
- git push
451
- ```
452
- 🚀 Your Space will automatically start building!
453
- """
454
-
 
 
 
 
 
 
 
 
 
 
 
1
  import xgboost as xgb
2
  import pickle
3
  import numpy as np
4
  import pandas as pd
5
+ import torch
6
+ import streamlit as st
7
+ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import nltk
 
9
  from nltk.tokenize import word_tokenize
10
+ from nltk.corpus import stopwords
11
+ import re
12
 
13
+ # 🔹 Download the NLTK resources (stopwords and tokenizer data) used below
14
  nltk.download("stopwords")
15
  nltk.download("punkt")
16
+ nltk.download('punkt_tab')
17
 
18
  # Load English stopwords
19
  stop_words = set(stopwords.words("english"))
 
 
 
 
 
 
 
 
 
20
 
21
  # ============================
22
  # 🔹 1. Load Pretrained Medical Q&A Model
 
26
  qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
27
 
28
  # ============================
29
+ # 🔹 2. Load Symptom Checker Model
30
  # ============================
 
31
  model = xgb.XGBClassifier()
32
+ model.load_model("symptom_disease_model.json") # Load trained model
33
+ label_encoder = pickle.load(open("label_encoder.pkl", "rb")) # Load label encoder
34
+ X_train = pd.read_csv("X_train.csv") # Load symptoms
 
 
 
 
 
35
  symptom_list = X_train.columns.tolist()
36
 
37
  # ============================
 
53
  medical_context = load_medical_context()
54
 
55
  # ============================
56
+ # 🔹 5. Doctor Database
57
  # ============================
58
  doctor_database = {
59
  "malaria": [{"name": "Dr. Rajesh Kumar", "specialty": "Infectious Diseases", "location": "Apollo Hospital", "contact": "9876543210"}],
60
  "diabetes": [{"name": "Dr. Anil Mehta", "specialty": "Endocrinologist", "location": "AIIMS Delhi", "contact": "9876543233"}],
61
  "heart attack": [{"name": "Dr. Vikram Singh", "specialty": "Cardiologist", "location": "Medanta Hospital", "contact": "9876543255"}],
 
 
 
62
  }
63
 
64
  # ============================
65
+ # 🔹 6. Predict Disease from Symptoms
66
  # ============================
67
  def predict_disease(user_symptoms):
68
+ """Predicts disease based on user symptoms using the trained XGBoost model."""
69
  input_vector = np.zeros(len(symptom_list))
70
 
71
  for symptom in user_symptoms:
72
  if symptom in symptom_list:
73
+ input_vector[symptom_list.index(symptom)] = 1
74
 
75
  input_vector = input_vector.reshape(1, -1) # Reshape for model input
76
+ predicted_class = model.predict(input_vector)[0] # Predict disease
 
 
 
 
77
  predicted_disease = label_encoder.inverse_transform([predicted_class])[0]
78
 
79
  return predicted_disease
 
119
  # ============================
120
  # 🔹 10. Handle User Queries
121
  # ============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  def handle_user_query(user_query):
123
  """Handles user queries related to symptoms, diseases, and doctor appointments."""
 
124
  user_query = user_query.lower().strip()
125
 
126
+ # Check if query is about symptoms
127
+ if "symptoms" in user_query or "signs" in user_query:
128
+ disease = user_query.replace("symptoms", "").replace("signs", "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  return get_medical_answer(f"What are the symptoms of {disease}?")
130
 
131
+ # Check if query is about treatment
132
+ elif "treatment" in user_query or "treat" in user_query:
133
+ disease = user_query.replace("treatment", "").replace("treat", "").strip()
134
+ return get_medical_answer(f"What is the treatment for {disease}?")
 
 
 
 
135
 
136
+ # Check for doctor recommendation
137
+ elif "who should i see" in user_query:
138
+ disease = user_query.replace("who should i see for", "").strip()
139
+ return book_appointment(disease)
140
 
141
+ # Check for appointment booking
142
+ elif "book appointment" in user_query:
143
+ disease = user_query.replace("book appointment for", "").strip()
144
+ return book_appointment(disease)
 
145
 
146
+ # Default case: general medical question
147
  else:
148
+ return get_medical_answer(user_query)