Spaces:

noobmaster1246
/

quickaid

Runtime error

App Files Files Community

noobmaster1246 commited on Feb 9

Commit

49a6a82

verified ·

1 Parent(s): 40d2442

Update ai.py

Browse files

Files changed (1) hide show

ai.py +144 -120

ai.py CHANGED Viewed

@@ -1,120 +1,144 @@
-import torch
-import torch.nn as nn
-import numpy as np
-import pandas as pd
-from sklearn.preprocessing import LabelEncoder, StandardScaler
-from sentence_transformers import SentenceTransformer, util
-import json
-from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
-from symspellpy import SymSpell, Verbosity
-device = torch.device("cpu")
-class DiseaseClassifier(nn.Module):
-    def __init__(self, input_size, num_classes, dropout_rate=0.35665610394511454):
-        super(DiseaseClassifier, self).__init__()
-        self.fc1 = nn.Linear(input_size, 382)
-        self.fc2 = nn.Linear(382, 389)
-        self.fc3 = nn.Linear(389, 433)
-        self.fc4 = nn.Linear(433, num_classes)
-        self.activation = nn.LeakyReLU()
-        self.dropout = nn.Dropout(dropout_rate)
-    def forward(self, x):
-        x = self.activation(self.fc1(x))
-        x = self.dropout(x)
-        x = self.activation(self.fc2(x))
-        x = self.dropout(x)
-        x = self.activation(self.fc3(x))
-        x = self.dropout(x)
-        x = self.fc4(x)  # Logits
-        return x
-class DiseasePredictionModel:
-    def __init__(self, ai_model_name="model.pth", data_file="data.csv", symptom_json="symptoms.json", dictionary_file="frequency_dictionary_en_82_765.txt"):
-        self.df = pd.read_csv(data_file)
-        self.symptom_columns = self.load_symptoms(symptom_json)
-        self.label_encoder = LabelEncoder()
-        self.label_encoder.fit(self.df.iloc[:, 0])
-        self.scaler = StandardScaler()
-        self.scaler.fit(self.df.iloc[:, 1:].values)
-        self.input_size = len(self.symptom_columns)
-        self.num_classes = len(self.label_encoder.classes_)
-        self.model = self._load_model(ai_model_name)
-        self.SYMPTOM_LIST = self.load_symptoms(symptom_json)
-        self.sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
-        self.sym_spell.load_dictionary(dictionary_file, term_index=0, count_index=1)
-        self.tokenizer = AutoTokenizer.from_pretrained("./biobert_diseases_ner")
-        # self.tokenizer.save_pretrained("./biobert_diseases_ner")
-        self.nlp_model = AutoModelForTokenClassification.from_pretrained("./biobert_diseases_ner")
-        # self.nlp_model.save_pretrained("./biobert_diseases_ner")
-        self.ner_pipeline = pipeline("ner", model=self.nlp_model, tokenizer=self.tokenizer, aggregation_strategy="simple")
-        self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
-    def _load_model(self, ai_model_name):
-        model = DiseaseClassifier(self.input_size, self.num_classes).to(device)
-        model.load_state_dict(torch.load(ai_model_name, map_location=device, weights_only=True))
-        model.eval()
-        return model
-    def predict_disease(self, symptoms):
-        input_vector = np.zeros(len(self.symptom_columns))
-        for symptom in symptoms:
-            if symptom in self.symptom_columns:
-                input_vector[list(self.symptom_columns).index(symptom)] = 1
-        input_vector = self.scaler.transform([input_vector])
-        input_tensor = torch.tensor(input_vector, dtype=torch.float32).to(device)
-        with torch.no_grad():
-            outputs = self.model(input_tensor)
-            _, predicted_class = torch.max(outputs, 1)
-        predicted_disease = self.label_encoder.inverse_transform([predicted_class.cpu().numpy()[0]])[0]
-        return predicted_disease
-    def load_symptoms(self, json_file):
-        with open(json_file, "r", encoding="utf-8") as f:
-            return json.load(f)
-    def correct_text(self, text):
-        words = text.split()
-        corrected_words = []
-        for word in words:
-            if word.lower() in [symptom.lower() for symptom in self.SYMPTOM_LIST]:
-                corrected_words.append(word)
-            else:
-                suggestions = self.sym_spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
-                if suggestions:
-                    corrected_words.append(suggestions[0].term)
-                else:
-                    corrected_words.append(word)
-        return ' '.join(corrected_words)
-    def extract_symptoms(self, text):
-        ner_results = self.ner_pipeline(text)
-        symptoms = set()
-        for entity in ner_results:
-            if entity["entity_group"] == "DISEASE":
-                symptoms.add(entity["word"].lower())
-        return list(symptoms)
-    def match_symptoms(self, extracted_symptoms):
-        matched = {}
-        symptom_embeddings = self.semantic_model.encode(self.SYMPTOM_LIST, convert_to_tensor=True)
-        for symptom in extracted_symptoms:
-            symptom_embedding = self.semantic_model.encode(symptom, convert_to_tensor=True)
-            similarities = util.pytorch_cos_sim(symptom_embedding, symptom_embeddings)[0]
-            most_similar_idx = similarities.argmax()
-            best_match = self.SYMPTOM_LIST[most_similar_idx]
-            matched[symptom] = best_match
-        return matched.values()

+import os
+import torch
+import torch.nn as nn
+import numpy as np
+import pandas as pd
+import json
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sentence_transformers import SentenceTransformer, util
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+from symspellpy import SymSpell, Verbosity
+import gradio as gr
+# Ensure Hugging Face cache directory is writable
+os.environ["TRANSFORMERS_CACHE"] = "/home/user/.cache/huggingface"
+# Set device
+device = torch.device("cpu")
+# Define DiseaseClassifier Model
+class DiseaseClassifier(nn.Module):
+    def __init__(self, input_size, num_classes, dropout_rate=0.35665610394511454):
+        super(DiseaseClassifier, self).__init__()
+        self.fc1 = nn.Linear(input_size, 382)
+        self.fc2 = nn.Linear(382, 389)
+        self.fc3 = nn.Linear(389, 433)
+        self.fc4 = nn.Linear(433, num_classes)
+        self.activation = nn.LeakyReLU()
+        self.dropout = nn.Dropout(dropout_rate)
+    def forward(self, x):
+        x = self.activation(self.fc1(x))
+        x = self.dropout(x)
+        x = self.activation(self.fc2(x))
+        x = self.dropout(x)
+        x = self.activation(self.fc3(x))
+        x = self.dropout(x)
+        x = self.fc4(x)  # Logits
+        return x
+# Define DiseasePredictionModel
+class DiseasePredictionModel:
+    def __init__(self, ai_model_name="model.pth", data_file="data.csv", symptom_json="symptoms.json", dictionary_file="frequency_dictionary_en_82_765.txt"):
+        # Load dataset
+        self.df = pd.read_csv(data_file)
+        self.symptom_columns = self.load_symptoms(symptom_json)
+        self.label_encoder = LabelEncoder()
+        self.label_encoder.fit(self.df.iloc[:, 0])
+        self.scaler = StandardScaler()
+        self.scaler.fit(self.df.iloc[:, 1:].values)
+        self.input_size = len(self.symptom_columns)
+        self.num_classes = len(self.label_encoder.classes_)
+        self.model = self._load_model(ai_model_name)
+        self.SYMPTOM_LIST = self.load_symptoms(symptom_json)
+        # Load SymSpell dictionary
+        self.sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
+        self.sym_spell.load_dictionary(dictionary_file, term_index=0, count_index=1)
+        # Load BioBERT tokenizer and model
+        self.tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-v1.1")
+        self.nlp_model = AutoModelForTokenClassification.from_pretrained("dmis-lab/biobert-v1.1")
+        self.ner_pipeline = pipeline("ner", model=self.nlp_model, tokenizer=self.tokenizer, aggregation_strategy="simple")
+        # Load Sentence Transformer
+        self.semantic_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+    def _load_model(self, ai_model_name):
+        model = DiseaseClassifier(self.input_size, self.num_classes).to(device)
+        model.load_state_dict(torch.load(ai_model_name, map_location=device))
+        model.eval()
+        return model
+    def predict_disease(self, symptoms):
+        input_vector = np.zeros(len(self.symptom_columns))
+        for symptom in symptoms:
+            if symptom in self.symptom_columns:
+                input_vector[list(self.symptom_columns).index(symptom)] = 1
+        input_vector = self.scaler.transform([input_vector])
+        input_tensor = torch.tensor(input_vector, dtype=torch.float32).to(device)
+        with torch.no_grad():
+            outputs = self.model(input_tensor)
+            _, predicted_class = torch.max(outputs, 1)
+        predicted_disease = self.label_encoder.inverse_transform([predicted_class.cpu().numpy()[0]])[0]
+        return predicted_disease
+    def load_symptoms(self, json_file):
+        with open(json_file, "r", encoding="utf-8") as f:
+            return json.load(f)
+    def correct_text(self, text):
+        words = text.split()
+        corrected_words = []
+        for word in words:
+            if word.lower() in [symptom.lower() for symptom in self.SYMPTOM_LIST]:
+                corrected_words.append(word)
+            else:
+                suggestions = self.sym_spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
+                if suggestions:
+                    corrected_words.append(suggestions[0].term)
+                else:
+                    corrected_words.append(word)
+        return ' '.join(corrected_words)
+    def extract_symptoms(self, text):
+        ner_results = self.ner_pipeline(text)
+        symptoms = set()
+        for entity in ner_results:
+            if entity["entity_group"] == "DISEASE":
+                symptoms.add(entity["word"].lower())
+        return list(symptoms)
+    def match_symptoms(self, extracted_symptoms):
+        matched = {}
+        symptom_embeddings = self.semantic_model.encode(self.SYMPTOM_LIST, convert_to_tensor=True)
+        for symptom in extracted_symptoms:
+            symptom_embedding = self.semantic_model.encode(symptom, convert_to_tensor=True)
+            similarities = util.pytorch_cos_sim(symptom_embedding, symptom_embeddings)[0]
+            most_similar_idx = similarities.argmax()
+            best_match = self.SYMPTOM_LIST[most_similar_idx]
+            matched[symptom] = best_match
+        return matched.values()
+# Initialize Model
+model = DiseasePredictionModel()
+# Define Prediction Function
+def predict(symptoms):
+    corrected = model.correct_text(symptoms)
+    extracted = model.extract_symptoms(corrected)
+    matched = model.match_symptoms(extracted)
+    prediction = model.predict_disease(matched)
+    return prediction
+# Define Gradio Interface
+iface = gr.Interface(fn=predict, inputs="text", outputs="text", title="Disease Prediction AI")
+iface.launch()