File size: 2,991 Bytes
f8049b3
b1e4b23
 
 
 
f8049b3
b1e4b23
 
 
 
 
 
 
f8049b3
b1e4b23
f8049b3
c537159
 
 
 
 
 
 
 
 
 
 
 
b1e4b23
 
 
 
eb6dcce
b1e4b23
 
 
 
 
 
 
 
4802f7f
b1e4b23
bff2055
c537159
b1e4b23
 
 
 
 
 
f8049b3
c537159
b1e4b23
 
c537159
f8049b3
c537159
f8049b3
b1e4b23
 
 
 
c537159
 
 
 
 
4802f7f
b1e4b23
 
f8049b3
c537159
b1e4b23
 
f8049b3
c537159
b1e4b23
 
 
eb6dcce
c537159
 
 
b1e4b23
 
eb6dcce
b1e4b23
 
f8049b3
b1e4b23
c537159
 
 
 
b1e4b23
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer
from spellchecker import SpellChecker
import re
import torch

# Load model and tokenizer
@st.cache_resource
def load_model():
    """Download and cache the grammar-correction model and its tokenizer.

    Decorated with ``st.cache_resource`` so the (large) T5 checkpoint is
    loaded only once per Streamlit server process instead of on every rerun.

    Returns:
        tuple: ``(tokenizer, model)`` for ``vennify/t5-base-grammar-correction``.
    """
    checkpoint = "vennify/t5-base-grammar-correction"
    return (
        T5Tokenizer.from_pretrained(checkpoint),
        T5ForConditionalGeneration.from_pretrained(checkpoint),
    )

# Module-level globals used by correct_grammar(); loaded once via the cached loader.
tokenizer, model = load_model()

# Step 0: Preprocess the input
def preprocess_input(text):
    """Normalize raw user text before spell/grammar correction.

    Strips surrounding whitespace, removes any run of trailing
    punctuation/symbol characters (e.g. ``"hello##"`` -> ``"hello"``),
    and guarantees the result ends with a period so the grammar model
    sees a complete sentence.

    Args:
        text (str): Raw sentence from the UI.

    Returns:
        str: Cleaned sentence ending in ``'.'``.
    """
    # Bug fix: the original pattern [^\w\s]$ removed only ONE trailing
    # special character ("hey##" became "hey#."); the + quantifier strips
    # the entire trailing run.
    cleaned = re.sub(r'[^\w\s]+$', '', text.strip())

    # Ensure sentence ends with a period if not already
    if not cleaned.endswith('.'):
        cleaned += '.'

    return cleaned

# Step 1: Spelling correction
def correct_spelling(text):
    """Correct misspelled words while preserving punctuation and casing.

    Tokenizes into word runs and single non-space symbols, runs each purely
    alphabetic token through pyspellchecker, and reassembles the sentence
    with punctuation attached to the preceding word.

    Fixes over the original version:
    - corrections were lowercased, losing sentence-initial capitalization;
      the original token's leading-capital is now restored;
    - ``' '.join`` inserted a space before every punctuation token
      ("dogz ," instead of "dogz,"); punctuation is now appended directly;
    - the trailing-punctuation recovery was dead code, since tokens matched
      by ``\\b\\w+\\b`` can never contain punctuation.

    Args:
        text (str): Sentence to spell-check.

    Returns:
        str: Sentence with misspellings replaced by best-guess corrections.
    """
    spell = SpellChecker()
    tokens = re.findall(r'\b\w+\b|\S', text)
    pieces = []

    for token in tokens:
        if re.fullmatch(r'\w+', token):
            fixed = token
            if token.isalpha():
                # correction() may return None for unknown words; keep original.
                fixed = spell.correction(token.lower()) or token
                if token[0].isupper():
                    # Preserve original capitalization (e.g. sentence start).
                    fixed = fixed.capitalize()
            # Words are separated by single spaces; no leading space at start.
            pieces.append(f' {fixed}' if pieces else fixed)
        else:
            # Punctuation/symbols: attach directly to the previous token.
            pieces.append(token)

    return ''.join(pieces)

# Step 2: Grammar correction
def correct_grammar(text):
    """Run the T5 grammar-correction model over one sentence.

    Prefixes the input with the ``"gec: "`` task tag expected by the
    vennify/t5-base-grammar-correction checkpoint, generates with beam
    search (4 beams, early stopping), and decodes the best hypothesis.

    Args:
        text (str): Spelling-corrected sentence.

    Returns:
        str: Grammar-corrected sentence.
    """
    prompt = "gec: " + text
    encoded = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
    generated = model.generate(encoded, max_length=512, num_beams=4, early_stopping=True)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Streamlit UI — page chrome, input box, and the correct-and-explain pipeline.
st.set_page_config(page_title="Grammar & Spelling Assistant", page_icon="🧠")
st.title("🧠 Grammar & Spelling Correction Assistant")
st.write("Fixes grammar and spelling errors without changing your original meaning.")

user_input = st.text_area("✍️ Enter your sentence:", height=150)

if st.button("Correct & Explain"):
    if not user_input.strip():
        st.warning("Please enter a sentence.")
    else:
        # Three-stage pipeline: normalize -> spell-check -> grammar model.
        # Step 0: Preprocess
        preprocessed = preprocess_input(user_input)

        # Step 1: Spell check
        spelling_fixed = correct_spelling(preprocessed)

        # Step 2: Grammar correction
        final_output = correct_grammar(spelling_fixed)

        # Output — headings previously contained mojibake ("βœ…", "πŸ”",
        # "β†’": UTF-8 emoji decoded with the wrong codec); restored to the
        # intended characters, matching the intact emoji used above.
        st.markdown("### ✅ Final Correction:")
        st.success(final_output)

        st.markdown("### 🔍 Explanation:")
        st.info(f"""
**Original Sentence:**  
{user_input}

**After Preprocessing (remove #, enforce period):**  
{preprocessed}

**After Spelling Correction:**  
{spelling_fixed}

**After Grammar Correction:**  
{final_output}

**Explanation:**  
- Special characters like `#` were removed  
- Misspelled words like `ober` → `over`, `dogz` → `dogs` were fixed  
- Grammar (capitalization, punctuation) was corrected  
- No unwanted words like `#5` were added  
""")