hardik8588 committed on
Commit c09a046 · verified · 1 Parent(s): 1ceb85a

Upload 10 files

Files changed (10)
  1. .gitattributes +3 -35
  2. Dockerfile +10 -0
  3. README.md +40 -11
  4. app.py +1408 -0
  5. auth.py +655 -0
  6. fix_users_table.py +180 -0
  7. initialize_plans.py +25 -0
  8. legal_analysis.db +0 -0
  9. paypal_integration.py +1004 -0
  10. requirements.txt +21 -0
.gitattributes CHANGED
@@ -1,35 +1,3 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
 
Dockerfile ADDED
@@ -0,0 +1,10 @@
+ FROM python:3.10
+
+ WORKDIR /code
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,40 @@
- ---
- title: Doc Analyzer
- emoji: 📈
- colorFrom: blue
- colorTo: gray
- sdk: docker
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Legal Document Analysis API
+ emoji: 📄
+ colorFrom: blue
+ colorTo: indigo
+ sdk: docker
+ pinned: false
+ license: mit
+ ---
+
+ # Legal Document Analysis API
+
+ This API provides tools for analyzing legal documents, videos, and audio files. It uses NLP models to extract insights, summarize content, and answer legal questions.
+
+ ## Features
+
+ - Document analysis (PDF)
+ - Video and audio transcription and analysis
+ - Legal question answering
+ - Risk assessment and visualization
+ - Contract clause analysis
+
+ ## Deployment
+
+ This API is deployed on Hugging Face Spaces.
+
+ ## API Endpoints
+
+ - `/analyze_legal_document` - Analyze legal documents (PDF)
+ - `/analyze_legal_video` - Analyze legal videos
+ - `/analyze_legal_audio` - Analyze legal audio
+ - `/legal_chatbot/{task_id}` - Ask questions about an analyzed document
+
+ ## Technologies
+
+ - FastAPI
+ - Hugging Face Transformers
+ - SpaCy
+ - PyTorch
+ - MoviePy
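The typical flow against these endpoints, as implemented in app.py, is to obtain a bearer token from /token and then upload a PDF to /analyze_legal_document. A minimal client sketch follows; the base URL, credentials, and file name are placeholders, and it assumes the requests library is available on the client side:

```python
# Minimal client sketch for the endpoints above; BASE_URL, credentials, and the
# PDF path are placeholders, not values from this repository.
import requests

BASE_URL = "http://localhost:7860"

# 1. Obtain a bearer token (the /token route uses the OAuth2 password form).
token_resp = requests.post(
    f"{BASE_URL}/token",
    data={"username": "user@example.com", "password": "secret"},
)
token_resp.raise_for_status()
token = token_resp.json()["access_token"]

# 2. Upload a PDF for analysis with the token in the Authorization header.
with open("contract.pdf", "rb") as f:
    analysis = requests.post(
        f"{BASE_URL}/analyze_legal_document",
        headers={"Authorization": f"Bearer {token}"},
        files={"file": ("contract.pdf", f, "application/pdf")},
    )
analysis.raise_for_status()
result = analysis.json()
print(result["task_id"], result["summary"])

# 3. Ask a follow-up question about the analyzed document.
answer = requests.post(
    f"{BASE_URL}/legal_chatbot/{result['task_id']}",
    headers={"Authorization": f"Bearer {token}"},
    data={"question": "What are the termination terms?"},
)
print(answer.json())
```

Note that the chatbot route is gated on the subscription tier's feature list in app.py, so a free-tier account may receive a 403 on the last call.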
app.py ADDED
@@ -0,0 +1,1408 @@
1
+ import os
2
+ import io
3
+ import time
4
+ import uuid
5
+ import tempfile
6
+ import numpy as np
7
+ import matplotlib.pyplot as plt
8
+ import pdfplumber
9
+ import spacy
10
+ import torch
11
+ import sqlite3
12
+ import uvicorn
13
+ import moviepy.editor as mp
14
+ from threading import Thread
15
+ from datetime import datetime, timedelta
16
+ from typing import List, Dict, Optional
17
+ from fastapi import FastAPI, File, UploadFile, Form, Depends, HTTPException, status, Header
18
+ from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
19
+ from fastapi.staticfiles import StaticFiles
20
+ from fastapi.middleware.cors import CORSMiddleware
21
+ import logging
22
+ from pydantic import BaseModel
23
+ from transformers import (
24
+ AutoTokenizer,
25
+ AutoModelForQuestionAnswering,
26
+ pipeline,
27
+ TrainingArguments,
28
+ Trainer
29
+ )
30
+ from sentence_transformers import SentenceTransformer
31
+ from passlib.context import CryptContext
32
+ from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
33
+ import jwt
34
+ from dotenv import load_dotenv
35
+ # Import get_db_connection from auth
36
+ from auth import (
37
+ User, UserCreate, Token, get_current_active_user, authenticate_user,
38
+ create_access_token, hash_password, register_user, check_subscription_access,
39
+ SUBSCRIPTION_TIERS, JWT_EXPIRATION_DELTA, get_db_connection, update_auth_db_schema, get_subscription_plans
40
+
41
+ )
42
+ # PayPal subscription helpers
43
+ from paypal_integration import (
44
+ create_user_subscription, verify_subscription_payment,
45
+ update_user_subscription, handle_subscription_webhook, initialize_database
46
+ )
47
+ from fastapi import Request
48
+
49
+ logging.basicConfig(
50
+ level=logging.INFO,
51
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
52
+ )
53
+ logger = logging.getLogger("app")
54
+
55
+ # Initialize the database
56
+ # Initialize FastAPI app
57
+ app = FastAPI(
58
+ title="Legal Document Analysis API",
59
+ description="API for analyzing legal documents, videos, and audio",
60
+ version="1.0.0"
61
+ )
62
+
63
+ # Set up CORS middleware
64
+ app.add_middleware(
65
+ CORSMiddleware,
66
+ allow_origins=["http://localhost:3000"], # Frontend URL
67
+ allow_credentials=True,
68
+ allow_methods=["*"],
69
+ allow_headers=["*"],
70
+ )
71
+ initialize_database()
72
+ try:
73
+ update_auth_db_schema()
74
+ logger.info("Database schema updated successfully")
75
+ except Exception as e:
76
+ logger.error(f"Database schema update error: {e}")
77
+
78
+ # Create static directory for file storage
79
+ os.makedirs("static", exist_ok=True)
80
+ os.makedirs("uploads", exist_ok=True)
81
+ os.makedirs("temp", exist_ok=True)
82
+ app.mount("/static", StaticFiles(directory="static"), name="static")
83
+
84
+ # Set device for model inference
85
+ device = "cuda" if torch.cuda.is_available() else "cpu"
86
+ print(f"Using device: {device}")
87
+
88
+ # Initialize chat history
89
+ chat_history = []
90
+
91
+ # Document context storage
92
+ document_contexts = {}
93
+
94
+ def store_document_context(task_id, text):
95
+ """Store document text for later retrieval."""
96
+ document_contexts[task_id] = text
97
+
98
+ def load_document_context(task_id):
99
+ """Load document text for a given task ID."""
100
+ return document_contexts.get(task_id, "")
101
+
102
+
103
+ load_dotenv()
104
+ DB_PATH = os.getenv("DB_PATH", os.path.join(os.path.dirname(__file__), "data/user_data.db"))
105
+ os.makedirs(os.path.join(os.path.dirname(__file__), "data"), exist_ok=True)
106
+
107
+ def fine_tune_qa_model():
108
+ """Fine-tunes a QA model on the CUAD dataset."""
109
+ print("Loading base model for fine-tuning...")
110
+ tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
111
+ model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
112
+
113
+ # Load and preprocess CUAD dataset
114
+ print("Loading CUAD dataset...")
115
+ from datasets import load_dataset
116
+
117
+ try:
118
+ dataset = load_dataset("cuad")
119
+ except Exception as e:
120
+ print(f"Error loading CUAD dataset: {str(e)}")
121
+ print("Downloading CUAD dataset from alternative source...")
122
+ # Implement alternative dataset loading here
123
+ return tokenizer, model
124
+
125
+ print(f"Dataset loaded with {len(dataset['train'])} training examples")
126
+
127
+ # Preprocess the dataset
128
+ def preprocess_function(examples):
129
+ questions = [q.strip() for q in examples["question"]]
130
+ contexts = [c.strip() for c in examples["context"]]
131
+
132
+ inputs = tokenizer(
133
+ questions,
134
+ contexts,
135
+ max_length=384,
136
+ truncation="only_second",
137
+ stride=128,
138
+ return_overflowing_tokens=True,
139
+ return_offsets_mapping=True,
140
+ padding="max_length",
141
+ )
142
+
143
+ offset_mapping = inputs.pop("offset_mapping")
144
+ sample_map = inputs.pop("overflow_to_sample_mapping")
145
+
146
+ answers = examples["answers"]
147
+ start_positions = []
148
+ end_positions = []
149
+
150
+ for i, offset in enumerate(offset_mapping):
151
+ sample_idx = sample_map[i]
152
+ answer = answers[sample_idx]
153
+
154
+ start_char = answer["answer_start"][0] if len(answer["answer_start"]) > 0 else 0
155
+ end_char = start_char + len(answer["text"][0]) if len(answer["text"]) > 0 else 0
156
+
157
+ sequence_ids = inputs.sequence_ids(i)
158
+
159
+ # Find the start and end of the context
160
+ idx = 0
161
+ while sequence_ids[idx] != 1:
162
+ idx += 1
163
+ context_start = idx
164
+
165
+ while idx < len(sequence_ids) and sequence_ids[idx] == 1:
166
+ idx += 1
167
+ context_end = idx - 1
168
+
169
+ # If the answer is not fully inside the context, label is (0, 0)
170
+ if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
171
+ start_positions.append(0)
172
+ end_positions.append(0)
173
+ else:
174
+ # Otherwise it's the start and end token positions
175
+ idx = context_start
176
+ while idx <= context_end and offset[idx][0] <= start_char:
177
+ idx += 1
178
+ start_positions.append(idx - 1)
179
+
180
+ idx = context_end
181
+ while idx >= context_start and offset[idx][1] >= end_char:
182
+ idx -= 1
183
+ end_positions.append(idx + 1)
184
+
185
+ inputs["start_positions"] = start_positions
186
+ inputs["end_positions"] = end_positions
187
+ return inputs
188
+
189
+ print("Preprocessing dataset...")
190
+ processed_dataset = dataset.map(
191
+ preprocess_function,
192
+ batched=True,
193
+ remove_columns=dataset["train"].column_names,
194
+ )
195
+
196
+ print("Splitting dataset...")
197
+ train_dataset = processed_dataset["train"]
198
+ val_dataset = processed_dataset["validation"]
199
+
200
+ train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "start_positions", "end_positions"])
201
+ val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "start_positions", "end_positions"])
202
+
203
+ training_args = TrainingArguments(
204
+ output_dir="./fine_tuned_legal_qa",
205
+ evaluation_strategy="steps",
206
+ eval_steps=100,
207
+ learning_rate=2e-5,
208
+ per_device_train_batch_size=16,
209
+ per_device_eval_batch_size=16,
210
+ num_train_epochs=1,
211
+ weight_decay=0.01,
212
+ logging_steps=50,
213
+ save_steps=100,
214
+ load_best_model_at_end=True,
215
+ report_to=[]
216
+ )
217
+
218
+ print("✅ Starting fine tuning on CUAD QA dataset...")
219
+ trainer = Trainer(
220
+ model=model,
221
+ args=training_args,
222
+ train_dataset=train_dataset,
223
+ eval_dataset=val_dataset,
224
+ tokenizer=tokenizer,
225
+ )
226
+
227
+ trainer.train()
228
+ print("✅ Fine tuning completed. Saving model...")
229
+
230
+ model.save_pretrained("./fine_tuned_legal_qa")
231
+ tokenizer.save_pretrained("./fine_tuned_legal_qa")
232
+
233
+ return tokenizer, model
234
+
235
+ #############################
236
+ # Load NLP Models #
237
+ #############################
238
+
239
+ # Initialize model variables
240
+ nlp = None
241
+ summarizer = None
242
+ embedding_model = None
243
+ ner_model = None
244
+ speech_to_text = None
245
+ cuad_model = None
246
+ cuad_tokenizer = None
247
+ qa_model = None
248
+
249
+ # Add model caching functionality
250
+ import pickle
251
+ import os.path
252
+
253
+ MODELS_CACHE_DIR = os.path.join(os.path.dirname(__file__), "models_cache")
254
+ os.makedirs(MODELS_CACHE_DIR, exist_ok=True)
255
+
256
+ def save_model_to_cache(model, model_name):
257
+ """Save a model to the cache directory"""
258
+ try:
259
+ cache_path = os.path.join(MODELS_CACHE_DIR, f"{model_name}.pkl")
260
+ with open(cache_path, 'wb') as f:
261
+ pickle.dump(model, f)
262
+ print(f"✅ Saved {model_name} to cache")
263
+ return True
264
+ except Exception as e:
265
+ print(f"⚠️ Failed to save {model_name} to cache: {str(e)}")
266
+ return False
267
+
268
+ def load_model_from_cache(model_name):
269
+ """Load a model from the cache directory"""
270
+ try:
271
+ cache_path = os.path.join(MODELS_CACHE_DIR, f"{model_name}.pkl")
272
+ if os.path.exists(cache_path):
273
+ with open(cache_path, 'rb') as f:
274
+ model = pickle.load(f)
275
+ print(f"✅ Loaded {model_name} from cache")
276
+ return model
277
+ return None
278
+ except Exception as e:
279
+ print(f"⚠️ Failed to load {model_name} from cache: {str(e)}")
280
+ return None
281
+
282
+ # Add a flag to control model loading
283
+ LOAD_MODELS = os.getenv("LOAD_MODELS", "True").lower() in ("true", "1", "t")
284
+
285
+ try:
286
+ if LOAD_MODELS:
287
+ # Try to load SpaCy from cache first
288
+ nlp = load_model_from_cache("spacy_model")
289
+ if nlp is None:
290
+ try:
291
+ nlp = spacy.load("en_core_web_sm")
292
+ save_model_to_cache(nlp, "spacy_model")
293
+ except OSError:
294
+ print("⚠️ SpaCy model not found, downloading...")
295
+ spacy.cli.download("en_core_web_sm")
296
+ nlp = spacy.load("en_core_web_sm")
297
+ save_model_to_cache(nlp, "spacy_model")
298
+
299
+ print("✅ Loading NLP models...")
300
+
301
+ # Load the summarizer with caching
302
+ print("Loading summarizer model...")
303
+ summarizer = load_model_from_cache("summarizer_model")
304
+ if summarizer is None:
305
+ try:
306
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn",
307
+ device=0 if torch.cuda.is_available() else -1)
308
+ save_model_to_cache(summarizer, "summarizer_model")
309
+ print("✅ Summarizer loaded successfully")
310
+ except Exception as e:
311
+ print(f"⚠️ Error loading summarizer: {str(e)}")
312
+ try:
313
+ print("Trying alternative summarizer model...")
314
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6",
315
+ device=0 if torch.cuda.is_available() else -1)
316
+ save_model_to_cache(summarizer, "summarizer_model")
317
+ print("✅ Alternative summarizer loaded successfully")
318
+ except Exception as e2:
319
+ print(f"⚠️ Error loading alternative summarizer: {str(e2)}")
320
+ summarizer = None
321
+
322
+ # Load the embedding model with caching
323
+ print("Loading embedding model...")
324
+ embedding_model = load_model_from_cache("embedding_model")
325
+ if embedding_model is None:
326
+ try:
327
+ embedding_model = SentenceTransformer("all-mpnet-base-v2", device=device)
328
+ save_model_to_cache(embedding_model, "embedding_model")
329
+ print("✅ Embedding model loaded successfully")
330
+ except Exception as e:
331
+ print(f"⚠️ Error loading embedding model: {str(e)}")
332
+ embedding_model = None
333
+
334
+ # Load the NER model with caching
335
+ print("Loading NER model...")
336
+ ner_model = load_model_from_cache("ner_model")
337
+ if ner_model is None:
338
+ try:
339
+ ner_model = pipeline("ner", model="dslim/bert-base-NER",
340
+ device=0 if torch.cuda.is_available() else -1)
341
+ save_model_to_cache(ner_model, "ner_model")
342
+ print("✅ NER model loaded successfully")
343
+ except Exception as e:
344
+ print(f"⚠️ Error loading NER model: {str(e)}")
345
+ ner_model = None
346
+
347
+ # Speech to text model with caching
348
+ print("Loading speech to text model...")
349
+ speech_to_text = load_model_from_cache("speech_to_text_model")
350
+ if speech_to_text is None:
351
+ try:
352
+ speech_to_text = pipeline("automatic-speech-recognition",
353
+ model="openai/whisper-medium",
354
+ chunk_length_s=30,
355
+ device_map="auto" if torch.cuda.is_available() else "cpu")
356
+ save_model_to_cache(speech_to_text, "speech_to_text_model")
357
+ print("✅ Speech to text model loaded successfully")
358
+ except Exception as e:
359
+ print(f"⚠️ Error loading speech to text model: {str(e)}")
360
+ speech_to_text = None
361
+
362
+ # Load the fine-tuned model with caching
363
+ print("Loading fine-tuned CUAD QA model...")
364
+ cuad_model = load_model_from_cache("cuad_model")
365
+ cuad_tokenizer = load_model_from_cache("cuad_tokenizer")
366
+
367
+ if cuad_model is None or cuad_tokenizer is None:
368
+ try:
369
+ cuad_tokenizer = AutoTokenizer.from_pretrained("hardik8588/fine-tuned-legal-qa")
370
+ from transformers import AutoModelForQuestionAnswering
371
+ cuad_model = AutoModelForQuestionAnswering.from_pretrained("hardik8588/fine-tuned-legal-qa")
372
+ cuad_model.to(device)
373
+ save_model_to_cache(cuad_tokenizer, "cuad_tokenizer")
374
+ save_model_to_cache(cuad_model, "cuad_model")
375
+ print("✅ Successfully loaded fine-tuned model")
376
+ except Exception as e:
377
+ print(f"⚠️ Error loading fine-tuned model: {str(e)}")
378
+ print("⚠️ Falling back to pre-trained model...")
379
+ try:
380
+ cuad_tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
381
+ from transformers import AutoModelForQuestionAnswering
382
+ cuad_model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
383
+ cuad_model.to(device)
384
+ save_model_to_cache(cuad_tokenizer, "cuad_tokenizer")
385
+ save_model_to_cache(cuad_model, "cuad_model")
386
+ print("✅ Pre-trained model loaded successfully")
387
+ except Exception as e2:
388
+ print(f"⚠️ Error loading pre-trained model: {str(e2)}")
389
+ cuad_model = None
390
+ cuad_tokenizer = None
391
+
392
+ # Load a general QA model with caching
393
+ print("Loading general QA model...")
394
+ qa_model = load_model_from_cache("qa_model")
395
+ if qa_model is None:
396
+ try:
397
+ qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
398
+ save_model_to_cache(qa_model, "qa_model")
399
+ print("✅ QA model loaded successfully")
400
+ except Exception as e:
401
+ print(f"⚠️ Error loading QA model: {str(e)}")
402
+ qa_model = None
403
+
404
+ print("✅ All models loaded successfully")
405
+ else:
406
+ print("⚠️ Model loading skipped (LOAD_MODELS=False)")
407
+
408
+ except Exception as e:
409
+ print(f"⚠️ Error loading models: {str(e)}")
410
+ # Instead of raising an error, set fallback behavior
411
+ nlp = None
412
+ summarizer = None
413
+ embedding_model = None
414
+ ner_model = None
415
+ speech_to_text = None
416
+ cuad_model = None
417
+ cuad_tokenizer = None
418
+ qa_model = None
419
+ print("⚠️ Running with limited functionality due to model loading errors")
420
+
421
+ def legal_chatbot(user_input, context):
422
+ """Uses a real NLP model for legal Q&A."""
423
+ global chat_history
424
+ chat_history.append({"role": "user", "content": user_input})
425
+ response = qa_model(question=user_input, context=context)["answer"]
426
+ chat_history.append({"role": "assistant", "content": response})
427
+ return response
428
+
429
+ def extract_text_from_pdf(pdf_file):
430
+ """Extracts text from a PDF file using pdfplumber."""
431
+ try:
432
+ # Suppress pdfplumber warnings about CropBox
433
+ import logging
434
+ logging.getLogger("pdfminer").setLevel(logging.ERROR)
435
+
436
+ with pdfplumber.open(pdf_file) as pdf:
437
+ print(f"Processing PDF with {len(pdf.pages)} pages")
438
+ text = ""
439
+ for i, page in enumerate(pdf.pages):
440
+ page_text = page.extract_text() or ""
441
+ text += page_text + "\n"
442
+ if (i + 1) % 10 == 0: # Log progress every 10 pages
443
+ print(f"Processed {i + 1} pages...")
444
+
445
+ print(f"✅ PDF text extraction complete: {len(text)} characters extracted")
446
+ return text.strip() if text else None
447
+ except Exception as e:
448
+ print(f"❌ PDF extraction error: {str(e)}")
449
+ raise HTTPException(status_code=400, detail=f"PDF extraction failed: {str(e)}")
450
+
451
+ def process_video_to_text(video_file_path):
452
+ """Extract audio from video and convert to text."""
453
+ try:
454
+ print(f"Processing video file at {video_file_path}")
455
+ temp_audio_path = os.path.join("temp", "extracted_audio.wav")
456
+ video = mp.VideoFileClip(video_file_path)
457
+ video.audio.write_audiofile(temp_audio_path, codec='pcm_s16le')
458
+ print(f"Audio extracted to {temp_audio_path}")
459
+ result = speech_to_text(temp_audio_path)
460
+ transcript = result["text"]
461
+ print(f"Transcription completed: {len(transcript)} characters")
462
+ if os.path.exists(temp_audio_path):
463
+ os.remove(temp_audio_path)
464
+ return transcript
465
+ except Exception as e:
466
+ print(f"Error in video processing: {str(e)}")
467
+ raise HTTPException(status_code=400, detail=f"Video processing failed: {str(e)}")
468
+
469
+ def process_audio_to_text(audio_file_path):
470
+ """Process audio file and convert to text."""
471
+ try:
472
+ print(f"Processing audio file at {audio_file_path}")
473
+ result = speech_to_text(audio_file_path)
474
+ transcript = result["text"]
475
+ print(f"Transcription completed: {len(transcript)} characters")
476
+ return transcript
477
+ except Exception as e:
478
+ print(f"Error in audio processing: {str(e)}")
479
+ raise HTTPException(status_code=400, detail=f"Audio processing failed: {str(e)}")
480
+
481
+ def extract_named_entities(text):
482
+ """Extracts named entities from legal text."""
483
+ max_length = 10000
484
+ entities = []
485
+ for i in range(0, len(text), max_length):
486
+ chunk = text[i:i+max_length]
487
+ doc = nlp(chunk)
488
+ entities.extend([{"entity": ent.text, "label": ent.label_} for ent in doc.ents])
489
+ return entities
490
+
491
+ def analyze_risk(text):
492
+ """Analyzes legal risk in the document using keyword-based analysis."""
493
+ risk_keywords = {
494
+ "Liability": ["liability", "responsible", "responsibility", "legal obligation"],
495
+ "Termination": ["termination", "breach", "contract end", "default"],
496
+ "Indemnification": ["indemnification", "indemnify", "hold harmless", "compensate", "compensation"],
497
+ "Payment Risk": ["payment", "terms", "reimbursement", "fee", "schedule", "invoice", "money"],
498
+ "Insurance": ["insurance", "coverage", "policy", "claims"],
499
+ }
500
+ risk_scores = {category: 0 for category in risk_keywords}
501
+ lower_text = text.lower()
502
+ for category, keywords in risk_keywords.items():
503
+ for keyword in keywords:
504
+ risk_scores[category] += lower_text.count(keyword.lower())
505
+ return risk_scores
506
+
507
+ def extract_context_for_risk_terms(text, risk_keywords, window=1):
508
+ """
509
+ Extracts and summarizes the context around risk terms.
510
+ """
511
+ doc = nlp(text)
512
+ sentences = list(doc.sents)
513
+ risk_contexts = {category: [] for category in risk_keywords}
514
+ for i, sent in enumerate(sentences):
515
+ sent_text_lower = sent.text.lower()
516
+ for category, details in risk_keywords.items():
517
+ for keyword in details["keywords"]:
518
+ if keyword.lower() in sent_text_lower:
519
+ start_idx = max(0, i - window)
520
+ end_idx = min(len(sentences), i + window + 1)
521
+ context_chunk = " ".join([s.text for s in sentences[start_idx:end_idx]])
522
+ risk_contexts[category].append(context_chunk)
523
+ summarized_contexts = {}
524
+ for category, contexts in risk_contexts.items():
525
+ if contexts:
526
+ combined_context = " ".join(contexts)
527
+ try:
528
+ summary_result = summarizer(combined_context, max_length=100, min_length=30, do_sample=False)
529
+ summary = summary_result[0]['summary_text']
530
+ except Exception as e:
531
+ summary = "Context summarization failed."
532
+ summarized_contexts[category] = summary
533
+ else:
534
+ summarized_contexts[category] = "No contextual details found."
535
+ return summarized_contexts
536
+
537
+ def get_detailed_risk_info(text):
538
+ """
539
+ Returns detailed risk information by merging risk scores with descriptive details
540
+ and contextual summaries from the document.
541
+ """
542
+ risk_details = {
543
+ "Liability": {
544
+ "description": "Liability refers to the legal responsibility for losses or damages.",
545
+ "common_concerns": "Broad liability clauses may expose parties to unforeseen risks.",
546
+ "recommendations": "Review and negotiate clear limits on liability.",
547
+ "example": "E.g., 'The party shall be liable for direct damages due to negligence.'"
548
+ },
549
+ "Termination": {
550
+ "description": "Termination involves conditions under which a contract can be ended.",
551
+ "common_concerns": "Unilateral termination rights or ambiguous conditions can be risky.",
552
+ "recommendations": "Ensure termination clauses are balanced and include notice periods.",
553
+ "example": "E.g., 'Either party may terminate the agreement with 30 days notice.'"
554
+ },
555
+ "Indemnification": {
556
+ "description": "Indemnification requires one party to compensate for losses incurred by the other.",
557
+ "common_concerns": "Overly broad indemnification can shift significant risk.",
558
+ "recommendations": "Negotiate clear limits and carve-outs where necessary.",
559
+ "example": "E.g., 'The seller shall indemnify the buyer against claims from product defects.'"
560
+ },
561
+ "Payment Risk": {
562
+ "description": "Payment risk pertains to terms regarding fees, schedules, and reimbursements.",
563
+ "common_concerns": "Vague payment terms or hidden charges increase risk.",
564
+ "recommendations": "Clarify payment conditions and include penalties for delays.",
565
+ "example": "E.g., 'Payments must be made within 30 days, with a 2% late fee thereafter.'"
566
+ },
567
+ "Insurance": {
568
+ "description": "Insurance risk covers the adequacy and scope of required coverage.",
569
+ "common_concerns": "Insufficient insurance can leave parties exposed in unexpected events.",
570
+ "recommendations": "Review insurance requirements to ensure they meet the risk profile.",
571
+ "example": "E.g., 'The contractor must maintain liability insurance with at least $1M coverage.'"
572
+ }
573
+ }
574
+ risk_scores = analyze_risk(text)
575
+ risk_keywords_context = {
576
+ "Liability": {"keywords": ["liability", "responsible", "responsibility", "legal obligation"]},
577
+ "Termination": {"keywords": ["termination", "breach", "contract end", "default"]},
578
+ "Indemnification": {"keywords": ["indemnification", "indemnify", "hold harmless", "compensate", "compensation"]},
579
+ "Payment Risk": {"keywords": ["payment", "terms", "reimbursement", "fee", "schedule", "invoice", "money"]},
580
+ "Insurance": {"keywords": ["insurance", "coverage", "policy", "claims"]}
581
+ }
582
+ risk_contexts = extract_context_for_risk_terms(text, risk_keywords_context, window=1)
583
+ detailed_info = {}
584
+ for risk_term, score in risk_scores.items():
585
+ if score > 0:
586
+ info = risk_details.get(risk_term, {"description": "No details available."})
587
+ detailed_info[risk_term] = {
588
+ "score": score,
589
+ "description": info.get("description", ""),
590
+ "common_concerns": info.get("common_concerns", ""),
591
+ "recommendations": info.get("recommendations", ""),
592
+ "example": info.get("example", ""),
593
+ "context_summary": risk_contexts.get(risk_term, "No context available.")
594
+ }
595
+ return detailed_info
596
+
597
+ def analyze_contract_clauses(text):
598
+ """Analyzes contract clauses using the fine-tuned CUAD QA model."""
599
+ max_length = 512
600
+ step = 256
601
+ clauses_detected = []
602
+ try:
603
+ clause_types = list(cuad_model.config.id2label.values())
604
+ except Exception as e:
605
+ clause_types = [
606
+ "Obligations of Seller", "Governing Law", "Termination", "Indemnification",
607
+ "Confidentiality", "Insurance", "Non-Compete", "Change of Control",
608
+ "Assignment", "Warranty", "Limitation of Liability", "Arbitration",
609
+ "IP Rights", "Force Majeure", "Revenue/Profit Sharing", "Audit Rights"
610
+ ]
611
+ chunks = [text[i:i+max_length] for i in range(0, len(text), step) if i+step < len(text)]
612
+ for chunk in chunks:
613
+ inputs = cuad_tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512).to(device)
614
+ with torch.no_grad():
615
+ outputs = cuad_model(**inputs)
616
+ predictions = torch.sigmoid(outputs.start_logits).cpu().numpy()[0]
617
+ for idx, confidence in enumerate(predictions):
618
+ if confidence > 0.5 and idx < len(clause_types):
619
+ clauses_detected.append({"type": clause_types[idx], "confidence": float(confidence)})
620
+ aggregated_clauses = {}
621
+ for clause in clauses_detected:
622
+ clause_type = clause["type"]
623
+ if clause_type not in aggregated_clauses or clause["confidence"] > aggregated_clauses[clause_type]["confidence"]:
624
+ aggregated_clauses[clause_type] = clause
625
+ return list(aggregated_clauses.values())
626
+
627
+ def summarize_text(text):
628
+ """Summarizes legal text using the summarizer model."""
629
+ try:
630
+ if summarizer is None:
631
+ return "Basic analysis (NLP models not available)"
632
+
633
+ # Split text into chunks if it's too long
634
+ max_chunk_size = 1024
635
+ if len(text) > max_chunk_size:
636
+ chunks = [text[i:i+max_chunk_size] for i in range(0, len(text), max_chunk_size)]
637
+ summaries = []
638
+ for chunk in chunks:
639
+ summary = summarizer(chunk, max_length=100, min_length=30, do_sample=False)
640
+ summaries.append(summary[0]['summary_text'])
641
+ return " ".join(summaries)
642
+ else:
643
+ summary = summarizer(text, max_length=100, min_length=30, do_sample=False)
644
+ return summary[0]['summary_text']
645
+ except Exception as e:
646
+ print(f"Error in summarization: {str(e)}")
647
+ return "Summarization failed. Please try again later."
648
+
649
+ @app.post("/analyze_legal_document")
650
+ async def analyze_legal_document(
651
+ file: UploadFile = File(...),
652
+ current_user: User = Depends(get_current_active_user)
653
+ ):
654
+ """Analyzes a legal document (PDF) and returns insights based on subscription tier."""
655
+ try:
656
+ # Calculate file size in MB
657
+ file_content = await file.read()
658
+ file_size_mb = len(file_content) / (1024 * 1024)
659
+
660
+ # Check subscription access for document analysis
661
+ check_subscription_access(current_user, "document_analysis", file_size_mb)
662
+
663
+ print(f"Processing file: {file.filename}")
664
+
665
+ # Create a temporary file to store the uploaded PDF
666
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
667
+ tmp.write(file_content)
668
+ tmp_path = tmp.name
669
+
670
+ # Extract text from PDF
671
+ text = extract_text_from_pdf(tmp_path)
672
+
673
+ # Clean up the temporary file
674
+ os.unlink(tmp_path)
675
+
676
+ if not text:
677
+ raise HTTPException(status_code=400, detail="Could not extract text from PDF")
678
+
679
+ # Generate a task ID
680
+ task_id = str(uuid.uuid4())
681
+
682
+ # Store document context for later retrieval
683
+ store_document_context(task_id, text)
684
+
685
+ # Basic analysis available to all tiers
686
+ summary = summarize_text(text)
687
+ entities = extract_named_entities(text)
688
+ risk_scores = analyze_risk(text)
689
+
690
+ # Prepare response based on subscription tier
691
+ response = {
692
+ "task_id": task_id,
693
+ "summary": summary,
694
+ "entities": entities,
695
+ "risk_assessment": risk_scores,
696
+ "subscription_tier": current_user.subscription_tier
697
+ }
698
+
699
+ # Add premium features if user has access
700
+ if current_user.subscription_tier == "premium_tier":
701
+ # Add detailed risk assessment
702
+ if "detailed_risk_assessment" in SUBSCRIPTION_TIERS[current_user.subscription_tier]["features"]:
703
+ detailed_risk = get_detailed_risk_info(text)
704
+ response["detailed_risk_assessment"] = detailed_risk
705
+
706
+ # Add contract clause analysis
707
+ if "contract_clause_analysis" in SUBSCRIPTION_TIERS[current_user.subscription_tier]["features"]:
708
+ clauses = analyze_contract_clauses(text)
709
+ response["contract_clauses"] = clauses
710
+
711
+ return response
712
+
713
+ except Exception as e:
714
+ print(f"Error analyzing document: {str(e)}")
715
+ raise HTTPException(status_code=500, detail=f"Error analyzing document: {str(e)}")
716
+
717
+ @app.post("/analyze_legal_video")
718
+ async def analyze_legal_video(
719
+ file: UploadFile = File(...),
720
+ current_user: User = Depends(get_current_active_user)
721
+ ):
722
+ """Analyzes legal video by transcribing and analyzing the transcript."""
723
+ try:
724
+ # Calculate file size in MB
725
+ file_content = await file.read()
726
+ file_size_mb = len(file_content) / (1024 * 1024)
727
+
728
+ # Check subscription access for video analysis
729
+ check_subscription_access(current_user, "video_analysis", file_size_mb)
730
+
731
+ print(f"Processing video file: {file.filename}")
732
+
733
+ # Create a temporary file to store the uploaded video
734
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
735
+ tmp.write(file_content)
736
+ tmp_path = tmp.name
737
+
738
+ # Process video to extract transcript
739
+ transcript = process_video_to_text(tmp_path)
740
+
741
+ # Clean up the temporary file
742
+ os.unlink(tmp_path)
743
+
744
+ if not transcript:
745
+ raise HTTPException(status_code=400, detail="Could not extract transcript from video")
746
+
747
+ # Generate a task ID
748
+ task_id = str(uuid.uuid4())
749
+
750
+ # Store document context for later retrieval
751
+ store_document_context(task_id, transcript)
752
+
753
+ # Basic analysis
754
+ summary = summarize_text(transcript)
755
+ entities = extract_named_entities(transcript)
756
+ risk_scores = analyze_risk(transcript)
757
+
758
+ # Prepare response
759
+ response = {
760
+ "task_id": task_id,
761
+ "transcript": transcript,
762
+ "summary": summary,
763
+ "entities": entities,
764
+ "risk_assessment": risk_scores,
765
+ "subscription_tier": current_user.subscription_tier
766
+ }
767
+
768
+ # Add premium features if user has access
769
+ if current_user.subscription_tier == "premium_tier":
770
+ # Add detailed risk assessment
771
+ if "detailed_risk_assessment" in SUBSCRIPTION_TIERS[current_user.subscription_tier]["features"]:
772
+ detailed_risk = get_detailed_risk_info(transcript)
773
+ response["detailed_risk_assessment"] = detailed_risk
774
+
775
+ return response
776
+
777
+ except Exception as e:
778
+ print(f"Error analyzing video: {str(e)}")
779
+ raise HTTPException(status_code=500, detail=f"Error analyzing video: {str(e)}")
780
+
781
+
782
+ @app.post("/legal_chatbot/{task_id}")
783
+ async def chat_with_document(
784
+ task_id: str,
785
+ question: str = Form(...),
786
+ current_user: User = Depends(get_current_active_user)
787
+ ):
788
+ """Chat with a document using the legal chatbot."""
789
+ try:
790
+ # Check if user has access to chatbot feature
791
+ if "chatbot" not in SUBSCRIPTION_TIERS[current_user.subscription_tier]["features"]:
792
+ raise HTTPException(
793
+ status_code=403,
794
+ detail=f"The chatbot feature is not available in your {current_user.subscription_tier} subscription. Please upgrade to access this feature."
795
+ )
796
+
797
+ # Check if document context exists
798
+ context = load_document_context(task_id)
799
+ if not context:
800
+ raise HTTPException(status_code=404, detail="Document context not found. Please analyze a document first.")
801
+
802
+ # Use the chatbot to answer the question
803
+ answer = legal_chatbot(question, context)
804
+
805
+ return {"answer": answer, "chat_history": chat_history}
806
+
807
+ except Exception as e:
808
+ print(f"Error in chatbot: {str(e)}")
809
+ raise HTTPException(status_code=500, detail=f"Error in chatbot: {str(e)}")
810
+
811
+ @app.get("/")
812
+ async def root():
813
+ """Root endpoint that returns a welcome message."""
814
+ return HTMLResponse(content="""
815
+ <html>
816
+ <head>
817
+ <title>Legal Document Analysis API</title>
818
+ <style>
819
+ body {
820
+ font-family: Arial, sans-serif;
821
+ max-width: 800px;
822
+ margin: 0 auto;
823
+ padding: 20px;
824
+ }
825
+ h1 {
826
+ color: #2c3e50;
827
+ }
828
+ .endpoint {
829
+ background-color: #f8f9fa;
830
+ padding: 15px;
831
+ margin-bottom: 10px;
832
+ border-radius: 5px;
833
+ }
834
+ .method {
835
+ font-weight: bold;
836
+ color: #e74c3c;
837
+ }
838
+ </style>
839
+ </head>
840
+ <body>
841
+ <h1>Legal Document Analysis API</h1>
842
+ <p>Welcome to the Legal Document Analysis API. This API provides tools for analyzing legal documents, videos, and audio.</p>
843
+ <h2>Available Endpoints:</h2>
844
+ <div class="endpoint">
845
+ <p><span class="method">POST</span> /analyze_legal_document - Analyze a legal document (PDF)</p>
846
+ </div>
847
+ <div class="endpoint">
848
+ <p><span class="method">POST</span> /analyze_legal_video - Analyze a legal video</p>
849
+ </div>
850
+ <div class="endpoint">
851
+ <p><span class="method">POST</span> /analyze_legal_audio - Analyze legal audio</p>
852
+ </div>
853
+ <div class="endpoint">
854
+ <p><span class="method">POST</span> /legal_chatbot/{task_id} - Chat with a document</p>
855
+ </div>
856
+ <div class="endpoint">
857
+ <p><span class="method">POST</span> /register - Register a new user</p>
858
+ </div>
859
+ <div class="endpoint">
860
+ <p><span class="method">POST</span> /token - Login to get an access token</p>
861
+ </div>
862
+ <div class="endpoint">
863
+ <p><span class="method">GET</span> /users/me - Get current user information</p>
864
+ </div>
865
+ <div class="endpoint">
866
+ <p><span class="method">POST</span> /subscribe/{tier} - Subscribe to a plan</p>
867
+ </div>
868
+ <p>For more details, visit the <a href="/docs">API documentation</a>.</p>
869
+ </body>
870
+ </html>
871
+ """)
872
+
873
+ @app.post("/register", response_model=Token)
874
+ async def register_new_user(user_data: UserCreate):
875
+ """Register a new user with a free subscription"""
876
+ try:
877
+ success, result = register_user(user_data.email, user_data.password)
878
+
879
+ if not success:
880
+ raise HTTPException(status_code=400, detail=result)
881
+
882
+ return {"access_token": result["access_token"], "token_type": "bearer"}
883
+
884
+ except HTTPException:
885
+ # Re-raise HTTP exceptions
886
+ raise
887
+ except Exception as e:
888
+ print(f"Registration error: {str(e)}")
889
+ raise HTTPException(status_code=500, detail=f"Registration failed: {str(e)}")
890
+
891
+ @app.post("/token", response_model=Token)
892
+ async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
893
+ """Endpoint for OAuth2 token generation"""
894
+ try:
895
+ # Add debug logging
896
+ logger.info(f"Token request for username: {form_data.username}")
897
+
898
+ user = authenticate_user(form_data.username, form_data.password)
899
+ if not user:
900
+ logger.warning(f"Authentication failed for: {form_data.username}")
901
+ raise HTTPException(
902
+ status_code=status.HTTP_401_UNAUTHORIZED,
903
+ detail="Incorrect username or password",
904
+ headers={"WWW-Authenticate": "Bearer"},
905
+ )
906
+
907
+ access_token = create_access_token(user.id)
908
+ if not access_token:
909
+ logger.error(f"Failed to create access token for user: {user.id}")
910
+ raise HTTPException(
911
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
912
+ detail="Could not create access token",
913
+ )
914
+
915
+ logger.info(f"Login successful for: {form_data.username}")
916
+ return {"access_token": access_token, "token_type": "bearer"}
917
+ except Exception as e:
918
+ logger.error(f"Token endpoint error: {e}")
919
+ raise HTTPException(
920
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
921
+ detail=f"Login error: {str(e)}",
922
+ )
923
+
924
+
925
+ @app.get("/debug/token")
926
+ async def debug_token(authorization: str = Header(None)):
927
+ """Debug endpoint to check token validity"""
928
+ try:
929
+ if not authorization:
930
+ return {"valid": False, "error": "No authorization header provided"}
931
+
932
+ # Extract token from Authorization header
933
+ scheme, token = authorization.split()
934
+ if scheme.lower() != 'bearer':
935
+ return {"valid": False, "error": "Not a bearer token"}
936
+
937
+ # Log the token for debugging
938
+ logger.info(f"Debugging token: {token[:10]}...")
939
+
940
+ # Try to validate the token
941
+ try:
942
+ user = await get_current_active_user(token)
943
+ return {"valid": True, "user_id": user.id, "email": user.email}
944
+ except Exception as e:
945
+ return {"valid": False, "error": str(e)}
946
+ except Exception as e:
947
+ return {"valid": False, "error": f"Token debug error: {str(e)}"}
948
+
949
+
950
+ @app.post("/login")
951
+ async def api_login(email: str, password: str):
952
+ success, result = login_user(email, password)
953
+ if not success:
954
+ raise HTTPException(
955
+ status_code=status.HTTP_401_UNAUTHORIZED,
956
+ detail=result
957
+ )
958
+ return result
959
+
960
+ @app.get("/health")
961
+ def health_check():
962
+ """Simple health check endpoint to verify the API is running"""
963
+ return {"status": "ok", "message": "API is running"}
964
+
965
+ @app.get("/users/me", response_model=User)
966
+ async def read_users_me(current_user: User = Depends(get_current_active_user)):
967
+ return current_user
968
+
969
+ @app.post("/analyze_legal_audio")
970
+ async def analyze_legal_audio(
971
+ file: UploadFile = File(...),
972
+ current_user: User = Depends(get_current_active_user)
973
+ ):
974
+ """Analyzes legal audio by transcribing and analyzing the transcript."""
975
+ try:
976
+ # Calculate file size in MB
977
+ file_content = await file.read()
978
+ file_size_mb = len(file_content) / (1024 * 1024)
979
+
980
+ # Check subscription access for audio analysis
981
+ check_subscription_access(current_user, "audio_analysis", file_size_mb)
982
+
983
+ print(f"Processing audio file: {file.filename}")
984
+
985
+ # Create a temporary file to store the uploaded audio
986
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
987
+ tmp.write(file_content)
988
+ tmp_path = tmp.name
989
+
990
+ # Process audio to extract transcript
991
+ transcript = process_audio_to_text(tmp_path)
992
+
993
+ # Clean up the temporary file
994
+ os.unlink(tmp_path)
995
+
996
+ if not transcript:
997
+ raise HTTPException(status_code=400, detail="Could not extract transcript from audio")
998
+
999
+ # Generate a task ID
1000
+ task_id = str(uuid.uuid4())
1001
+
1002
+ # Store document context for later retrieval
1003
+ store_document_context(task_id, transcript)
1004
+
1005
+ # Basic analysis
1006
+ summary = summarize_text(transcript)
1007
+ entities = extract_named_entities(transcript)
1008
+ risk_scores = analyze_risk(transcript)
1009
+
1010
+ # Prepare response
1011
+ response = {
1012
+ "task_id": task_id,
1013
+ "transcript": transcript,
1014
+ "summary": summary,
1015
+ "entities": entities,
1016
+ "risk_assessment": risk_scores,
1017
+ "subscription_tier": current_user.subscription_tier
1018
+ }
1019
+
1020
+ # Add premium features if user has access
1021
+ if current_user.subscription_tier == "premium_tier":
1022
+ # Add detailed risk assessment
1023
+ if "detailed_risk_assessment" in SUBSCRIPTION_TIERS[current_user.subscription_tier]["features"]:
1024
+ detailed_risk = get_detailed_risk_info(transcript)
1025
+ response["detailed_risk_assessment"] = detailed_risk
1026
+
1027
+ return response
1028
+
1029
+ except Exception as e:
1030
+ print(f"Error analyzing audio: {str(e)}")
1031
+ raise HTTPException(status_code=500, detail=f"Error analyzing audio: {str(e)}")
1032
+
1033
+
1034
+
1035
+ # Subscription management endpoints
1036
+ @app.get("/users/me/subscription")
1037
+ async def get_user_subscription(current_user: User = Depends(get_current_active_user)):
1038
+ """Get the current user's subscription details"""
1039
+ try:
1040
+ # Get subscription details from database
1041
+ conn = get_db_connection()
1042
+ cursor = conn.cursor()
1043
+
1044
+ # Get the most recent active subscription
1045
+ try:
1046
+ cursor.execute(
1047
+ "SELECT id, tier, status, created_at, expires_at, paypal_subscription_id FROM subscriptions "
1048
+ "WHERE user_id = ? AND status = 'active' ORDER BY created_at DESC LIMIT 1",
1049
+ (current_user.id,)
1050
+ )
1051
+ subscription = cursor.fetchone()
1052
+ except sqlite3.OperationalError as e:
1053
+ # Handle missing tier column
1054
+ if "no such column: tier" in str(e):
1055
+ logger.warning("Subscriptions table missing 'tier' column. Returning default subscription.")
1056
+ subscription = None
1057
+ else:
1058
+ raise
1059
+
1060
+ # Get subscription tiers with pricing directly from SUBSCRIPTION_TIERS
1061
+ subscription_tiers = {
1062
+ "free_tier": {
1063
+ "price": SUBSCRIPTION_TIERS["free_tier"]["price"],
1064
+ "currency": SUBSCRIPTION_TIERS["free_tier"]["currency"],
1065
+ "features": SUBSCRIPTION_TIERS["free_tier"]["features"]
1066
+ },
1067
+ "standard_tier": {
1068
+ "price": SUBSCRIPTION_TIERS["standard_tier"]["price"],
1069
+ "currency": SUBSCRIPTION_TIERS["standard_tier"]["currency"],
1070
+ "features": SUBSCRIPTION_TIERS["standard_tier"]["features"]
1071
+ },
1072
+ "premium_tier": {
1073
+ "price": SUBSCRIPTION_TIERS["premium_tier"]["price"],
1074
+ "currency": SUBSCRIPTION_TIERS["premium_tier"]["currency"],
1075
+ "features": SUBSCRIPTION_TIERS["premium_tier"]["features"]
1076
+ }
1077
+ }
1078
+
1079
+ if subscription:
1080
+ sub_id, tier, status, created_at, expires_at, paypal_id = subscription
1081
+ result = {
1082
+ "id": sub_id,
1083
+ "tier": tier,
1084
+ "status": status,
1085
+ "created_at": created_at,
1086
+ "expires_at": expires_at,
1087
+ "paypal_subscription_id": paypal_id,
1088
+ "current_tier": current_user.subscription_tier,
1089
+ "subscription_tiers": subscription_tiers
1090
+ }
1091
+ else:
1092
+ result = {
1093
+ "tier": "free_tier",
1094
+ "status": "active",
1095
+ "current_tier": current_user.subscription_tier,
1096
+ "subscription_tiers": subscription_tiers
1097
+ }
1098
+
1099
+ conn.close()
1100
+ return result
1101
+ except Exception as e:
1102
+ logger.error(f"Error getting subscription: {str(e)}")
1103
+ raise HTTPException(status_code=500, detail=f"Error getting subscription: {str(e)}")
1104
+ # Request model for subscription creation
1105
+ class SubscriptionCreate(BaseModel):
1106
+ tier: str
1107
+
1108
+ @app.post("/create_subscription")
1109
+ async def create_subscription(
1110
+ subscription: SubscriptionCreate,
1111
+ current_user: User = Depends(get_current_active_user)
1112
+ ):
1113
+ """Create a subscription for the current user"""
1114
+ try:
1115
+ # Log the request for debugging
1116
+ logger.info(f"Creating subscription for user {current_user.email} with tier {subscription.tier}")
1117
+ logger.info(f"Available tiers: {list(SUBSCRIPTION_TIERS.keys())}")
1118
+
1119
+ # Validate tier
1120
+ valid_tiers = ["standard_tier", "premium_tier"]
1121
+ if subscription.tier not in valid_tiers:
1122
+ logger.warning(f"Invalid tier requested: {subscription.tier}")
1123
+ raise HTTPException(status_code=400, detail=f"Invalid tier: {subscription.tier}. Must be one of {valid_tiers}")
1124
+
1125
+ # Create subscription
1126
+ logger.info(f"Calling create_user_subscription with email: {current_user.email}, tier: {subscription.tier}")
1127
+ success, result = create_user_subscription(current_user.email, subscription.tier)
1128
+
1129
+ if not success:
1130
+ logger.error(f"Failed to create subscription: {result}")
1131
+ raise HTTPException(status_code=400, detail=result)
1132
+
1133
+ logger.info(f"Subscription created successfully: {result}")
1134
+ return result
1135
+ except Exception as e:
1136
+ logger.error(f"Error creating subscription: {str(e)}")
1137
+ # Include the full traceback for better debugging
1138
+ import traceback
1139
+ logger.error(f"Traceback: {traceback.format_exc()}")
1140
+ raise HTTPException(status_code=500, detail=f"Error creating subscription: {str(e)}")
1141
+
1142
+ @app.post("/subscribe/{tier}")
1143
+ async def subscribe_to_tier(
1144
+ tier: str,
1145
+ current_user: User = Depends(get_current_active_user)
1146
+ ):
1147
+ """Subscribe to a specific tier"""
1148
+ try:
1149
+ # Validate tier
1150
+ valid_tiers = ["standard_tier", "premium_tier"]
1151
+ if tier not in valid_tiers:
1152
+ raise HTTPException(status_code=400, detail=f"Invalid tier: {tier}. Must be one of {valid_tiers}")
1153
+
1154
+ # Create subscription
1155
+ success, result = create_user_subscription(current_user.email, tier)
1156
+
1157
+ if not success:
1158
+ raise HTTPException(status_code=400, detail=result)
1159
+
1160
+ return result
1161
+ except Exception as e:
1162
+ logger.error(f"Error creating subscription: {str(e)}")
1163
+ raise HTTPException(status_code=500, detail=f"Error creating subscription: {str(e)}")
1164
+
1165
+ @app.post("/subscription/create")
1166
+ async def create_subscription_json(request: Request, current_user: User = Depends(get_current_active_user)):
1167
+ """Create a subscription for the current user"""
1168
+ try:
1169
+ data = await request.json()
1170
+ tier = data.get("tier")
1171
+
1172
+ if not tier:
1173
+ return JSONResponse(
1174
+ status_code=400,
1175
+ content={"detail": "Tier is required"}
1176
+ )
1177
+
1178
+ # Log the request for debugging
1179
+ logger.info(f"Creating subscription for user {current_user.email} with tier {tier}")
1180
+
1181
+ # Create the subscription using the imported function directly
1182
+ success, result = create_user_subscription(current_user.email, tier)
1183
+
1184
+ if success:
1185
+ # Make sure we're returning the approval_url in the response
1186
+ logger.info(f"Subscription created successfully: {result}")
1187
+ logger.info(f"Approval URL: {result.get('approval_url')}")
1188
+
1189
+ return {
1190
+ "success": True,
1191
+ "data": {
1192
+ "approval_url": result["approval_url"],
1193
+ "subscription_id": result["subscription_id"],
1194
+ "tier": result["tier"]
1195
+ }
1196
+ }
1197
+ else:
1198
+ logger.error(f"Failed to create subscription: {result}")
1199
+ return JSONResponse(
1200
+ status_code=400,
1201
+ content={"success": False, "detail": result}
1202
+ )
1203
+ except Exception as e:
1204
+ logger.error(f"Error creating subscription: {str(e)}")
1205
+ import traceback
1206
+ logger.error(f"Traceback: {traceback.format_exc()}")
1207
+ return JSONResponse(
1208
+ status_code=500,
1209
+ content={"success": False, "detail": f"Error creating subscription: {str(e)}"}
1210
+ )
1211
+
1212
+ @app.post("/admin/initialize-paypal-plans")
1213
+ async def initialize_paypal_plans(request: Request):
1214
+ """Initialize PayPal subscription plans"""
1215
+ try:
1216
+ # This should be protected with admin authentication in production
1217
+ plans = initialize_subscription_plans()
1218
+
1219
+ if plans:
1220
+ return JSONResponse(
1221
+ status_code=200,
1222
+ content={"success": True, "plans": plans}
1223
+ )
1224
+ else:
1225
+ return JSONResponse(
1226
+ status_code=500,
1227
+ content={"success": False, "detail": "Failed to initialize plans"}
1228
+ )
1229
+ except Exception as e:
1230
+ logger.error(f"Error initializing PayPal plans: {str(e)}")
1231
+ return JSONResponse(
1232
+ status_code=500,
1233
+ content={"success": False, "detail": f"Error initializing plans: {str(e)}"}
1234
+ )
1235
+
1236
+
1237
+ @app.post("/subscription/verify")
1238
+ async def verify_subscription(request: Request, current_user: User = Depends(get_current_active_user)):
1239
+ """Verify a subscription after payment"""
1240
+ try:
1241
+ data = await request.json()
1242
+ subscription_id = data.get("subscription_id")
1243
+
1244
+ if not subscription_id:
1245
+ return JSONResponse(
1246
+ status_code=400,
1247
+ content={"success": False, "detail": "Subscription ID is required"}
1248
+ )
1249
+
1250
+ logger.info(f"Verifying subscription: {subscription_id}")
1251
+
1252
+ # Verify the subscription with PayPal
1253
+ success, result = verify_paypal_subscription(subscription_id)
1254
+
1255
+ if not success:
1256
+ logger.error(f"Subscription verification failed: {result}")
1257
+ return JSONResponse(
1258
+ status_code=400,
1259
+ content={"success": False, "detail": str(result)}
1260
+ )
1261
+
1262
+ # Update the user's subscription in the database
1263
+ conn = get_db_connection()
1264
+ cursor = conn.cursor()
1265
+
1266
+ # Get the subscription details
1267
+ cursor.execute(
1268
+ "SELECT tier FROM subscriptions WHERE paypal_subscription_id = ?",
1269
+ (subscription_id,)
1270
+ )
1271
+ subscription = cursor.fetchone()
1272
+
1273
+ if not subscription:
1274
+ # This is a new subscription, get the tier from the PayPal response
1275
+ tier = "standard_tier" # Default to standard tier
1276
+ # You could extract the tier from the PayPal plan ID if needed
1277
+
1278
+ # Create a new subscription record
1279
+ sub_id = str(uuid.uuid4())
1280
+ start_date = datetime.now()
1281
+ expires_at = start_date + timedelta(days=30)
1282
+
1283
+ cursor.execute(
1284
+ "INSERT INTO subscriptions (id, user_id, tier, status, created_at, expires_at, paypal_subscription_id) VALUES (?, ?, ?, ?, ?, ?, ?)",
1285
+ (sub_id, current_user.id, tier, "active", start_date, expires_at, subscription_id)
1286
+ )
1287
+ else:
1288
+ # Update existing subscription
1289
+ tier = subscription[0]
1290
+ cursor.execute(
1291
+ "UPDATE subscriptions SET status = 'active' WHERE paypal_subscription_id = ?",
1292
+ (subscription_id,)
1293
+ )
1294
+
1295
+ # Update user's subscription tier
1296
+ cursor.execute(
1297
+ "UPDATE users SET subscription_tier = ? WHERE id = ?",
1298
+ (tier, current_user.id)
1299
+ )
1300
+
1301
+ conn.commit()
1302
+ conn.close()
1303
+
1304
+ return JSONResponse(
1305
+ status_code=200,
1306
+ content={"success": True, "detail": "Subscription verified successfully"}
1307
+ )
1308
+
1309
+ except Exception as e:
1310
+ logger.error(f"Error verifying subscription: {str(e)}")
1311
+ return JSONResponse(
1312
+ status_code=500,
1313
+ content={"success": False, "detail": f"Error verifying subscription: {str(e)}"}
1314
+ )
1315
+
1316
+ @app.post("/subscription/webhook")
1317
+ async def subscription_webhook(request: Request):
1318
+ """Handle PayPal subscription webhooks"""
1319
+ try:
1320
+ payload = await request.json()
1321
+ success, result = handle_subscription_webhook(payload)
1322
+
1323
+ if not success:
1324
+ logger.error(f"Webhook processing failed: {result}")
1325
+ return {"status": "error", "message": result}
1326
+
1327
+ return {"status": "success", "message": result}
1328
+ except Exception as e:
1329
+ logger.error(f"Error processing webhook: {str(e)}")
1330
+ return {"status": "error", "message": f"Error processing webhook: {str(e)}"}
1331
+
1332
+ @app.get("/subscription/verify/{subscription_id}")
1333
+ async def verify_subscription_by_id(
1334
+ subscription_id: str,
1335
+ current_user: User = Depends(get_current_active_user)
1336
+ ):
1337
+ """Verify a subscription payment and update user tier"""
1338
+ try:
1339
+ # Verify the subscription
1340
+ success, result = verify_subscription_payment(subscription_id)
1341
+
1342
+ if not success:
1343
+ raise HTTPException(status_code=400, detail=f"Subscription verification failed: {result}")
1344
+
1345
+ # Get the plan ID from the subscription to determine tier
1346
+ plan_id = result.get("plan_id", "")
1347
+
1348
+ # Connect to DB to get the tier for this plan
1349
+ conn = get_db_connection()
1350
+ cursor = conn.cursor()
1351
+ cursor.execute("SELECT tier FROM paypal_plans WHERE plan_id = ?", (plan_id,))
1352
+ tier_result = cursor.fetchone()
1353
+ conn.close()
1354
+
1355
+ if not tier_result:
1356
+ raise HTTPException(status_code=400, detail="Could not determine subscription tier")
1357
+
1358
+ tier = tier_result[0]
1359
+
1360
+ # Update the user's subscription
1361
+ success, update_result = update_user_subscription(current_user.email, subscription_id, tier)
1362
+
1363
+ if not success:
1364
+ raise HTTPException(status_code=500, detail=f"Failed to update subscription: {update_result}")
1365
+
1366
+ return {
1367
+ "message": f"Successfully subscribed to {tier} tier",
1368
+ "subscription_id": subscription_id,
1369
+ "status": result.get("status", ""),
1370
+ "next_billing_time": result.get("billing_info", {}).get("next_billing_time", "")
1371
+ }
1372
+
1373
+ except HTTPException:
1374
+ raise
1375
+ except Exception as e:
1376
+ print(f"Subscription verification error: {str(e)}")
1377
+ raise HTTPException(status_code=500, detail=f"Subscription verification failed: {str(e)}")
1378
+
1379
+ @app.post("/webhook/paypal")
1380
+ async def paypal_webhook(request: Request):
1381
+ """Handle PayPal subscription webhooks"""
1382
+ try:
1383
+ payload = await request.json()
1384
+ logger.info(f"Received PayPal webhook: {payload.get('event_type', 'unknown event')}")
1385
+
1386
+ # Process the webhook
1387
+ result = handle_subscription_webhook(payload)
1388
+
1389
+ return {"status": "success", "message": "Webhook processed"}
1390
+ except Exception as e:
1391
+ logger.error(f"Webhook processing error: {str(e)}")
1392
+ # Return 200 even on error to acknowledge receipt to PayPal
1393
+ return {"status": "error", "message": str(e)}
1394
+
1395
+ # Add this to your startup code
1396
+ @app.on_event("startup")
1397
+ async def startup_event():
1398
+ """Initialize subscription plans on startup"""
1399
+ try:
1400
+ # Initialize PayPal subscription plans if needed
1401
+ # If you have an initialize_subscription_plans function in your paypal_integration.py,
1402
+ # you can call it here
1403
+ print("Application started successfully")
1404
+ except Exception as e:
1405
+ print(f"Error during startup: {str(e)}")
1406
+
1407
+ if __name__ == "__main__":
1408
+ uvicorn.run("app:app", host="0.0.0.0", port=8500, reload=True)
auth.py ADDED
@@ -0,0 +1,655 @@
1
+ import sqlite3
2
+ import uuid
3
+ import os
4
+ import logging
5
+ from datetime import datetime, timedelta
6
+ import hashlib  # note: JWT (imported below) is what is actually used for token signing
7
+ from passlib.hash import bcrypt
8
+ from dotenv import load_dotenv
9
+ from fastapi import Depends, HTTPException
10
+ from fastapi.security import OAuth2PasswordBearer
11
+ from pydantic import BaseModel
12
+ from typing import Optional
13
+ from fastapi import HTTPException, status
14
+ import jwt
15
+ from jwt.exceptions import PyJWTError
16
+ import sqlite3
17
+
18
+ # Load environment variables
19
+ load_dotenv()
20
+
21
+ # Configure logging
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
25
+ )
26
+ logger = logging.getLogger('auth')
27
+
28
+ # Security configuration
29
+ SECRET_KEY = os.getenv("JWT_SECRET", "your-secret-key-for-development-only")
30
+ ALGORITHM = "HS256"
31
+ JWT_EXPIRATION_DELTA = timedelta(days=1) # Token valid for 1 day
32
+ # Database path from environment variable or default
33
+ # Fix the incorrect DB_PATH
34
+ DB_PATH = os.getenv("DB_PATH", os.path.join(os.path.dirname(__file__), "data/user_data.db"))
35
+
36
+ # FastAPI OAuth2 scheme
37
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
38
+
39
+ # Pydantic models for FastAPI
40
+ class User(BaseModel):
41
+ id: str
42
+ email: str
43
+ subscription_tier: str = "free_tier"
44
+ subscription_expiry: Optional[datetime] = None
45
+ api_calls_remaining: int = 5
46
+ last_reset_date: Optional[datetime] = None
47
+
48
+ class UserCreate(BaseModel):
49
+ email: str
50
+ password: str
51
+
52
+ class Token(BaseModel):
53
+ access_token: str
54
+ token_type: str
55
+
56
+ class TokenData(BaseModel):
57
+ user_id: Optional[str] = None
58
+
59
+ # Subscription tiers and limits
60
+ # Update the SUBSCRIPTION_TIERS dictionary
61
+ SUBSCRIPTION_TIERS = {
62
+ "free_tier": {
63
+ "price": 0,
64
+ "currency": "INR",
65
+ "features": ["basic_document_analysis", "basic_risk_assessment"],
66
+ "limits": {
67
+ "document_size_mb": 5,
68
+ "documents_per_month": 3,
69
+ "video_size_mb": 0,
70
+ "audio_size_mb": 0,
71
+ "daily_api_calls": 10, # <-- Add this
72
+ "max_document_size_mb": 5 # <-- Add this
73
+ }
74
+ },
75
+ "standard_tier": {
76
+ "price": 799,
77
+ "currency": "INR",
78
+ "features": ["basic_document_analysis", "basic_risk_assessment", "video_analysis", "audio_analysis", "chatbot"],
79
+ "limits": {
80
+ "document_size_mb": 20,
81
+ "documents_per_month": 20,
82
+ "video_size_mb": 100,
83
+ "audio_size_mb": 50,
84
+ "daily_api_calls": 100, # <-- Add this
85
+ "max_document_size_mb": 20 # <-- Add this
86
+ }
87
+ },
88
+ "premium_tier": {
89
+ "price": 1499,
90
+ "currency": "INR",
91
+ "features": ["basic_document_analysis", "basic_risk_assessment", "video_analysis", "audio_analysis", "chatbot", "detailed_risk_assessment", "contract_clause_analysis"],
92
+ "limits": {
93
+ "document_size_mb": 50,
94
+ "documents_per_month": 999999,
95
+ "video_size_mb": 500,
96
+ "audio_size_mb": 200,
97
+ "daily_api_calls": 1000, # <-- Add this
98
+ "max_document_size_mb": 50 # <-- Add this
99
+ }
100
+ }
101
+ }
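Note that the per-tier limits sit under the nested "limits" key, so lookups elsewhere in this module read them as in this small illustrative sketch:

# Illustrative access pattern (values taken from the dictionary above)
free = SUBSCRIPTION_TIERS["free_tier"]
free["limits"]["daily_api_calls"]        # 10
free["limits"]["max_document_size_mb"]   # 5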
102
+
103
+ # Database connection management
104
+ def get_db_connection():
105
+ """Create and return a database connection with proper error handling"""
106
+ try:
107
+ # Ensure the directory exists
108
+ db_dir = os.path.dirname(DB_PATH)
109
+ os.makedirs(db_dir, exist_ok=True)
110
+
111
+ conn = sqlite3.connect(DB_PATH)
112
+ conn.row_factory = sqlite3.Row # Return rows as dictionaries
113
+ return conn
114
+ except sqlite3.Error as e:
115
+ logger.error(f"Database connection error: {e}")
116
+ raise Exception(f"Database connection failed: {e}")
117
+
118
+ # Database setup
119
+ # In the init_auth_db function, update the CREATE TABLE statement to match our schema
120
+ def init_auth_db():
121
+ """Initialize the authentication database with required tables"""
122
+ try:
123
+ conn = get_db_connection()
124
+ c = conn.cursor()
125
+
126
+ # Create users table with the correct schema
127
+ c.execute('''
128
+ CREATE TABLE IF NOT EXISTS users (
129
+ id TEXT PRIMARY KEY,
130
+ email TEXT UNIQUE NOT NULL,
131
+ hashed_password TEXT NOT NULL,
132
+ password TEXT,
133
+ subscription_tier TEXT DEFAULT 'free_tier',
134
+ is_active BOOLEAN DEFAULT 1,
135
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
136
+ api_calls_remaining INTEGER DEFAULT 10,
137
+ last_reset_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
138
+ )
139
+ ''')
140
+
141
+ # Create subscriptions table
142
+ c.execute('''
143
+ CREATE TABLE IF NOT EXISTS subscriptions (
144
+ id TEXT PRIMARY KEY,
145
+ user_id TEXT,
146
+ tier TEXT,
147
+ plan_id TEXT,
148
+ status TEXT,
149
+ created_at TIMESTAMP,
150
+ expires_at TIMESTAMP,
151
+ paypal_subscription_id TEXT,
152
+ FOREIGN KEY (user_id) REFERENCES users (id)
153
+ )
154
+ ''')
155
+
156
+ # Create usage stats table
157
+ c.execute('''
158
+ CREATE TABLE IF NOT EXISTS usage_stats (
159
+ id TEXT PRIMARY KEY,
160
+ user_id TEXT,
161
+ month INTEGER,
162
+ year INTEGER,
163
+ analyses_used INTEGER,
164
+ FOREIGN KEY (user_id) REFERENCES users (id)
165
+ )
166
+ ''')
167
+
168
+ # Create tokens table for refresh tokens
169
+ c.execute('''
170
+ CREATE TABLE IF NOT EXISTS refresh_tokens (
171
+ user_id TEXT,
172
+ token TEXT,
173
+ expires_at TIMESTAMP,
174
+ FOREIGN KEY (user_id) REFERENCES users (id)
175
+ )
176
+ ''')
177
+
178
+ conn.commit()
179
+ logger.info("Database initialized successfully")
180
+ except Exception as e:
181
+ logger.error(f"Database initialization error: {e}")
182
+ raise
183
+ finally:
184
+ if conn:
185
+ conn.close()
186
+
187
+ # Initialize the database
188
+ init_auth_db()
189
+
190
+ # Password hashing with bcrypt
191
+ # Update the password hashing and verification functions to use a more reliable method
192
+
193
+ # Replace these functions
194
+ # Remove these conflicting functions
195
+ # def hash_password(password):
196
+ # """Hash a password using bcrypt"""
197
+ # return bcrypt.hash(password)
198
+ #
199
+ # def verify_password(plain_password, hashed_password):
200
+ # """Verify a password against its hash"""
201
+ # return bcrypt.verify(plain_password, hashed_password)
202
+
203
+ # Keep only these improved functions
204
+ def hash_password(password):
205
+ """Hash a password using bcrypt"""
206
+ # Use a more direct approach to avoid bcrypt version issues
207
+ import bcrypt
208
+ # Convert password to bytes if it's not already
209
+ if isinstance(password, str):
210
+ password = password.encode('utf-8')
211
+ # Generate salt and hash
212
+ salt = bcrypt.gensalt()
213
+ hashed = bcrypt.hashpw(password, salt)
214
+ # Return as string for storage
215
+ return hashed.decode('utf-8')
216
+
217
+ def verify_password(plain_password, hashed_password):
218
+ """Verify a password against its hash"""
219
+ import bcrypt
220
+ # Convert inputs to bytes if they're not already
221
+ if isinstance(plain_password, str):
222
+ plain_password = plain_password.encode('utf-8')
223
+ if isinstance(hashed_password, str):
224
+ hashed_password = hashed_password.encode('utf-8')
225
+
226
+ try:
227
+ # Use direct bcrypt verification
228
+ return bcrypt.checkpw(plain_password, hashed_password)
229
+ except Exception as e:
230
+ logger.error(f"Password verification error: {e}")
231
+ return False
232
+
233
+ # User registration
234
+ def register_user(email, password):
235
+ try:
236
+ conn = get_db_connection()
237
+ c = conn.cursor()
238
+
239
+ # Check if user already exists
240
+ c.execute("SELECT * FROM users WHERE email = ?", (email,))
241
+ if c.fetchone():
242
+ return False, "Email already registered"
243
+
244
+ # Create new user
245
+ user_id = str(uuid.uuid4())
246
+
247
+ # Add more detailed logging
248
+ logger.info(f"Registering new user with email: {email}")
249
+ hashed_pw = hash_password(password)
250
+ logger.info(f"Password hashed successfully: {bool(hashed_pw)}")
251
+
252
+ c.execute("""
253
+ INSERT INTO users
254
+ (id, email, hashed_password, subscription_tier, api_calls_remaining, last_reset_date)
255
+ VALUES (?, ?, ?, ?, ?, ?)
256
+ """, (user_id, email, hashed_pw, "free_tier", 5, datetime.now()))
257
+
258
+ conn.commit()
259
+ logger.info(f"User registered successfully: {email}")
260
+
261
+ # Verify the user was actually stored
262
+ c.execute("SELECT * FROM users WHERE email = ?", (email,))
263
+ stored_user = c.fetchone()
264
+ logger.info(f"User verification after registration: {bool(stored_user)}")
265
+
266
+ access_token = create_access_token(user_id)
267
+ return True, {
268
+ "user_id": user_id,
269
+ "access_token": access_token,
270
+ "token_type": "bearer"
271
+ }
272
+ except Exception as e:
273
+ logger.error(f"User registration error: {e}")
274
+ return False, f"Registration failed: {str(e)}"
275
+ finally:
276
+ if conn:
277
+ conn.close()
278
+
279
+ # User login
280
+ # Fix the authenticate_user function
281
+ # In the authenticate_user function, update the password verification to use hashed_password
282
+ def authenticate_user(email, password):
283
+ """Authenticate a user and return user data with tokens"""
284
+ try:
285
+ conn = get_db_connection()
286
+ c = conn.cursor()
287
+
288
+ # Get user by email
289
+ c.execute("SELECT * FROM users WHERE email = ? AND is_active = 1", (email,))
290
+ user = c.fetchone()
291
+
292
+ if not user:
293
+ logger.warning(f"User not found: {email}")
294
+ return None
295
+
296
+ # Add debug logging for password verification
297
+ logger.info(f"Verifying password for user: {email}")
298
+ logger.info(f"Stored hashed password: {user['hashed_password'][:20]}...")
299
+
300
+ try:
301
+ # Check if password verification works
302
+ is_valid = verify_password(password, user['hashed_password'])
303
+ logger.info(f"Password verification result: {is_valid}")
304
+
305
+ if not is_valid:
306
+ logger.warning(f"Password verification failed for user: {email}")
307
+ return None
308
+ except Exception as e:
309
+ logger.error(f"Password verification error: {e}")
310
+ return None
311
+
312
+ # Update last login time if column exists
313
+ try:
314
+ c.execute("UPDATE users SET last_login = ? WHERE id = ?",
315
+ (datetime.now(), user['id']))
316
+ conn.commit()
317
+ except sqlite3.OperationalError:
318
+ # last_login column might not exist
319
+ pass
320
+
321
+ # Convert sqlite3.Row to dict to use get() method
322
+ user_dict = dict(user)
323
+
324
+ # Create and return a User object
325
+ return User(
326
+ id=user_dict['id'],
327
+ email=user_dict['email'],
328
+ subscription_tier=user_dict.get('subscription_tier', 'free_tier'),
329
+ subscription_expiry=None, # Handle this properly if needed
330
+ api_calls_remaining=user_dict.get('api_calls_remaining', 5),
331
+ last_reset_date=user_dict.get('last_reset_date')
332
+ )
333
+ except Exception as e:
334
+ logger.error(f"Login error: {e}")
335
+ return None
336
+ finally:
337
+ if conn:
338
+ conn.close()
339
+
340
+ # Token generation and validation - completely replaced
341
+ def create_access_token(user_id):
342
+ """Create a new access token for a user"""
343
+ try:
344
+ # Create a JWT token with user_id and expiration
345
+ expiration = datetime.now() + JWT_EXPIRATION_DELTA
346
+
347
+ # Create a token payload
348
+ payload = {
349
+ "sub": user_id,
350
+ "exp": expiration.timestamp()
351
+ }
352
+
353
+ # Generate the JWT token
354
+ token = jwt.encode(payload, SECRET_KEY, algorithm=ALGORITHM)
355
+
356
+ logger.info(f"Created access token for user: {user_id}")
357
+ return token
358
+ except Exception as e:
359
+ logger.error(f"Token creation error: {e}")
360
+ return None
361
+
362
+
363
+ def update_auth_db_schema():
364
+ """Update the authentication database schema with any missing columns"""
365
+ try:
366
+ conn = get_db_connection()
367
+ c = conn.cursor()
368
+
369
+ # Check if tier column exists in subscriptions table
370
+ c.execute("PRAGMA table_info(subscriptions)")
371
+ columns = [column[1] for column in c.fetchall()]
372
+
373
+ # Add tier column if it doesn't exist
374
+ if "tier" not in columns:
375
+ logger.info("Adding 'tier' column to subscriptions table")
376
+ c.execute("ALTER TABLE subscriptions ADD COLUMN tier TEXT")
377
+ conn.commit()
378
+ logger.info("Database schema updated successfully")
379
+
380
+ conn.close()
381
+ except Exception as e:
382
+ logger.error(f"Database schema update error: {e}")
383
+ raise HTTPException(
384
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
385
+ detail=f"Database schema update error: {str(e)}"
386
+ )
387
+
388
+ # Add this to your get_current_user function
389
+ async def get_current_user(token: str = Depends(oauth2_scheme)):
390
+ credentials_exception = HTTPException(
391
+ status_code=status.HTTP_401_UNAUTHORIZED,
392
+ detail="Could not validate credentials",
393
+ headers={"WWW-Authenticate": "Bearer"},
394
+ )
395
+ try:
396
+ # Decode the JWT token
397
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
398
+ user_id: str = payload.get("sub")
399
+ if user_id is None:
400
+ logger.error("Token missing 'sub' field")
401
+ raise credentials_exception
402
+ except Exception as e:
403
+ logger.error(f"Token validation error: {str(e)}")
404
+ raise credentials_exception
405
+
406
+ # Get user from database
407
+ conn = get_db_connection()
408
+ cursor = conn.cursor()
409
+ cursor.execute("SELECT id, email, subscription_tier, is_active FROM users WHERE id = ?", (user_id,))
410
+ user_data = cursor.fetchone()
411
+ conn.close()
412
+
413
+ if user_data is None:
414
+ logger.error(f"User not found: {user_id}")
415
+ raise credentials_exception
416
+
417
+ user = User(
418
+ id=user_data[0],
419
+ email=user_data[1],
420
+ subscription_tier=user_data[2],
421
+ is_active=bool(user_data[3])
422
+ )
423
+
424
+ return user
425
+
426
+ async def get_current_active_user(current_user: User = Depends(get_current_user)):
427
+ """Get the current active user"""
428
+ return current_user
429
+
430
+ def create_user_subscription(email, tier):
431
+ """Create a subscription for a user"""
432
+ try:
433
+ # Get user by email
434
+ conn = get_db_connection()
435
+ c = conn.cursor()
436
+
437
+ # Get user ID
438
+ c.execute("SELECT id FROM users WHERE email = ?", (email,))
439
+ user_data = c.fetchone()
440
+
441
+ if not user_data:
442
+ return False, "User not found"
443
+
444
+ user_id = user_data['id']
445
+
446
+ # Check if tier is valid
447
+ valid_tiers = ["standard_tier", "premium_tier"]
448
+ if tier not in valid_tiers:
449
+ return False, f"Invalid tier: {tier}. Must be one of {valid_tiers}"
450
+
451
+ # Create subscription
452
+ subscription_id = str(uuid.uuid4())
453
+ created_at = datetime.now()
454
+ expires_at = created_at + timedelta(days=30) # 30-day subscription
455
+
456
+ # Insert subscription
457
+ c.execute("""
458
+ INSERT INTO subscriptions
459
+ (id, user_id, tier, status, created_at, expires_at)
460
+ VALUES (?, ?, ?, ?, ?, ?)
461
+ """, (subscription_id, user_id, tier, "active", created_at, expires_at))
462
+
463
+ # Update user's subscription tier
464
+ c.execute("""
465
+ UPDATE users
466
+ SET subscription_tier = ?
467
+ WHERE id = ?
468
+ """, (tier, user_id))
469
+
470
+ conn.commit()
471
+
472
+ return True, {
473
+ "id": subscription_id,
474
+ "user_id": user_id,
475
+ "tier": tier,
476
+ "status": "active",
477
+ "created_at": created_at.isoformat(),
478
+ "expires_at": expires_at.isoformat()
479
+ }
480
+ except Exception as e:
481
+ logger.error(f"Subscription creation error: {e}")
482
+ return False, f"Failed to create subscription: {str(e)}"
483
+ finally:
484
+ if conn:
485
+ conn.close()
486
+
487
+ def get_user(user_id: str):
488
+ """Get user by ID"""
489
+ try:
490
+ conn = get_db_connection()
491
+ c = conn.cursor()
492
+
493
+ # Get user
494
+ c.execute("SELECT * FROM users WHERE id = ? AND is_active = 1", (user_id,))
495
+ user_data = c.fetchone()
496
+
497
+ if not user_data:
498
+ return None
499
+
500
+ # Convert to User model
501
+ user_dict = dict(user_data)
502
+
503
+ # Handle datetime conversions if needed
504
+ if user_dict.get("subscription_expiry") and isinstance(user_dict["subscription_expiry"], str):
505
+ user_dict["subscription_expiry"] = datetime.fromisoformat(user_dict["subscription_expiry"])
506
+ if user_dict.get("last_reset_date") and isinstance(user_dict["last_reset_date"], str):
507
+ user_dict["last_reset_date"] = datetime.fromisoformat(user_dict["last_reset_date"])
508
+
509
+ return User(
510
+ id=user_dict['id'],
511
+ email=user_dict['email'],
512
+ subscription_tier=user_dict['subscription_tier'],
513
+ subscription_expiry=user_dict.get('subscription_expiry'),
514
+ api_calls_remaining=user_dict.get('api_calls_remaining', 5),
515
+ last_reset_date=user_dict.get('last_reset_date')
516
+ )
517
+ except Exception as e:
518
+ logger.error(f"Get user error: {e}")
519
+ return None
520
+ finally:
521
+ if conn:
522
+ conn.close()
523
+
524
+ def check_subscription_access(user: User, feature: str, file_size_mb: Optional[float] = None):
525
+ """Check if the user has access to the requested feature and file size"""
526
+ # Check if subscription is expired
527
+ if user.subscription_tier != "free_tier" and user.subscription_expiry and user.subscription_expiry < datetime.now():
528
+ # Downgrade to free tier if subscription expired
529
+ user.subscription_tier = "free_tier"
530
+ user.api_calls_remaining = SUBSCRIPTION_TIERS["free_tier"]["limits"]["daily_api_calls"]
531
+ with get_db_connection() as conn:
532
+ c = conn.cursor()
533
+ c.execute("""
534
+ UPDATE users
535
+ SET subscription_tier = ?, api_calls_remaining = ?
536
+ WHERE id = ?
537
+ """, (user.subscription_tier, user.api_calls_remaining, user.id))
538
+ conn.commit()
539
+
540
+ # Reset API calls if needed
541
+ user = reset_api_calls_if_needed(user)
542
+
543
+ # Check if user has API calls remaining
544
+ if user.api_calls_remaining <= 0:
545
+ raise HTTPException(
546
+ status_code=429,
547
+ detail="API call limit reached for today. Please upgrade your subscription or try again tomorrow."
548
+ )
549
+
550
+ # Check if feature is available in user's subscription tier
551
+ tier_features = SUBSCRIPTION_TIERS[user.subscription_tier]["features"]
552
+ if feature not in tier_features:
553
+ raise HTTPException(
554
+ status_code=403,
555
+ detail=f"The {feature} feature is not available in your {user.subscription_tier} subscription. Please upgrade to access this feature."
556
+ )
557
+
558
+ # Check file size limit if applicable
559
+ if file_size_mb:
560
+ max_size = SUBSCRIPTION_TIERS[user.subscription_tier]["limits"]["max_document_size_mb"]
561
+ if file_size_mb > max_size:
562
+ raise HTTPException(
563
+ status_code=413,
564
+ detail=f"File size exceeds the {max_size}MB limit for your {user.subscription_tier} subscription. Please upgrade or use a smaller file."
565
+ )
566
+
567
+ # Decrement API calls remaining
568
+ user.api_calls_remaining -= 1
569
+ with get_db_connection() as conn:
570
+ c = conn.cursor()
571
+ c.execute("""
572
+ UPDATE users
573
+ SET api_calls_remaining = ?
574
+ WHERE id = ?
575
+ """, (user.api_calls_remaining, user.id))
576
+ conn.commit()
577
+
578
+ return True
579
+
580
+ def reset_api_calls_if_needed(user: User):
581
+ """Reset API call counter if it's a new day"""
582
+ today = datetime.now().date()
583
+ if user.last_reset_date is None or user.last_reset_date.date() < today:
584
+ tier_limits = SUBSCRIPTION_TIERS[user.subscription_tier]
585
+ user.api_calls_remaining = tier_limits["limits"]["daily_api_calls"]
586
+ user.last_reset_date = datetime.now()
587
+ # Update the user in the database
588
+ with get_db_connection() as conn:
589
+ c = conn.cursor()
590
+ c.execute("""
591
+ UPDATE users
592
+ SET api_calls_remaining = ?, last_reset_date = ?
593
+ WHERE id = ?
594
+ """, (user.api_calls_remaining, user.last_reset_date, user.id))
595
+ conn.commit()
596
+
597
+ return user
598
+
599
+ def login_user(email, password):
600
+ """Login a user with email and password"""
601
+ try:
602
+ # Authenticate user
603
+ user = authenticate_user(email, password)
604
+ if not user:
605
+ return False, "Incorrect username or password"
606
+
607
+ # Create access token
608
+ access_token = create_access_token(user.id)
609
+
610
+ # Create refresh token
611
+ refresh_token = str(uuid.uuid4())
612
+ expires_at = datetime.now() + timedelta(days=30)
613
+
614
+ # Store refresh token
615
+ conn = get_db_connection()
616
+ c = conn.cursor()
617
+ c.execute("INSERT INTO refresh_tokens VALUES (?, ?, ?)",
618
+ (user.id, refresh_token, expires_at))
619
+ conn.commit()
620
+
621
+ # Get subscription info
622
+ c.execute("SELECT * FROM subscriptions WHERE user_id = ? AND status = 'active'", (user.id,))
623
+ subscription = c.fetchone()
624
+
625
+ # Convert subscription to dict if it exists, otherwise set to None
626
+ subscription_dict = dict(subscription) if subscription else None
627
+
628
+ conn.close()
629
+
630
+ return True, {
631
+ "user_id": user.id,
632
+ "email": user.email,
633
+ "access_token": access_token,
634
+ "refresh_token": refresh_token,
635
+ "subscription": subscription_dict
636
+ }
637
+ except Exception as e:
638
+ logger.error(f"Login error: {e}")
639
+ return False, f"Login failed: {str(e)}"
640
+
641
+
642
+ def get_subscription_plans():
643
+ """
644
+ Returns a list of available subscription plans based on SUBSCRIPTION_TIERS.
645
+ """
646
+ plans = []
647
+ for tier, details in SUBSCRIPTION_TIERS.items():
648
+ plans.append({
649
+ "tier": tier,
650
+ "price": details["price"],
651
+ "currency": details["currency"],
652
+ "features": details["features"],
653
+ "limits": details["limits"]
654
+ })
655
+ return plans
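To exercise the helpers above from a Python shell (a sketch; it assumes the module can create its SQLite file at DB_PATH):

from auth import register_user, login_user, get_subscription_plans

ok, signup = register_user("test@example.com", "s3cret-password")
print(ok, signup)            # on success: user_id, access_token, token_type

ok, session = login_user("test@example.com", "s3cret-password")
print(ok, session)           # access_token, refresh_token and any active subscription

print(get_subscription_plans())   # the three tiers defined in SUBSCRIPTION_TIERS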
fix_users_table.py ADDED
@@ -0,0 +1,180 @@
1
+ import sqlite3
2
+ import os
3
+ import uuid
4
+ import datetime
5
+
6
+ # Define both database paths
7
+ DB_PATH_1 = os.path.join(os.path.dirname(__file__), "../data/user_data.db")
8
+ DB_PATH_2 = os.path.join(os.path.dirname(__file__), "data/user_data.db")
9
+
10
+ # Define the function to create users table
11
+ # Make sure the create_users_table function allows NULL for hashed_password temporarily
12
+ def create_users_table(cursor):
13
+ """Create the users table with all required columns"""
14
+ cursor.execute('''
15
+ CREATE TABLE users (
16
+ id TEXT PRIMARY KEY,
17
+ email TEXT UNIQUE NOT NULL,
18
+ hashed_password TEXT DEFAULT 'temp_hash_for_migration',
19
+ password TEXT,
20
+ subscription_tier TEXT DEFAULT 'free_tier',
21
+ is_active BOOLEAN DEFAULT 1,
22
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
23
+ api_calls_remaining INTEGER DEFAULT 10,
24
+ last_reset_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
25
+ )
26
+ ''')
27
+
28
+ # Update the CREATE TABLE statement to include all necessary columns
29
+ def fix_users_table(db_path):
30
+ # Make sure the data directory exists
31
+ data_dir = os.path.dirname(db_path)
32
+ if not os.path.exists(data_dir):
33
+ print(f"Creating data directory: {data_dir}")
34
+ os.makedirs(data_dir, exist_ok=True)
35
+
36
+ if not os.path.exists(db_path):
37
+ print(f"Database does not exist at: {os.path.abspath(db_path)}")
38
+ return False
39
+
40
+ print(f"Using database path: {os.path.abspath(db_path)}")
41
+
42
+ # Connect to the database
43
+ conn = sqlite3.connect(db_path)
44
+ cursor = conn.cursor()
45
+
46
+ # Check if users table exists
47
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='users'")
48
+ if cursor.fetchone():
49
+ print("Users table exists, checking schema...")
50
+
51
+ # Check columns
52
+ cursor.execute("PRAGMA table_info(users)")
53
+ columns_info = cursor.fetchall()
54
+ columns = [column[1] for column in columns_info]
55
+
56
+ # List of all required columns
57
+ required_columns = ['id', 'email', 'hashed_password', 'password', 'subscription_tier',
58
+ 'is_active', 'created_at', 'api_calls_remaining', 'last_reset_date']
59
+
60
+ # Check if any required column is missing
61
+ missing_columns = [col for col in required_columns if col not in columns]
62
+
63
+ if missing_columns:
64
+ print(f"Schema needs fixing. Missing columns: {', '.join(missing_columns)}")
65
+
66
+ # Dynamically build the SELECT query based on available columns
67
+ available_columns = [col for col in columns if col != 'id'] # Exclude id as we'll generate new ones
68
+
69
+ if not available_columns:
70
+ print("No usable columns found in users table, creating new table...")
71
+ cursor.execute("DROP TABLE users")
72
+ create_users_table(cursor)
73
+ print("Created new empty users table with correct schema")
74
+ else:
75
+ # Backup existing users with available columns
76
+ select_query = f"SELECT {', '.join(available_columns)} FROM users"
77
+ print(f"Backing up users with query: {select_query}")
78
+ cursor.execute(select_query)
79
+ existing_users = cursor.fetchall()
80
+
81
+ # Drop the existing table
82
+ cursor.execute("DROP TABLE users")
83
+
84
+ # Create the table with the correct schema
85
+ create_users_table(cursor)
86
+
87
+ # Restore the users with new UUIDs for IDs
88
+ if existing_users:
89
+ current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
90
+ for user in existing_users:
91
+ user_id = str(uuid.uuid4())
92
+
93
+ # Create a dictionary to map column names to values
94
+ user_data = {'id': user_id}
95
+ for i, col in enumerate(available_columns):
96
+ user_data[col] = user[i]
97
+
98
+ # Set default values for missing columns
99
+ # Add a default value for hashed_password in the Set default values section
100
+ if 'hashed_password' not in user_data:
101
+ user_data['hashed_password'] = 'temp_hash_for_migration' # Temporary hash for migration
102
+ if 'subscription_tier' not in user_data:
103
+ user_data['subscription_tier'] = 'free_tier'
104
+ if 'is_active' not in user_data:
105
+ user_data['is_active'] = 1
106
+ if 'created_at' not in user_data:
107
+ user_data['created_at'] = current_time
108
+ if 'api_calls_remaining' not in user_data:
109
+ user_data['api_calls_remaining'] = 10
110
+ if 'last_reset_date' not in user_data:
111
+ user_data['last_reset_date'] = current_time
112
+
113
+ # Build INSERT query with all required columns
114
+ insert_columns = ['id']
115
+ insert_values = [user_id]
116
+
117
+ # Add values for columns that exist in the old table
118
+ for col in available_columns:
119
+ insert_columns.append(col)
120
+ insert_values.append(user_data[col])
121
+
122
+ # Add default values for columns that don't exist in the old table
123
+ for col in required_columns:
124
+ # Add hashed_password to the column default values section
125
+ if col not in ['id'] + available_columns:
126
+ insert_columns.append(col)
127
+ if col == 'subscription_tier':
128
+ insert_values.append('free_tier')
129
+ elif col == 'is_active':
130
+ insert_values.append(1)
131
+ elif col == 'created_at':
132
+ insert_values.append(current_time)
133
+ elif col == 'api_calls_remaining':
134
+ insert_values.append(10)
135
+ elif col == 'last_reset_date':
136
+ insert_values.append(current_time)
137
+ elif col == 'hashed_password':
138
+ insert_values.append('temp_hash_for_migration') # Temporary hash for migration
139
+ else:
140
+ insert_values.append(None) # Default to NULL for other columns
141
+
142
+ placeholders = ', '.join(['?'] * len(insert_columns))
143
+ insert_query = f"INSERT INTO users ({', '.join(insert_columns)}) VALUES ({placeholders})"
144
+
145
+ cursor.execute(insert_query, insert_values)
146
+
147
+ print(f"Fixed users table, restored {len(existing_users)} users")
148
+ else:
149
+ print("Users table schema is correct")
150
+ else:
151
+ print("Users table doesn't exist, creating it now...")
152
+ create_users_table(cursor)
153
+ print("Users table created successfully")
154
+
155
+ # Commit changes and close connection
156
+ conn.commit()
157
+ conn.close()
158
+ return True
159
+
160
+ if __name__ == "__main__":
161
+ print("Checking first database location...")
162
+ success1 = fix_users_table(DB_PATH_1)
163
+
164
+ print("\nChecking second database location...")
165
+ success2 = fix_users_table(DB_PATH_2)
166
+
167
+ if not (success1 or success2):
168
+ print("\nWarning: Could not find any existing database files.")
169
+ print("Creating a new database at the primary location...")
170
+ # Create a new database at the primary location
171
+ data_dir = os.path.dirname(DB_PATH_1)
172
+ if not os.path.exists(data_dir):
173
+ os.makedirs(data_dir, exist_ok=True)
174
+
175
+ conn = sqlite3.connect(DB_PATH_1)
176
+ cursor = conn.cursor()
177
+ create_users_table(cursor)
178
+ conn.commit()
179
+ conn.close()
180
+ print(f"Created new database at: {os.path.abspath(DB_PATH_1)}")
initialize_plans.py ADDED
@@ -0,0 +1,25 @@
1
+ import os
2
+ import sys
3
+ from dotenv import load_dotenv
4
+ from paypal_integration import initialize_subscription_plans
5
+
6
+ # Load environment variables
7
+ load_dotenv()
8
+
9
+ def main():
10
+ """Initialize PayPal subscription plans"""
11
+ print("Initializing PayPal subscription plans...")
12
+ plans = initialize_subscription_plans()
13
+
14
+ if plans:
15
+ print("✅ Plans initialized successfully:")
16
+ for tier, plan_id in plans.items():
17
+ print(f" - {tier}: {plan_id}")
18
+ return True
19
+ else:
20
+ print("❌ Failed to initialize plans. Check the logs for details.")
21
+ return False
22
+
23
+ if __name__ == "__main__":
24
+ success = main()
25
+ sys.exit(0 if success else 1)
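The script relies on the PayPal credentials being present in the environment (typically via a .env file) before it talks to the API; a sketch of what it expects and how it can be invoked programmatically:

# Environment variables read by paypal_integration (placeholders, normally set in .env):
#   PAYPAL_CLIENT_ID - PayPal client id (required)
#   PAYPAL_SECRET    - matching secret (required)
#   PAYPAL_BASE_URL  - optional, defaults to https://api-m.sandbox.paypal.com
from initialize_plans import main

main()   # prints the created plan ids, or a failure message if the PayPal calls fail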
legal_analysis.db ADDED
Binary file (28.7 kB).
 
paypal_integration.py ADDED
@@ -0,0 +1,1004 @@
1
+ import requests
2
+ import json
3
+ import sqlite3
4
+ from datetime import datetime, timedelta
5
+ import uuid
6
+ import os
7
+ import logging
8
+ from requests.adapters import HTTPAdapter
9
+ from requests.packages.urllib3.util.retry import Retry
10
+ from auth import get_db_connection
11
+ from dotenv import load_dotenv
12
+ load_dotenv()  # make sure PayPal credentials from .env are available before they are read below
13
+ # PayPal API Configuration - Remove default values for production
14
+ PAYPAL_CLIENT_ID = os.getenv("PAYPAL_CLIENT_ID")
15
+ PAYPAL_SECRET = os.getenv("PAYPAL_SECRET")
16
+ PAYPAL_BASE_URL = os.getenv("PAYPAL_BASE_URL", "https://api-m.sandbox.paypal.com")
17
+
18
+ # Add validation to ensure credentials are provided
19
+ # Set up logging; create the log directory first so the FileHandler does not fail on a fresh checkout
+ os.makedirs(os.path.join(os.path.dirname(__file__), "../logs"), exist_ok=True)
20
+ logging.basicConfig(
21
+ level=logging.INFO,
22
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
23
+ handlers=[
24
+ logging.FileHandler(os.path.join(os.path.dirname(__file__), "../logs/paypal.log")),
25
+ logging.StreamHandler()
26
+ ]
27
+ )
28
+ logger = logging.getLogger("paypal_integration")
29
+
30
+ # Then replace print statements with logger calls
31
+ # For example:
32
+ if not PAYPAL_CLIENT_ID or not PAYPAL_SECRET:
33
+ logger.warning("PayPal credentials not found in environment variables")
34
+
35
+
36
+ # Get PayPal access token
37
+ # Add better error handling for production
38
+ # Create a session with retry capability
39
+ def create_retry_session(retries=3, backoff_factor=0.3):
40
+ session = requests.Session()
41
+ retry = Retry(
42
+ total=retries,
43
+ read=retries,
44
+ connect=retries,
45
+ backoff_factor=backoff_factor,
46
+ status_forcelist=[500, 502, 503, 504],
47
+ )
48
+ adapter = HTTPAdapter(max_retries=retry)
49
+ session.mount('http://', adapter)
50
+ session.mount('https://', adapter)
51
+ return session
52
+
53
+ # Then use this session for API calls
54
+ # Replace get_access_token with logger instead of print
55
+ def get_access_token():
56
+ url = f"{PAYPAL_BASE_URL}/v1/oauth2/token"
57
+ headers = {
58
+ "Accept": "application/json",
59
+ "Accept-Language": "en_US"
60
+ }
61
+ data = "grant_type=client_credentials"
62
+
63
+ try:
64
+ session = create_retry_session()
65
+ response = session.post(
66
+ url,
67
+ auth=(PAYPAL_CLIENT_ID, PAYPAL_SECRET),
68
+ headers=headers,
69
+ data=data
70
+ )
71
+
72
+ if response.status_code == 200:
73
+ return response.json()["access_token"]
74
+ else:
75
+ logger.error(f"Error getting access token: {response.status_code}")
76
+ return None
77
+ except Exception as e:
78
+ logger.error(f"Exception in get_access_token: {str(e)}")
79
+ return None
80
+
81
+ def call_paypal_api(endpoint, method="GET", data=None, token=None):
82
+ """
83
+ Helper function to make PayPal API calls
84
+
85
+ Args:
86
+ endpoint: API endpoint (without base URL)
87
+ method: HTTP method (GET, POST, etc.)
88
+ data: Request payload (for POST/PUT)
89
+ token: PayPal access token (will be fetched if None)
90
+
91
+ Returns:
92
+ tuple: (success, response_data or error_message)
93
+ """
94
+ try:
95
+ if not token:
96
+ token = get_access_token()
97
+ if not token:
98
+ return False, "Failed to get PayPal access token"
99
+
100
+ url = f"{PAYPAL_BASE_URL}{endpoint}"
101
+ headers = {
102
+ "Content-Type": "application/json",
103
+ "Authorization": f"Bearer {token}"
104
+ }
105
+
106
+ session = create_retry_session()
107
+
108
+ if method.upper() == "GET":
109
+ response = session.get(url, headers=headers)
110
+ elif method.upper() == "POST":
111
+ response = session.post(url, headers=headers, data=json.dumps(data) if data else None)
112
+ elif method.upper() == "PUT":
113
+ response = session.put(url, headers=headers, data=json.dumps(data) if data else None)
114
+ else:
115
+ return False, f"Unsupported HTTP method: {method}"
116
+
117
+ if response.status_code in [200, 201, 204]:
118
+ if response.status_code == 204: # No content
119
+ return True, {}
120
+ return True, response.json() if response.text else {}
121
+ else:
122
+ logger.error(f"PayPal API error: {response.status_code} - {response.text}")
123
+ return False, f"PayPal API error: {response.status_code} - {response.text}"
124
+
125
+ except Exception as e:
126
+ logger.error(f"Error calling PayPal API: {str(e)}")
127
+ return False, f"Error calling PayPal API: {str(e)}"
128
+
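As an illustration of the helper above (not part of the file), fetching a subscription's details looks like this:

# Illustrative only: look up a subscription via the generic helper defined above
ok, data = call_paypal_api("/v1/billing/subscriptions/I-EXAMPLE123", "GET")
if ok:
    print(data.get("status"), data.get("plan_id"))
else:
    print("PayPal call failed:", data)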
129
+ def create_paypal_subscription(user_id, tier):
130
+ """Create a PayPal subscription for a user"""
131
+ try:
132
+ # Get the price from the subscription tier
133
+ from auth import SUBSCRIPTION_TIERS
134
+
135
+ if tier not in SUBSCRIPTION_TIERS:
136
+ return False, f"Invalid tier: {tier}"
137
+
138
+ price = SUBSCRIPTION_TIERS[tier]["price"]
139
+ currency = SUBSCRIPTION_TIERS[tier]["currency"]
140
+
141
+ # Create a PayPal subscription (implement PayPal API calls here)
142
+ # For now, just return a success response
143
+ return True, {
144
+ "subscription_id": f"test_sub_{uuid.uuid4()}",
145
+ "status": "ACTIVE",
146
+ "tier": tier,
147
+ "price": price,
148
+ "currency": currency
149
+ }
150
+ except Exception as e:
151
+ logger.error(f"Error creating PayPal subscription: {str(e)}")
152
+ return False, f"Failed to create PayPal subscription: {str(e)}"
153
+
154
+
155
+ # Create a product in PayPal
156
+ def create_product(name, description):
157
+ """Create a product in PayPal"""
158
+ payload = {
159
+ "name": name,
160
+ "description": description,
161
+ "type": "SERVICE",
162
+ "category": "SOFTWARE"
163
+ }
164
+
165
+ success, result = call_paypal_api("/v1/catalogs/products", "POST", payload)
166
+ if success:
167
+ return result["id"]
168
+ else:
169
+ logger.error(f"Failed to create product: {result}")
170
+ return None
171
+
172
+ # Create a subscription plan in PayPal
173
+ # Note: the billing cycles below are priced in USD on PayPal, while SUBSCRIPTION_TIERS in auth.py lists INR prices
174
+ def create_plan(product_id, name, price, interval="MONTH", interval_count=1):
175
+ """Create a subscription plan in PayPal"""
176
+ payload = {
177
+ "product_id": product_id,
178
+ "name": name,
179
+ "billing_cycles": [
180
+ {
181
+ "frequency": {
182
+ "interval_unit": interval,
183
+ "interval_count": interval_count
184
+ },
185
+ "tenure_type": "REGULAR",
186
+ "sequence": 1,
187
+ "total_cycles": 0, # Infinite cycles
188
+ "pricing_scheme": {
189
+ "fixed_price": {
190
+ "value": str(price),
191
+ "currency_code": "USD"
192
+ }
193
+ }
194
+ }
195
+ ],
196
+ "payment_preferences": {
197
+ "auto_bill_outstanding": True,
198
+ "setup_fee": {
199
+ "value": "0",
200
+ "currency_code": "USD"
201
+ },
202
+ "setup_fee_failure_action": "CONTINUE",
203
+ "payment_failure_threshold": 3
204
+ }
205
+ }
206
+
207
+ success, result = call_paypal_api("/v1/billing/plans", "POST", payload)
208
+ if success:
209
+ return result["id"]
210
+ else:
211
+ logger.error(f"Failed to create plan: {result}")
212
+ return None
213
+
214
+ # Note: the plans created below are priced in USD on PayPal
215
+ def initialize_subscription_plans():
216
+ """
217
+ Initialize PayPal subscription plans for the application.
218
+ This should be called once to set up the plans in PayPal.
219
+ """
220
+ try:
221
+ # Check if plans already exist
222
+ existing_plans = get_subscription_plans()
223
+ if existing_plans and len(existing_plans) >= 2:
224
+ logger.info("PayPal plans already initialized")
225
+ return existing_plans
226
+
227
+ # First, create products for each tier
228
+ products = {
229
+ "standard_tier": {
230
+ "name": "Standard Legal Document Analysis",
231
+ "description": "Standard subscription with document analysis features",
232
+ "type": "SERVICE",
233
+ "category": "SOFTWARE"
234
+ },
235
+ "premium_tier": {
236
+ "name": "Premium Legal Document Analysis",
237
+ "description": "Premium subscription with all document analysis features",
238
+ "type": "SERVICE",
239
+ "category": "SOFTWARE"
240
+ }
241
+ }
242
+
243
+ product_ids = {}
244
+ for tier, product_data in products.items():
245
+ success, result = call_paypal_api("/v1/catalogs/products", "POST", product_data)
246
+ if success:
247
+ product_ids[tier] = result["id"]
248
+ logger.info(f"Created PayPal product for {tier}: {result['id']}")
249
+ else:
250
+ logger.error(f"Failed to create product for {tier}: {result}")
251
+ return None
252
+
253
+ # Define the plans with product IDs - Changed currency to USD
254
+ plans = {
255
+ "standard_tier": {
256
+ "product_id": product_ids["standard_tier"],
257
+ "name": "Standard Plan",
258
+ "description": "Standard subscription with basic features",
259
+ "billing_cycles": [
260
+ {
261
+ "frequency": {
262
+ "interval_unit": "MONTH",
263
+ "interval_count": 1
264
+ },
265
+ "tenure_type": "REGULAR",
266
+ "sequence": 1,
267
+ "total_cycles": 0,
268
+ "pricing_scheme": {
269
+ "fixed_price": {
270
+ "value": "9.99",
271
+ "currency_code": "USD"
272
+ }
273
+ }
274
+ }
275
+ ],
276
+ "payment_preferences": {
277
+ "auto_bill_outstanding": True,
278
+ "setup_fee": {
279
+ "value": "0",
280
+ "currency_code": "USD"
281
+ },
282
+ "setup_fee_failure_action": "CONTINUE",
283
+ "payment_failure_threshold": 3
284
+ }
285
+ },
286
+ "premium_tier": {
287
+ "product_id": product_ids["premium_tier"],
288
+ "name": "Premium Plan",
289
+ "description": "Premium subscription with all features",
290
+ "billing_cycles": [
291
+ {
292
+ "frequency": {
293
+ "interval_unit": "MONTH",
294
+ "interval_count": 1
295
+ },
296
+ "tenure_type": "REGULAR",
297
+ "sequence": 1,
298
+ "total_cycles": 0,
299
+ "pricing_scheme": {
300
+ "fixed_price": {
301
+ "value": "19.99",
302
+ "currency_code": "USD"
303
+ }
304
+ }
305
+ }
306
+ ],
307
+ "payment_preferences": {
308
+ "auto_bill_outstanding": True,
309
+ "setup_fee": {
310
+ "value": "0",
311
+ "currency_code": "USD"
312
+ },
313
+ "setup_fee_failure_action": "CONTINUE",
314
+ "payment_failure_threshold": 3
315
+ }
316
+ }
317
+ }
318
+
319
+ # Create the plans in PayPal
320
+ created_plans = {}
321
+ for tier, plan_data in plans.items():
322
+ success, result = call_paypal_api("/v1/billing/plans", "POST", plan_data)
323
+ if success:
324
+ created_plans[tier] = result["id"]
325
+ logger.info(f"Created PayPal plan for {tier}: {result['id']}")
326
+ else:
327
+ logger.error(f"Failed to create plan for {tier}: {result}")
328
+
329
+ # Save the plan IDs to a file
330
+ if created_plans:
331
+ save_subscription_plans(created_plans)
332
+ return created_plans
333
+ else:
334
+ logger.error("Failed to create any PayPal plans")
335
+ return None
336
+ except Exception as e:
337
+ logger.error(f"Error initializing subscription plans: {str(e)}")
338
+ return None
339
+
340
+ # Update create_subscription_link to use call_paypal_api helper
341
+ def create_subscription_link(plan_id):
342
+ # Get the plan IDs
343
+ plans = get_subscription_plans()
344
+ if not plans:
345
+ return None
346
+
347
+ # Use environment variable for the app URL to make it work in different environments
348
+ app_url = os.getenv("APP_URL", "http://localhost:8501")
349
+
350
+ payload = {
351
+ "plan_id": plans[plan_id],
352
+ "application_context": {
353
+ "brand_name": "Legal Document Analyzer",
354
+ "locale": "en_US",
355
+ "shipping_preference": "NO_SHIPPING",
356
+ "user_action": "SUBSCRIBE_NOW",
357
+ "return_url": f"{app_url}?status=success&subscription_id={{id}}",
358
+ "cancel_url": f"{app_url}?status=cancel"
359
+ }
360
+ }
361
+
362
+ success, data = call_paypal_api("/v1/billing/subscriptions", "POST", payload)
363
+ if not success:
364
+ logger.error(f"Error creating subscription: {data}")
365
+ return None
366
+
367
+ try:
368
+ return {
369
+ "subscription_id": data["id"],
370
+ "approval_url": next(link["href"] for link in data["links"] if link["rel"] == "approve")
371
+ }
372
+ except Exception as e:
373
+ logger.error(f"Exception processing subscription response: {str(e)}")
374
+ return None
375
+
376
+ # Fix the webhook handler function signature to match how it's called in app.py
377
+ def handle_subscription_webhook(payload):
378
+ """
379
+ Handle PayPal subscription webhooks
380
+
381
+ Args:
382
+ payload: The full webhook payload
383
+
384
+ Returns:
385
+ tuple: (success, result)
386
+ - success: True if successful, False otherwise
387
+ - result: Success message or error message
388
+ """
389
+ try:
390
+ event_type = payload.get("event_type")
391
+ resource = payload.get("resource", {})
392
+
393
+ logger.info(f"Received PayPal webhook: {event_type}")
394
+
395
+ # Handle different event types
396
+ if event_type == "BILLING.SUBSCRIPTION.CREATED":
397
+ # A subscription was created
398
+ subscription_id = resource.get("id")
399
+ if not subscription_id:
400
+ return False, "Missing subscription ID in webhook"
401
+
402
+ # Update subscription status in database
403
+ conn = get_db_connection()
404
+ cursor = conn.cursor()
405
+ cursor.execute(
406
+ "UPDATE subscriptions SET status = 'pending' WHERE paypal_subscription_id = ?",
407
+ (subscription_id,)
408
+ )
409
+ conn.commit()
410
+ conn.close()
411
+
412
+ return True, "Subscription created successfully"
413
+
414
+ elif event_type == "BILLING.SUBSCRIPTION.ACTIVATED":
415
+ # A subscription was activated
416
+ subscription_id = resource.get("id")
417
+ if not subscription_id:
418
+ return False, "Missing subscription ID in webhook"
419
+
420
+ # Update subscription status in database
421
+ conn = get_db_connection()
422
+ cursor = conn.cursor()
423
+ cursor.execute(
424
+ "UPDATE subscriptions SET status = 'active' WHERE paypal_subscription_id = ?",
425
+ (subscription_id,)
426
+ )
427
+ conn.commit()
428
+ conn.close()
429
+
430
+ return True, "Subscription activated successfully"
431
+
432
+ elif event_type == "BILLING.SUBSCRIPTION.CANCELLED":
433
+ # A subscription was cancelled
434
+ subscription_id = resource.get("id")
435
+ if not subscription_id:
436
+ return False, "Missing subscription ID in webhook"
437
+
438
+ # Update subscription status in database
439
+ conn = get_db_connection()
440
+ cursor = conn.cursor()
441
+ cursor.execute(
442
+ "UPDATE subscriptions SET status = 'cancelled' WHERE paypal_subscription_id = ?",
443
+ (subscription_id,)
444
+ )
445
+ conn.commit()
446
+ conn.close()
447
+
448
+ return True, "Subscription cancelled successfully"
449
+
450
+ elif event_type == "BILLING.SUBSCRIPTION.SUSPENDED":
451
+ # A subscription was suspended
452
+ subscription_id = resource.get("id")
453
+ if not subscription_id:
454
+ return False, "Missing subscription ID in webhook"
455
+
456
+ # Update subscription status in database
457
+ conn = get_db_connection()
458
+ cursor = conn.cursor()
459
+ cursor.execute(
460
+ "UPDATE subscriptions SET status = 'suspended' WHERE paypal_subscription_id = ?",
461
+ (subscription_id,)
462
+ )
463
+ conn.commit()
464
+ conn.close()
465
+
466
+ return True, "Subscription suspended successfully"
467
+
468
+ else:
469
+ # Unhandled event type
470
+ logger.info(f"Unhandled webhook event type: {event_type}")
471
+ return True, f"Unhandled event type: {event_type}"
472
+
473
+ except Exception as e:
474
+ logger.error(f"Error handling webhook: {str(e)}")
475
+ return False, f"Error handling webhook: {str(e)}"
476
+ # Add this function to update user subscription
477
+ def update_user_subscription(user_email, subscription_id, tier):
478
+ """
479
+ Update a user's subscription status
480
+
481
+ Args:
482
+ user_email: The email of the user
483
+ subscription_id: The PayPal subscription ID
484
+ tier: The subscription tier
485
+
486
+ Returns:
487
+ tuple: (success, result)
488
+ - success: True if successful, False otherwise
489
+ - result: Success message or error message
490
+ """
491
+ try:
492
+ # Get user ID from email
493
+ conn = get_db_connection()
494
+ cursor = conn.cursor()
495
+ cursor.execute("SELECT id FROM users WHERE email = ?", (user_email,))
496
+ user_result = cursor.fetchone()
497
+
498
+ if not user_result:
499
+ conn.close()
500
+ return False, f"User not found: {user_email}"
501
+
502
+ user_id = user_result[0]
503
+
504
+ # Update the subscription status
505
+ cursor.execute(
506
+ "UPDATE subscriptions SET status = 'active' WHERE user_id = ? AND paypal_subscription_id = ?",
507
+ (user_id, subscription_id)
508
+ )
509
+
510
+ # Deactivate any other active subscriptions for this user
511
+ cursor.execute(
512
+ "UPDATE subscriptions SET status = 'inactive' WHERE user_id = ? AND paypal_subscription_id != ? AND status = 'active'",
513
+ (user_id, subscription_id)
514
+ )
515
+
516
+ # Update the user's subscription tier
517
+ cursor.execute(
518
+ "UPDATE users SET subscription_tier = ? WHERE email = ?",
519
+ (tier, user_email)
520
+ )
521
+
522
+ conn.commit()
523
+ conn.close()
524
+
525
+ return True, f"Subscription updated to {tier} tier"
526
+
527
+ except Exception as e:
528
+ logger.error(f"Error updating user subscription: {str(e)}")
529
+ return False, f"Error updating subscription: {str(e)}"
530
+
531
+ # Add this near the top with other path definitions
532
+ # Update the PLAN_IDS_PATH definition to use the correct path
533
+ PLAN_IDS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "data", "plan_ids.json"))
534
+
535
+ # Make sure the data directory exists
536
+ os.makedirs(os.path.dirname(PLAN_IDS_PATH), exist_ok=True)
537
+
538
+ # Add this debug log to see where the file is expected
539
+ logger.info(f"PayPal plans will be stored at: {PLAN_IDS_PATH}")
540
+
541
+ # Add this function if it's not defined elsewhere
542
+ def get_db_connection():
543
+ """Get a connection to the SQLite database"""
544
+ DB_PATH = os.getenv("DB_PATH", os.path.join(os.path.dirname(__file__), "../data/user_data.db"))
545
+ # Make sure the data directory exists
546
+ os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
547
+ return sqlite3.connect(DB_PATH)
548
+
549
+ # Add this function to create subscription tables if needed
+ def initialize_database():
+     """Initialize the database tables needed for subscriptions"""
+     conn = get_db_connection()
+     cursor = conn.cursor()
+
+     # Check if subscriptions table exists
+     cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='subscriptions'")
+     if cursor.fetchone():
+         # Table exists, check if required columns exist
+         cursor.execute("PRAGMA table_info(subscriptions)")
+         columns = [column[1] for column in cursor.fetchall()]
+
+         # Check for missing columns and add them if needed
+         if "user_id" not in columns:
+             logger.info("Adding 'user_id' column to subscriptions table")
+             cursor.execute("ALTER TABLE subscriptions ADD COLUMN user_id TEXT NOT NULL DEFAULT ''")
+
+         if "created_at" not in columns:
+             logger.info("Adding 'created_at' column to subscriptions table")
+             cursor.execute("ALTER TABLE subscriptions ADD COLUMN created_at TIMESTAMP")
+
+         if "expires_at" not in columns:
+             logger.info("Adding 'expires_at' column to subscriptions table")
+             cursor.execute("ALTER TABLE subscriptions ADD COLUMN expires_at TIMESTAMP")
+
+         if "paypal_subscription_id" not in columns:
+             logger.info("Adding 'paypal_subscription_id' column to subscriptions table")
+             cursor.execute("ALTER TABLE subscriptions ADD COLUMN paypal_subscription_id TEXT")
+     else:
+         # Create subscriptions table with all required columns
+         cursor.execute('''
+             CREATE TABLE IF NOT EXISTS subscriptions (
+                 id TEXT PRIMARY KEY,
+                 user_id TEXT NOT NULL,
+                 tier TEXT NOT NULL,
+                 status TEXT NOT NULL,
+                 created_at TIMESTAMP NOT NULL,
+                 expires_at TIMESTAMP,
+                 paypal_subscription_id TEXT
+             )
+         ''')
+         logger.info("Created subscriptions table with all required columns")
+
+     # Create PayPal plans table if it doesn't exist
+     cursor.execute('''
+         CREATE TABLE IF NOT EXISTS paypal_plans (
+             plan_id TEXT PRIMARY KEY,
+             tier TEXT NOT NULL,
+             price REAL NOT NULL,
+             currency TEXT NOT NULL,
+             created_at TIMESTAMP NOT NULL
+         )
+     ''')
+
+     conn.commit()
+     conn.close()
+     logger.info("Database initialization completed")
+
+
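With the schema above in place, looking up a user's current subscription is a straightforward query; a sketch (the user id value is a placeholder):

    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute(
        "SELECT tier, status, expires_at FROM subscriptions "
        "WHERE user_id = ? AND status = 'active' ORDER BY created_at DESC",
        ("example-user-id",)   # placeholder user id
    )
    active_subscription = cursor.fetchone()   # None if the user has no active row
    conn.close()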
+ def create_user_subscription_mock(user_email, tier):
+     """
+     Create a mock subscription for testing
+
+     Args:
+         user_email: The email of the user
+         tier: The subscription tier
+
+     Returns:
+         tuple: (success, result)
+     """
+     try:
+         logger.info(f"Creating mock subscription for {user_email} at tier {tier}")
+
+         # Get user ID from email
+         conn = get_db_connection()
+         cursor = conn.cursor()
+         cursor.execute("SELECT id FROM users WHERE email = ?", (user_email,))
+         user_result = cursor.fetchone()
+
+         if not user_result:
+             conn.close()
+             return False, f"User not found: {user_email}"
+
+         user_id = user_result[0]
+
+         # Create a mock subscription ID
+         subscription_id = f"mock_sub_{uuid.uuid4()}"
+
+         # Store the subscription in database
+         sub_id = str(uuid.uuid4())
+         start_date = datetime.now()
+
+         cursor.execute(
+             "INSERT INTO subscriptions (id, user_id, tier, status, created_at, expires_at, paypal_subscription_id) VALUES (?, ?, ?, ?, ?, ?, ?)",
+             (sub_id, user_id, tier, "active", start_date, start_date + timedelta(days=30), subscription_id)
+         )
+
+         # Update user's subscription tier
+         cursor.execute(
+             "UPDATE users SET subscription_tier = ? WHERE id = ?",
+             (tier, user_id)
+         )
+
+         conn.commit()
+         conn.close()
+
+         # Use environment variable for the app URL
+         app_url = os.getenv("APP_URL", "http://localhost:3000")
+
+         # Return success with mock approval URL that matches the real PayPal URL pattern
+         return True, {
+             "subscription_id": subscription_id,
+             "approval_url": f"{app_url}/subscription/callback?status=success&subscription_id={subscription_id}",
+             "tier": tier
+         }
+
+     except Exception as e:
+         logger.error(f"Error creating mock subscription: {str(e)}")
+         return False, f"Error creating subscription: {str(e)}"
+
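A usage sketch for local testing (the email and tier are illustrative, and the user must already exist in the users table):

    success, result = create_user_subscription_mock("user@example.com", "premium_tier")
    if success:
        print(result["approval_url"])   # mock callback URL, no PayPal round trip
    else:
        print(f"Mock subscription failed: {result}")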
+ # Add this at the end of the file
+ def initialize():
+     """Initialize the PayPal integration module"""
+     try:
+         # Create necessary directories
+         os.makedirs(os.path.dirname(PLAN_IDS_PATH), exist_ok=True)
+
+         # Initialize database
+         initialize_database()
+
+         # Initialize subscription plans
+         plans = get_subscription_plans()
+         if plans:
+             logger.info(f"Subscription plans initialized: {plans}")
+         else:
+             logger.warning("Failed to initialize subscription plans")
+
+         return True
+     except Exception as e:
+         logger.error(f"Error initializing PayPal integration: {str(e)}")
+         return False
+
+ # The module-level initialize() call is made at the very end of this file, once
+ # get_subscription_plans (defined below) exists; calling it here would fail at
+ # import time because that name has not been defined yet.
+
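If relying on import-time side effects is undesirable, the application could instead run the initialization explicitly when it starts. A sketch assuming a FastAPI app object (this hook is illustrative and not taken from app.py):

    from fastapi import FastAPI

    app = FastAPI()

    @app.on_event("startup")
    def init_paypal_integration():
        # initialize() creates the data directory, the tables, and loads the saved plan IDs
        if not initialize():
            logger.warning("PayPal integration did not initialize cleanly")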
+ # Add this function to get subscription plans
+ def get_subscription_plans():
+     """
+     Get all available subscription plans with correct pricing
+     """
+     try:
+         # Check if we have plan IDs saved in a file
+         if os.path.exists(PLAN_IDS_PATH):
+             try:
+                 with open(PLAN_IDS_PATH, 'r') as f:
+                     plans = json.load(f)
+                 logger.info(f"Loaded subscription plans from {PLAN_IDS_PATH}: {plans}")
+                 return plans
+             except Exception as e:
+                 logger.error(f"Error reading plan IDs file: {str(e)}")
+                 return {}
+
+         # If no file exists, return empty dict
+         logger.warning(f"No plan IDs file found at {PLAN_IDS_PATH}. Please initialize subscription plans.")
+         return {}
+
+     except Exception as e:
+         logger.error(f"Error getting subscription plans: {str(e)}")
+         return {}
+
+
+ def create_user_subscription(user_email, tier):
+     """
+     Create a real PayPal subscription for a user
+
+     Args:
+         user_email: The email of the user
+         tier: The subscription tier (standard_tier or premium_tier)
+
+     Returns:
+         tuple: (success, result)
+             - success: True if successful, False otherwise
+             - result: Dictionary with subscription details or error message
+     """
+     try:
+         # Validate tier
+         valid_tiers = ["standard_tier", "premium_tier"]
+         if tier not in valid_tiers:
+             return False, f"Invalid tier: {tier}. Must be one of {valid_tiers}"
+
+         # Get the plan IDs
+         plans = get_subscription_plans()
+
+         # Log the plans for debugging
+         logger.info(f"Available subscription plans: {plans}")
+
+         # If no plans found, check if the file exists and try to load it directly
+         if not plans:
+             if os.path.exists(PLAN_IDS_PATH):
+                 logger.info(f"Plan IDs file exists at {PLAN_IDS_PATH}, but couldn't load plans. Trying direct load.")
+                 try:
+                     with open(PLAN_IDS_PATH, 'r') as f:
+                         plans = json.load(f)
+                     logger.info(f"Directly loaded plans: {plans}")
+                 except Exception as e:
+                     logger.error(f"Error directly loading plans: {str(e)}")
+             else:
+                 logger.error(f"Plan IDs file does not exist at {PLAN_IDS_PATH}")
+
+         # If still no plans, return error
+         if not plans:
+             logger.error("No PayPal plans found. Please initialize plans first.")
+             return False, "PayPal plans not configured. Please contact support."
+
+         # Check if the tier exists in plans
+         if tier not in plans:
+             return False, f"No plan found for tier: {tier}"
+
+         # Use environment variable for the app URL
+         app_url = os.getenv("APP_URL", "http://localhost:3000")
+
+         # Create the subscription with PayPal
+         payload = {
+             "plan_id": plans[tier],
+             "subscriber": {
+                 "email_address": user_email
+             },
+             "application_context": {
+                 "brand_name": "Legal Document Analyzer",
+                 "locale": "en-US",  # PayPal expects a BCP 47 locale ("en-US", not "en_US")
+                 "shipping_preference": "NO_SHIPPING",
+                 "user_action": "SUBSCRIBE_NOW",
+                 "return_url": f"{app_url}/subscription/callback?status=success",
+                 "cancel_url": f"{app_url}/subscription/callback?status=cancel"
+             }
+         }
+
+         # Make the API call to PayPal
+         success, subscription_data = call_paypal_api("/v1/billing/subscriptions", "POST", payload)
+         if not success:
+             return False, subscription_data  # This is already an error message
+
+         # Extract the approval URL
+         approval_url = next((link["href"] for link in subscription_data["links"]
+                              if link["rel"] == "approve"), None)
+
+         if not approval_url:
+             return False, "No approval URL found in PayPal response"
+
+         # Get user ID from email
+         conn = get_db_connection()
+         cursor = conn.cursor()
+         cursor.execute("SELECT id FROM users WHERE email = ?", (user_email,))
+         user_result = cursor.fetchone()
+
+         if not user_result:
+             conn.close()
+             return False, f"User not found: {user_email}"
+
+         user_id = user_result[0]
+
+         # Store pending subscription in database
+         sub_id = str(uuid.uuid4())
+         start_date = datetime.now()
+
+         cursor.execute(
+             "INSERT INTO subscriptions (id, user_id, tier, status, created_at, expires_at, paypal_subscription_id) VALUES (?, ?, ?, ?, ?, ?, ?)",
+             (sub_id, user_id, tier, "pending", start_date, None, subscription_data["id"])
+         )
+
+         conn.commit()
+         conn.close()
+
+         # Return success with approval URL
+         return True, {
+             "subscription_id": subscription_data["id"],
+             "approval_url": approval_url,
+             "tier": tier
+         }
+
+     except Exception as e:
+         logger.error(f"Error creating user subscription: {str(e)}")
+         return False, f"Error creating subscription: {str(e)}"
+
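A sketch of how this function might be exposed over HTTP (the route path and request model are assumptions for illustration, not taken from app.py):

    from fastapi import APIRouter, HTTPException
    from pydantic import BaseModel

    router = APIRouter()

    class SubscriptionRequest(BaseModel):
        email: str
        tier: str   # "standard_tier" or "premium_tier"

    @router.post("/subscribe")
    def subscribe(request: SubscriptionRequest):
        success, result = create_user_subscription(request.email, request.tier)
        if not success:
            raise HTTPException(status_code=400, detail=result)
        # The client then redirects the user to result["approval_url"] to approve payment
        return result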
+ # Add a function to cancel a subscription
+ def cancel_subscription(subscription_id, reason="Customer requested cancellation"):
+     """
+     Cancel a PayPal subscription
+
+     Args:
+         subscription_id: The PayPal subscription ID
+         reason: The reason for cancellation
+
+     Returns:
+         tuple: (success, result)
+             - success: True if successful, False otherwise
+             - result: Success message or error message
+     """
+     try:
+         # Cancel the subscription with PayPal
+         payload = {
+             "reason": reason
+         }
+
+         success, result = call_paypal_api(
+             f"/v1/billing/subscriptions/{subscription_id}/cancel",
+             "POST",
+             payload
+         )
+
+         if not success:
+             return False, result
+
+         # Update subscription status in database
+         conn = get_db_connection()
+         cursor = conn.cursor()
+         cursor.execute(
+             "UPDATE subscriptions SET status = 'cancelled' WHERE paypal_subscription_id = ?",
+             (subscription_id,)
+         )
+
+         # Get the user ID for this subscription
+         cursor.execute(
+             "SELECT user_id FROM subscriptions WHERE paypal_subscription_id = ?",
+             (subscription_id,)
+         )
+         user_result = cursor.fetchone()
+
+         if user_result:
+             # Update user to free tier
+             cursor.execute(
+                 "UPDATE users SET subscription_tier = 'free_tier' WHERE id = ?",
+                 (user_result[0],)
+             )
+
+         conn.commit()
+         conn.close()
+
+         return True, "Subscription cancelled successfully"
+
+     except Exception as e:
+         logger.error(f"Error cancelling subscription: {str(e)}")
+         return False, f"Error cancelling subscription: {str(e)}"
+
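Cancellation is symmetric with creation; a minimal usage sketch (the subscription id is a placeholder):

    success, message = cancel_subscription("I-EXAMPLE123456", reason="User downgraded plan")
    if not success:
        logger.error(f"Cancellation failed: {message}")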
+ def verify_subscription_payment(subscription_id):
+     """
+     Verify a subscription payment with PayPal
+
+     Args:
+         subscription_id: The PayPal subscription ID
+
+     Returns:
+         tuple: (success, result)
+             - success: True if successful, False otherwise
+             - result: Dictionary with subscription details or error message
+     """
+     try:
+         # Get subscription details from PayPal using our helper
+         success, subscription_data = call_paypal_api(f"/v1/billing/subscriptions/{subscription_id}")
+         if not success:
+             return False, subscription_data  # This is already an error message
+
+         # Check subscription status
+         status = subscription_data.get("status", "").upper()
+
+         if status not in ["ACTIVE", "APPROVED"]:
+             return False, f"Subscription is not active: {status}"
+
+         # Return success with subscription data
+         return True, subscription_data
+
+     except Exception as e:
+         logger.error(f"Error verifying subscription: {str(e)}")
+         return False, f"Error verifying subscription: {str(e)}"
+
+ def verify_paypal_subscription(subscription_id):
+     """
+     Verify a PayPal subscription
+
+     Args:
+         subscription_id: The PayPal subscription ID
+
+     Returns:
+         tuple: (success, result)
+     """
+     try:
+         # Skip verification for mock subscriptions
+         if subscription_id.startswith("mock_sub_"):
+             return True, {"status": "ACTIVE"}
+
+         # For real subscriptions, call PayPal API
+         success, result = call_paypal_api(f"/v1/billing/subscriptions/{subscription_id}", "GET")
+
+         if success:
+             # Check subscription status
+             if result.get("status") == "ACTIVE":
+                 return True, result
+             else:
+                 return False, f"Subscription is not active: {result.get('status')}"
+         else:
+             logger.error(f"PayPal API error: {result}")
+             return False, f"Failed to verify subscription: {result}"
+     except Exception as e:
+         logger.error(f"Error verifying PayPal subscription: {str(e)}")
+         return False, f"Error verifying subscription: {str(e)}"
+
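These verification helpers are what the return redirect would call once PayPal sends the user back to /subscription/callback; a hypothetical glue function (the name and arguments are illustrative):

    def handle_subscription_callback(status, subscription_id):
        # status and subscription_id come from the callback query string
        if status != "success":
            return False, "Subscription was cancelled by the user"
        ok, details = verify_paypal_subscription(subscription_id)
        if not ok:
            return False, details
        # On success, the pending row in `subscriptions` can be promoted to 'active'
        return True, details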
+ # Add this function to save subscription plans
+ def save_subscription_plans(plans):
+     """
+     Save subscription plans to a file
+
+     Args:
+         plans: Dictionary of plan IDs by tier
+     """
+     try:
+         with open(PLAN_IDS_PATH, 'w') as f:
+             json.dump(plans, f)
+         logger.info(f"Saved subscription plans to {PLAN_IDS_PATH}")
+         return True
+     except Exception as e:
+         logger.error(f"Error saving subscription plans: {str(e)}")
+         return False
+
+
+ # Call initialize when the module is imported; it is placed last so that every
+ # helper it uses is already defined.
+ initialize()
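For completeness, a sketch of seeding plan_ids.json by hand, for example from an initialization script, assuming the billing plans already exist on the PayPal side (the plan IDs below are placeholders):

    plans = {
        "standard_tier": "P-EXAMPLE-STANDARD-PLAN-ID",   # placeholder PayPal plan IDs
        "premium_tier": "P-EXAMPLE-PREMIUM-PLAN-ID",
    }
    if save_subscription_plans(plans):
        print(get_subscription_plans())   # should echo the mapping back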
requirements.txt ADDED
@@ -0,0 +1,21 @@
+ fastapi>=0.95.0
+ uvicorn>=0.21.1
+ pydantic>=1.10.7
+ python-multipart>=0.0.6
+ python-dotenv>=1.0.0
+ pdfplumber>=0.9.0
+ spacy>=3.5.2
+ torch>=2.0.0
+ transformers>=4.28.1
+ sentence-transformers>=2.2.2
+ moviepy>=1.0.3
+ matplotlib>=3.7.1
+ numpy>=1.24.2
+ passlib>=1.7.4
+ python-jose[cryptography]>=3.3.0
+ bcrypt>=4.0.1
+ requests>=2.28.2
+ SQLAlchemy>=2.0.9
+ aiofiles>=23.1.0
+ huggingface_hub>=0.16.4
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl