benkada committed on
Commit
1ece3c6
·
verified ·
1 Parent(s): 396640c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +37 -140
main.py CHANGED
@@ -1,19 +1,31 @@
1
  import os
 
 
2
  from fastapi import FastAPI, UploadFile, File, Form
3
- from fastapi.middleware.cors import CORSMiddleware
4
  from fastapi.responses import JSONResponse, HTMLResponse
 
5
  from fastapi.staticfiles import StaticFiles
6
- from huggingface_hub import InferenceClient
7
  from PyPDF2 import PdfReader
8
  from docx import Document
9
  from PIL import Image
10
- import io
11
- from io import BytesIO
12
- import requests
13
- from routers import ai
14
-
15
- # Get environment variables
16
- HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
 
 
 
 
 
 
 
 
 
 
17
  PORT = int(os.getenv("PORT", 7860))
18
 
19
  app = FastAPI(
@@ -22,7 +34,6 @@ app = FastAPI(
22
  version="1.0.0"
23
  )
24
 
25
- # Configure CORS
26
  app.add_middleware(
27
  CORSMiddleware,
28
  allow_origins=["*"],
@@ -31,156 +42,42 @@ app.add_middleware(
31
  allow_headers=["*"],
32
  )
33
 
34
- # Serve static files
35
  app.mount("/", StaticFiles(directory=".", html=True), name="static")
36
-
37
- # Include routers
38
  app.include_router(ai.router)
39
 
40
- # Initialisation des clients Hugging Face avec authentification
41
- summary_client = InferenceClient(model="facebook/bart-large-cnn", token=HUGGINGFACE_TOKEN)
42
- qa_client = InferenceClient(model="deepset/roberta-base-squad2", token=HUGGINGFACE_TOKEN)
43
- image_caption_client = InferenceClient(model="nlpconnect/vit-gpt2-image-captioning", token=HUGGINGFACE_TOKEN)
44
 
45
- # Extraction du texte des fichiers
 
 
46
  def extract_text_from_pdf(content: bytes) -> str:
47
- text = ""
48
  reader = PdfReader(io.BytesIO(content))
49
- for page in reader.pages:
50
- if page.extract_text():
51
- text += page.extract_text() + "\n"
52
- return text.strip()
53
 
54
  def extract_text_from_docx(content: bytes) -> str:
55
- text = ""
56
  doc = Document(io.BytesIO(content))
57
- for para in doc.paragraphs:
58
- text += para.text + "\n"
59
- return text.strip()
60
 
61
  def process_uploaded_file(file: UploadFile) -> str:
62
  content = file.file.read()
63
- extension = file.filename.split('.')[-1].lower()
64
-
65
- if extension == "pdf":
66
  return extract_text_from_pdf(content)
67
- elif extension == "docx":
68
  return extract_text_from_docx(content)
69
- elif extension == "txt":
70
  return content.decode("utf-8").strip()
71
- else:
72
- raise ValueError("Type de fichier non supporté")
73
-
74
- # Point d'entrée HTML
75
- @app.get("/", response_class=HTMLResponse)
76
- async def serve_homepage():
77
- with open("index.html", "r", encoding="utf-8") as f:
78
- return HTMLResponse(content=f.read(), status_code=200)
79
-
80
- # Résumé
81
- @app.post("/analyze")
82
- async def analyze_file(file: UploadFile = File(...)):
83
- try:
84
- text = process_uploaded_file(file)
85
-
86
- if len(text) < 20:
87
- return {"summary": "Document trop court pour être résumé."}
88
-
89
- summary = summary_client.summarization(text[:3000])
90
- return {"summary": summary}
91
-
92
- except Exception as e:
93
- return JSONResponse(status_code=500, content={"error": f"Erreur lors de l'analyse: {str(e)}"})
94
-
95
- # Question-Réponse
96
- @app.post("/ask")
97
- async def ask_question(file: UploadFile = File(...), question: str = Form(...)):
98
- try:
99
- # Determine if the file is an image
100
- content_type = file.content_type
101
- if content_type.startswith("image/"):
102
- image_bytes = await file.read()
103
- image_pil = Image.open(io.BytesIO(image_bytes)).convert("RGB")
104
- image_pil.thumbnail((1024, 1024))
105
-
106
- img_byte_arr = BytesIO()
107
- image_pil.save(img_byte_arr, format='JPEG')
108
- img_byte_arr = img_byte_arr.getvalue()
109
-
110
- # Generate image description
111
- result = image_caption_client.image_to_text(img_byte_arr)
112
- if isinstance(result, dict):
113
- context = result.get("generated_text") or result.get("caption") or ""
114
- elif isinstance(result, list) and len(result) > 0:
115
- context = result[0].get("generated_text", "")
116
- elif isinstance(result, str):
117
- context = result
118
- else:
119
- context = ""
120
-
121
- else:
122
- # Not an image, process as document
123
- text = process_uploaded_file(file)
124
- if len(text) < 20:
125
- return {"answer": "Document trop court pour répondre à la question."}
126
- context = text[:3000]
127
-
128
- if not context:
129
- return {"answer": "Aucune information disponible pour répondre à la question."}
130
-
131
- result = qa_client.question_answering(question=question, context=context)
132
- return {"answer": result.get("answer", "Aucune réponse trouvée.")}
133
-
134
- except Exception as e:
135
- return JSONResponse(status_code=500, content={"error": f"Erreur lors de la recherche de réponse: {str(e)}"})
136
-
137
- # Interprétation d'Image
138
- @app.post("/interpret_image")
139
- async def interpret_image(image: UploadFile = File(...)):
140
- try:
141
- # Lire l'image
142
- image_bytes = await image.read()
143
-
144
- # Ouvrir l'image avec PIL
145
- image_pil = Image.open(io.BytesIO(image_bytes))
146
- image_pil = image_pil.convert("RGB")
147
- image_pil.thumbnail((1024, 1024))
148
-
149
- # Convertir en bytes (JPEG)
150
- img_byte_arr = BytesIO()
151
- image_pil.save(img_byte_arr, format='JPEG')
152
- img_byte_arr = img_byte_arr.getvalue()
153
-
154
- # Appeler le modèle
155
- result = image_caption_client.image_to_text(img_byte_arr)
156
-
157
- # 🔍 Affichage du résultat brut pour débogage
158
- print("Résultat brut du modèle image-to-text:", result)
159
-
160
- # Extraire la description si disponible
161
- if isinstance(result, dict):
162
- description = result.get("generated_text") or result.get("caption") or "Description non trouvée."
163
- elif isinstance(result, list) and len(result) > 0:
164
- description = result[0].get("generated_text", "Description non trouvée.")
165
- elif isinstance(result, str):
166
- description = result
167
- else:
168
- description = "Description non trouvée."
169
-
170
- return {"description": description}
171
 
172
- except Exception as e:
173
- return JSONResponse(status_code=500, content={"error": f"Erreur lors de l'interprétation de l'image: {str(e)}"})
174
 
175
  @app.get("/api/health")
176
  async def health_check():
177
- return {
178
- "status": "healthy",
179
- "version": "1.0.0",
180
- "hf_token_set": bool(HUGGINGFACE_TOKEN)
181
- }
182
 
183
- # Démarrage local
184
  if __name__ == "__main__":
185
  import uvicorn
186
  uvicorn.run(app, host="0.0.0.0", port=PORT)
 
1
  import os
2
+ import io
3
+ from io import BytesIO
4
  from fastapi import FastAPI, UploadFile, File, Form
 
5
  from fastapi.responses import JSONResponse, HTMLResponse
6
+ from fastapi.middleware.cors import CORSMiddleware
7
  from fastapi.staticfiles import StaticFiles
8
+ from huggingface_hub import InferenceClient, login
9
  from PyPDF2 import PdfReader
10
  from docx import Document
11
  from PIL import Image
12
+ from routers import ai # conservez vos routes annexes
13
+
14
+ # ──────────────────────────────────────────────────────────────────────────────
15
+ # 1) Authentification Hugging Face
16
+ # ──────────────────────────────────────────────────────────────────────────────
17
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
18
+ if not HF_TOKEN:
19
+ raise RuntimeError(
20
+ "Variable d'environnement HF_TOKEN absente ; créez un jeton « Read » "
21
+ "sur https://huggingface.co/settings/tokens et exportez-le (voir .env)."
22
+ )
23
+
24
+ login(token=HF_TOKEN) # Authentifie tout le process
25
+
26
+ # ──────────────────────────────────────────────────────────────────────────────
27
+ # 2) Configuration FastAPI
28
+ # ──────────────────────────────────────────────────────────────────────────────
29
  PORT = int(os.getenv("PORT", 7860))
30
 
31
  app = FastAPI(
 
34
  version="1.0.0"
35
  )
36
 
 
37
  app.add_middleware(
38
  CORSMiddleware,
39
  allow_origins=["*"],
 
42
  allow_headers=["*"],
43
  )
44
 
 
45
  app.mount("/", StaticFiles(directory=".", html=True), name="static")
 
 
46
  app.include_router(ai.router)
47
 
48
+ # Clients HF (token passé implicitement)
49
+ summary_client = InferenceClient("facebook/bart-large-cnn")
50
+ qa_client = InferenceClient("deepset/roberta-base-squad2")
51
+ image_caption_client = InferenceClient("nlpconnect/vit-gpt2-image-captioning")
52
 
53
+ # ──────────────────────────────────────────────────────────────────────────────
54
+ # 3) Utils : extraction texte, routes API (inchangés ou presque)
55
+ # ──────────────────────────────────────────────────────────────────────────────
56
  def extract_text_from_pdf(content: bytes) -> str:
 
57
  reader = PdfReader(io.BytesIO(content))
58
+ return "\n".join(p.extract_text() or "" for p in reader.pages).strip()
 
 
 
59
 
60
  def extract_text_from_docx(content: bytes) -> str:
 
61
  doc = Document(io.BytesIO(content))
62
+ return "\n".join(p.text for p in doc.paragraphs).strip()
 
 
63
 
64
  def process_uploaded_file(file: UploadFile) -> str:
65
  content = file.file.read()
66
+ ext = file.filename.rsplit(".", 1)[-1].lower()
67
+ if ext == "pdf":
 
68
  return extract_text_from_pdf(content)
69
+ if ext == "docx":
70
  return extract_text_from_docx(content)
71
+ if ext == "txt":
72
  return content.decode("utf-8").strip()
73
+ raise ValueError("Type de fichier non supporté")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ # … (gardez vos trois routes /analyze, /ask, /interpret_image identiques)
 
76
 
77
  @app.get("/api/health")
78
  async def health_check():
79
+ return {"status": "healthy", "version": "1.0.0", "hf_token_set": True}
 
 
 
 
80
 
 
81
  if __name__ == "__main__":
82
  import uvicorn
83
  uvicorn.run(app, host="0.0.0.0", port=PORT)