sitammeur committed · verified
Commit cd32385 · 1 Parent(s): d769608

Update src/florence/model.py

Files changed (1)
  1. src/florence/model.py +87 -87
src/florence/model.py CHANGED
@@ -1,87 +1,87 @@
- # Importing necessary libraries
- import sys
- import subprocess
- from typing import Optional
- from PIL import Image
- import gradio as gr
- import spaces
- from transformers import AutoProcessor, AutoModelForCausalLM
-
- # Local imports
- from src.logger import logging
- from src.exception import CustomExceptionHandling
-
-
- # Install the required dependencies
- subprocess.run(
-     "pip install flash-attn --no-build-isolation",
-     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-     shell=True,
- )
-
- # Load model and processor from Hugging Face
- model_id = "microsoft/Florence-2-large-ft"
- try:
-     model = (
-         AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
-         .to("cuda")
-         .eval()
-     )
-     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-     logging.info("Model and processor loaded successfully.")
-
- # Handle exceptions that may occur during the process
- except Exception as e:
-     # Custom exception handling
-     raise CustomExceptionHandling(e, sys) from e
-
-
- @spaces.GPU
- def run_example(
-     task_prompt: str, image: Image.Image, text_input: Optional[str] = None
- ) -> str:
-     """
-     Runs an example using the given task prompt and image.
-
-     Args:
-         - task_prompt (str): The task prompt for the example.
-         - image (PIL.Image.Image): The image to be processed.
-         - text_input (str, optional): Additional text input to be appended to the task prompt. Defaults to None.
-
-     Returns:
-         str: The parsed answer generated by the model.
-     """
-     try:
-         # Check if image is None
-         if image is None:
-             gr.Warning("Please provide an image.")
-
-         # If there is no text input, use the task prompt as the prompt
-         prompt = task_prompt if text_input is None else task_prompt + text_input
-
-         # Process the image and text input
-         inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
-
-         # Generate the answer using the model
-         generated_ids = model.generate(
-             input_ids=inputs["input_ids"],
-             pixel_values=inputs["pixel_values"],
-             max_new_tokens=1024,
-             early_stopping=False,
-             do_sample=False,
-             num_beams=3,
-         )
-         generated_text = processor.batch_decode(
-             generated_ids, skip_special_tokens=False
-         )[0]
-         parsed_answer = processor.post_process_generation(
-             generated_text, task=task_prompt, image_size=(image.width, image.height)
-         )
-
-         # Return the parsed answer
-         return parsed_answer
-
-     # Handle exceptions that may occur during the process
-     except Exception as e:
-         # Custom exception handling
-         raise CustomExceptionHandling(e, sys) from e
+ # Importing necessary libraries
+ import sys
+ import subprocess
+ from typing import Optional
+ from PIL import Image
+ import gradio as gr
+ import spaces
+ from transformers import AutoProcessor, AutoModelForCausalLM
+
+ # Local imports
+ from src.logger import logging
+ from src.exception import CustomExceptionHandling
+
+
+ # Install the required dependencies
+ subprocess.run(
+     "pip install flash-attn --no-build-isolation",
+     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+     shell=True,
+ )
+
+ # Load model and processor from Hugging Face
+ model_id = "microsoft/Florence-2-large-ft"
+ try:
+     model = (
+         AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, from_tf=True)
+         .to("cuda")
+         .eval()
+     )
+     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+     logging.info("Model and processor loaded successfully.")
+
+ # Handle exceptions that may occur during the process
+ except Exception as e:
+     # Custom exception handling
+     raise CustomExceptionHandling(e, sys) from e
+
+
+ @spaces.GPU
+ def run_example(
+     task_prompt: str, image: Image.Image, text_input: Optional[str] = None
+ ) -> str:
+     """
+     Runs an example using the given task prompt and image.
+
+     Args:
+         - task_prompt (str): The task prompt for the example.
+         - image (PIL.Image.Image): The image to be processed.
+         - text_input (str, optional): Additional text input to be appended to the task prompt. Defaults to None.
+
+     Returns:
+         str: The parsed answer generated by the model.
+     """
+     try:
+         # Check if image is None
+         if image is None:
+             gr.Warning("Please provide an image.")
+
+         # If there is no text input, use the task prompt as the prompt
+         prompt = task_prompt if text_input is None else task_prompt + text_input
+
+         # Process the image and text input
+         inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
+
+         # Generate the answer using the model
+         generated_ids = model.generate(
+             input_ids=inputs["input_ids"],
+             pixel_values=inputs["pixel_values"],
+             max_new_tokens=1024,
+             early_stopping=False,
+             do_sample=False,
+             num_beams=3,
+         )
+         generated_text = processor.batch_decode(
+             generated_ids, skip_special_tokens=False
+         )[0]
+         parsed_answer = processor.post_process_generation(
+             generated_text, task=task_prompt, image_size=(image.width, image.height)
+         )
+
+         # Return the parsed answer
+         return parsed_answer
+
+     # Handle exceptions that may occur during the process
+     except Exception as e:
+         # Custom exception handling
+         raise CustomExceptionHandling(e, sys) from e
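
The only functional change in this commit is the `from_tf=True` argument added to the `from_pretrained` call, which asks transformers to load TensorFlow-format weights; every other line of the file is unchanged. For context, here is a minimal usage sketch of the module's public function. It assumes the Space's layout makes the file importable as `src.florence.model`, uses task tokens documented on the Florence-2 model card, and the image path is a placeholder:

```python
# Hypothetical caller, not part of the commit. Note that importing the module
# triggers the flash-attn install and the model load at import time.
from PIL import Image

from src.florence.model import run_example

image = Image.open("example.jpg")  # placeholder path

# Plain captioning: the task token alone is the prompt.
caption = run_example("<CAPTION>", image)

# Phrase grounding: the extra text is appended to the task token.
grounded = run_example(
    "<CAPTION_TO_PHRASE_GROUNDING>", image, text_input="a green car"
)

print(caption, grounded)
```

Despite the `-> str` annotation, `processor.post_process_generation` returns a dict keyed by the task token (e.g. `{"<CAPTION>": "..."}`), and that dict is what `run_example` actually returns.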
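
One detail worth flagging in the install step (present in both versions of the file): passing `env={...}` to `subprocess.run` replaces the child process's entire environment rather than extending it, which can break tools that rely on `PATH`, `HOME`, or virtualenv variables. A common variant merges the flag into the inherited environment instead; this is a sketch of that pattern, not what the commit does:

```python
import os
import subprocess

# Merge the build flag into the inherited environment instead of replacing it,
# so pip still sees PATH, HOME, and the rest of the parent's variables.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
    check=True,  # assumption: fail fast if the install does not succeed
)
```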