ai: Refactor the code for JARVIS multi-platform.
jarvis.py
CHANGED
@@ -26,23 +26,27 @@ os.system("apt-get update -q -y && apt-get install -q -y tesseract-ocr tesseract
INTERNAL_AI_GET_SERVER = os.getenv("INTERNAL_AI_GET_SERVER")
INTERNAL_TRAINING_DATA = os.getenv("INTERNAL_TRAINING_DATA")

SYSTEM_PROMPT_MAPPING = json.loads(os.getenv("SYSTEM_PROMPT_MAPPING", "{}"))
SYSTEM_PROMPT_DEFAULT = os.getenv("DEFAULT_SYSTEM")

LINUX_SERVER_HOSTS = [h for h in json.loads(os.getenv("LINUX_SERVER_HOST", "[]")) if h]
LINUX_SERVER_HOSTS_MARKED = set()
LINUX_SERVER_HOSTS_ATTEMPTS = {}

LINUX_SERVER_PROVIDER_KEYS = [k for k in json.loads(os.getenv("LINUX_SERVER_PROVIDER_KEY", "[]")) if k]
LINUX_SERVER_PROVIDER_KEYS_MARKED = set()
LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS = {}

LINUX_SERVER_ERRORS = set(map(int, os.getenv("LINUX_SERVER_ERROR", "").split(",")))

AI_TYPES = {f"AI_TYPE_{i}": os.getenv(f"AI_TYPE_{i}") for i in range(1, 8)}
RESPONSES = {f"RESPONSE_{i}": os.getenv(f"RESPONSE_{i}") for i in range(1, 10)}

MODEL_MAPPING = json.loads(os.getenv("MODEL_MAPPING", "{}"))
MODEL_CONFIG = json.loads(os.getenv("MODEL_CONFIG", "{}"))
MODEL_CHOICES = list(MODEL_MAPPING.values())
DEFAULT_CONFIG = json.loads(os.getenv("DEFAULT_CONFIG", "{}"))
DEFAULT_MODEL_KEY = list(MODEL_MAPPING.keys())[0] if MODEL_MAPPING else None

META_TAGS = os.getenv("META_TAGS")
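
# Hypothetical example of the JSON-encoded environment this block expects; the
# values below are placeholders for illustration, not part of the commit:
#   LINUX_SERVER_HOST='["https://host-a/v1/chat/completions"]'
#   LINUX_SERVER_PROVIDER_KEY='["key-1", "key-2"]'
#   LINUX_SERVER_ERROR='401,402,403,429'
#   MODEL_MAPPING='{"model-key": "Model Display Name"}'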

@@ -50,157 +54,152 @@ ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]"))

ACTIVE_CANDIDATE = None
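
# A requests.Session subclass carrying a per-user UUID; the id is forwarded to
# the backend as session_id so requests can be correlated.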
class SessionWithID(requests.Session):
    def __init__(self):
        super().__init__()
        self.session_id = str(uuid.uuid4())

def create_session():
    return SessionWithID()

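# Availability helpers: get_available_items() returns the unmarked entries in
# random order; marked_item() quarantines a failing entry and, on the third
# failure, starts a 300-second timer that unmarks it and resets its count.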
def get_available_items(items, marked):
    a = [i for i in items if i not in marked]
    random.shuffle(a)
    return a

def marked_item(item, marked, attempts):
    marked.add(item)
    attempts[item] = attempts.get(item, 0) + 1
    if attempts[item] >= 3:
        def remove():
            marked.discard(item)
            attempts.pop(item, None)
        threading.Timer(300, remove).start()

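# Maps a dropdown display name back to its MODEL_MAPPING key, falling back to
# DEFAULT_MODEL_KEY for unknown names.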
def get_model_key(display):
    return next((k for k, v in MODEL_MAPPING.items() if v == display), DEFAULT_MODEL_KEY)

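# Flattens an uploaded file to plain text: pdfplumber for PDFs, python-docx for
# Word files, pandas for spreadsheets, python-pptx for slides, and a UTF-8 read
# for anything else. (This refactor drops the earlier pdfplumber table
# extraction and the pytesseract OCR branch for images.)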
def extract_file_content(fp):
    ext = Path(fp).suffix.lower()
    c = ""
    try:
        if ext == ".pdf":
            with pdfplumber.open(fp) as pdf:
                for p in pdf.pages:
                    t = p.extract_text() or ""
                    c += t + "\n"
        elif ext in [".doc", ".docx"]:
            d = docx.Document(fp)
            for para in d.paragraphs:
                c += para.text + "\n"
        elif ext in [".xlsx", ".xls"]:
            df = pd.read_excel(fp)
            c += df.to_csv(index=False)
        elif ext in [".ppt", ".pptx"]:
            prs = Presentation(fp)
            for s in prs.slides:
                for sh in s.shapes:
                    if hasattr(sh, "text") and sh.text:
                        c += sh.text + "\n"
        else:
            c = Path(fp).read_text(encoding="utf-8")
    except Exception as e:
        c = f"{fp}: {e}"
    return c.strip()

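# One host/key attempt with escalating timeouts; a response code listed in
# LINUX_SERVER_ERRORS quarantines the provider key immediately, and exhausting
# every timeout quarantines it as well.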
async def fetch_response_async(host, key, model, msgs, cfg, sid):
    for t in [60, 80, 120, 240]:
        try:
            async with httpx.AsyncClient(timeout=t) as client:
                r = await client.post(host, json={"model": model, "messages": msgs, **cfg, "session_id": sid}, headers={"Authorization": f"Bearer {key}"})
                if r.status_code in LINUX_SERVER_ERRORS:
                    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                    return None
                r.raise_for_status()
                j = r.json()
                if isinstance(j, dict) and j.get("choices"):
                    ch = j["choices"][0]
                    if ch.get("message") and isinstance(ch["message"].get("content"), str):
                        return ch["message"]["content"]
                return None
        except Exception:
            continue
    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
    return None

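# Builds the message list with the right system prompt, tries the sticky
# ACTIVE_CANDIDATE pair first, then sweeps shuffled host/key combinations; the
# first pair that answers becomes the new ACTIVE_CANDIDATE.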
async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt):
    if not get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED) or not get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED):
        return RESPONSES["RESPONSE_3"]
    if not hasattr(sess, "session_id"):
        sess.session_id = str(uuid.uuid4())
    model_key = get_model_key(model_display)
    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
    msgs = [{"role": "user", "content": u} for u, _ in history] + [{"role": "assistant", "content": a} for _, a in history if a]
    if model_key == DEFAULT_MODEL_KEY and INTERNAL_TRAINING_DATA:
        prompt = INTERNAL_TRAINING_DATA
    else:
        prompt = custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)
    msgs.insert(0, {"role": "system", "content": prompt})
    msgs.append({"role": "user", "content": user_input})
    global ACTIVE_CANDIDATE
    if ACTIVE_CANDIDATE:
        res = await fetch_response_async(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], model_key, msgs, cfg, sess.session_id)
        if res:
            return res
        ACTIVE_CANDIDATE = None
    keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
    hosts = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_MARKED)
    cands = [(h, k) for h in hosts for k in keys]
    random.shuffle(cands)
    for h, k in cands:
        res = await fetch_response_async(h, k, model_key, msgs, cfg, sess.session_id)
        if res:
            ACTIVE_CANDIDATE = (h, k)
            return res
    return RESPONSES["RESPONSE_2"]

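# Gradio submit handler: folds any uploaded file plus the typed text into a
# single user turn, asks the model, then streams the reply into the chat box
# character by character, coercing odd payload types to text via to_str().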
async def respond_async(multi, history, model_display, sess, custom_prompt):
    msg = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
    if not msg["text"] and not msg["files"]:
        yield history, gr.MultimodalTextbox(value=None, interactive=True), sess
        return
    inp = ""
    for f in msg["files"]:
        p = f["name"] if isinstance(f, dict) and "name" in f else f
        inp += f"{Path(p).name}\n\n{extract_file_content(p)}\n\n"
    if msg["text"]:
        inp += msg["text"]
    history.append([inp, ""])
    ai = await chat_with_model_async(history, inp, model_display, sess, custom_prompt)
    history[-1][1] = ""
    def to_str(d):
        if isinstance(d, (str, int, float)): return str(d)
        if isinstance(d, bytes): return d.decode("utf-8", errors="ignore")
        if isinstance(d, (list, tuple)): return "".join(map(to_str, d))
        if isinstance(d, dict): return json.dumps(d, ensure_ascii=False)
        return repr(d)
    for c in ai:
        history[-1][1] += to_str(c)
        await asyncio.sleep(0.0001)
        yield history, gr.MultimodalTextbox(value=None, interactive=True), sess

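# Dropdown callback: clears history, rotates the session, and shows the editable
# system-prompt box only when a non-default model is selected.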
def change_model(new):
    visible = new != MODEL_CHOICES[0]
    default = SYSTEM_PROMPT_MAPPING.get(get_model_key(new), SYSTEM_PROMPT_DEFAULT)
    return [], create_session(), new, default, gr.update(value=default, visible=visible)

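# UI wiring: per-user Gradio state (history, session, selected model, custom
# prompt), the chatbot pane, a multimodal input, and an accordion with the
# model dropdown plus the optional system-prompt override.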
with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
    user_history = gr.State([])
    user_session = gr.State(create_session())
    selected_model = gr.State(MODEL_CHOICES[0] if MODEL_CHOICES else "")
    custom_prompt_state = gr.State("")
    chatbot = gr.Chatbot(label=AI_TYPES["AI_TYPE_1"], show_copy_button=True, scale=1, elem_id=AI_TYPES["AI_TYPE_2"])
    with gr.Row():
        msg = gr.MultimodalTextbox(show_label=False, placeholder=RESPONSES["RESPONSE_5"], interactive=True, file_count="single", file_types=ALLOWED_EXTENSIONS)
    with gr.Accordion(AI_TYPES["AI_TYPE_6"], open=False):
        model_dropdown = gr.Dropdown(show_label=False, choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
        system_prompt = gr.Textbox(label=AI_TYPES["AI_TYPE_7"], lines=2, interactive=True, visible=False)
    model_dropdown.change(fn=change_model, inputs=[model_dropdown], outputs=[user_history, user_session, selected_model, custom_prompt_state, system_prompt])
    system_prompt.change(fn=lambda x: x, inputs=[system_prompt], outputs=[custom_prompt_state])
    msg.submit(fn=respond_async, inputs=[msg, user_history, selected_model, user_session, custom_prompt_state], outputs=[chatbot, msg, user_session], api_name=INTERNAL_AI_GET_SERVER)
jarvis.launch(max_file_size="1mb")