ngebodh committed
Commit a1ab263 · verified · 1 Parent(s): 54f5b9c

Updated the inference endpoints


HF got rid of the previous endpoints. I updated the app with the new serverless endpoints.
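For context, a minimal, self-contained sketch of the pattern this commit moves to (not part of the commit itself; the single Mistral-7B entry and the token env var mirror the ones in app.py below, everything else is illustrative):

    import os
    from openai import OpenAI

    # Each model now carries its own router endpoint ("inf_point") plus the
    # model id the provider expects ("link"), instead of one shared base_url.
    model_links = {
        "Mistral-7B": {
            "inf_point": "https://router.huggingface.co/together/v1",
            "link": "mistralai/Mistral-7B-Instruct-v0.3",
        },
    }

    selected_model = "Mistral-7B"

    # The client is created per selected model, since base_url now differs.
    client = OpenAI(
        base_url=model_links[selected_model]["inf_point"],
        api_key=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
    )

    stream = client.chat.completions.create(
        model=model_links[selected_model]["link"],
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    )
    for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")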

Files changed (1)
  1. app.py +63 -42
app.py CHANGED
@@ -15,25 +15,35 @@ load_dotenv()
 #Comment_test_11_09_2024
 
 
-# initialize the client
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1",
-    api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN')#"hf_xxx" # Replace with your token
-    )
-
 
 
 
-#Create supported models
 model_links ={
-    "Meta-Llama-3-8B":"meta-llama/Meta-Llama-3-8B-Instruct",
-    "Mistral-7B":"mistralai/Mistral-7B-Instruct-v0.2",
-    "Gemma-7B":"google/gemma-1.1-7b-it",
-    "Gemma-2B":"google/gemma-1.1-2b-it",
-    "Zephyr-7B-β":"HuggingFaceH4/zephyr-7b-beta",
-    #"Meta-Llama-3.1-8B":"meta-llama/Meta-Llama-3.1-8B-Instruct", #TODO: Update when/if Serverless Inference available
-
-    }
+    "Gemma-3-27B-it":{
+        "inf_point":"https://router.huggingface.co/nebius/v1",
+        "link":"google/gemma-3-27b-it-fast",
+        },
+    "Meta-Llama-3.1-8B":{
+        "inf_point":"https://router.huggingface.co/nebius/v1",
+        "link":"meta-llama/Meta-Llama-3.1-8B-Instruct-fast",
+        },
+    "Mistral-7B":{
+        "inf_point":"https://router.huggingface.co/together/v1",
+        "link":"mistralai/Mistral-7B-Instruct-v0.3",
+        },
+    "Gemma-2-27B-it":{
+        "inf_point":"https://router.huggingface.co/nebius/v1",
+        "link":"google/gemma-2-27b-it-fast",
+        },
+    "Gemma-2-2B-it":{
+        "inf_point":"https://router.huggingface.co/nebius/v1",
+        "link":"google/gemma-2-2b-it-fast",
+        },
+    "Zephyr-7B-β":{
+        "inf_point":"https://router.huggingface.co/hf-inference/models/HuggingFaceH4/zephyr-7b-beta/v1",
+        "link":"HuggingFaceH4/zephyr-7b-beta",
+        },
+    }
 
 #Pull info about the model to display
 model_info ={
@@ -41,41 +51,46 @@ model_info ={
     {'description':"""The Mistral model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
     \nIt was created by the [**Mistral AI**](https://mistral.ai/news/announcing-mistral-7b/) team as has over **7 billion parameters.** \n""",
     'logo':'https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png'},
-    "Gemma-7B":
+    "Gemma-2-27B-it":
+    {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+    \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over **27 billion parameters.** \n""",
+    'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
+    "Gemma-3-27B-it":
     {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
-    \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over **7 billion parameters.** \n""",
+    \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over **27 billion parameters.** \n""",
+    'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
+    "Gemma-2-2B-it":
+    {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+    \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over **2 billion parameters.** \n""",
     'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
-    "Gemma-2B":
-    {'description':"""The Gemma model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
-    \nIt was created by the [**Google's AI Team**](https://blog.google/technology/developers/gemma-open-models/) team as has over **2 billion parameters.** \n""",
-    'logo':'https://pbs.twimg.com/media/GG3sJg7X0AEaNIq.jpg'},
     "Zephyr-7B":
-    {'description':"""The Zephyr model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
-    \nFrom Huggingface: \n\
-    Zephyr is a series of language models that are trained to act as helpful assistants. \
-    [Zephyr 7B Gemma](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1)\
-    is the third model in the series, and is a fine-tuned version of google/gemma-7b \
-    that was trained on on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO)\n""",
-    'logo':'https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1/resolve/main/thumbnail.png'},
+    {'description':"""The Zephyr model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+    \nFrom Huggingface: \n\
+    Zephyr is a series of language models that are trained to act as helpful assistants. \
+    [Zephyr 7B Gemma](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1)\
+    is the third model in the series, and is a fine-tuned version of google/gemma-7b \
+    that was trained on on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO)\n""",
+    'logo':'https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1/resolve/main/thumbnail.png'},
     "Zephyr-7B-β":
-    {'description':"""The Zephyr model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
-    \nFrom Huggingface: \n\
-    Zephyr is a series of language models that are trained to act as helpful assistants. \
-    [Zephyr-7B-β](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)\
-    is the second model in the series, and is a fine-tuned version of mistralai/Mistral-7B-v0.1 \
-    that was trained on on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO)\n""",
-    'logo':'https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha/resolve/main/thumbnail.png'},
+    {'description':"""The Zephyr model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+    \nFrom Huggingface: \n\
+    Zephyr is a series of language models that are trained to act as helpful assistants. \
+    [Zephyr-7B-β](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)\
+    is the second model in the series, and is a fine-tuned version of mistralai/Mistral-7B-v0.1 \
+    that was trained on on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO)\n""",
+    'logo':'https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha/resolve/main/thumbnail.png'},
     "Meta-Llama-3-8B":
-    {'description':"""The Llama (3) model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
-    \nIt was created by the [**Meta's AI**](https://llama.meta.com/) team and has over **8 billion parameters.** \n""",
-    'logo':'Llama_logo.png'},
+    {'description':"""The Llama (3) model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+    \nIt was created by the [**Meta's AI**](https://llama.meta.com/) team and has over **8 billion parameters.** \n""",
+    'logo':'Llama_logo.png'},
     "Meta-Llama-3.1-8B":
-    {'description':"""The Llama (3.1) model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
-    \nIt was created by the [**Meta's AI**](https://llama.meta.com/) team and has over **8 billion parameters.** \n""",
-    'logo':'Llama3_1_logo.png'},
+    {'description':"""The Llama (3.1) model is a **Large Language Model (LLM)** that's able to have question and answer interactions.\n \
+    \nIt was created by the [**Meta's AI**](https://llama.meta.com/) team and has over **8 billion parameters.** \n""",
+    'logo':'Llama3_1_logo.png'},
     }
 
 
+
 #Random dog images for error message
 random_dog = ["0f476473-2d8b-415e-b944-483768418a95.jpg",
               "1bd75c81-f1d7-4e55-9310-a27595fa8762.jpg",
@@ -143,6 +158,12 @@ if st.session_state.prev_option != selected_model:
 #Pull in the model we want to use
 repo_id = model_links[selected_model]
 
+# initialize the client
+client = OpenAI(
+    base_url=model_links[selected_model]["inf_point"],#"https://api-inference.huggingface.co/v1",
+    api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN')#"hf_xxx" # Replace with your token
+    )
+
 
 st.subheader(f'AI - {selected_model}')
 # st.title(f'ChatBot Using {selected_model}')
@@ -178,7 +199,7 @@ if prompt := st.chat_input(f"Hi I'm {selected_model}, ask me a question"):
 
     try:
         stream = client.chat.completions.create(
-            model=model_links[selected_model],
+            model=model_links[selected_model]["link"],
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
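The last hunk ends inside the create(...) call; the diff does not show how the stream is consumed. In a Streamlit chat app like this one, the returned stream is typically rendered with something like the following (a hypothetical sketch, not part of this commit; st.write_stream accepts OpenAI streams and returns the accumulated text):

    # Stream tokens into the chat UI, then persist the full reply.
    response = st.write_stream(stream)
    st.session_state.messages.append({"role": "assistant", "content": response})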