lgarbarini and nsarrazin (HF Staff) committed
Commit df0f84c · unverified · 1 Parent(s): 28b6d44

feat(openai): added support for non-streaming o1 (e.g. Azure) models (#1687)

* feat(openai): added support for non-streaming o1 (e.g. Azure) models

* feat(docs): add non streaming example

* feat: moar docs

---------

Co-authored-by: Nathan Sarrazin <[email protected]>

README.md CHANGED
@@ -482,6 +482,29 @@ MODELS=`[{
 }]`
 ```
 
+_Non-streaming endpoints_
+
+For endpoints that don't support streaming like o1 on Azure, you can pass `streamingSupported: false` in your endpoint config:
+
+```
+MODELS=`[{
+  "id": "o1-preview",
+  "name": "o1-preview",
+  "displayName": "o1-preview",
+  "systemRoleSupported": false,
+  "endpoints": [
+    {
+      "type": "openai",
+      "baseURL": "https://my-deployment.openai.azure.com/openai/deployments/o1-preview",
+      "defaultHeaders": {
+        "api-key": "$SECRET"
+      },
+      "streamingSupported": false,
+    }
+  ]
+}]`
+```
+
 ##### Llama.cpp API server
 
 chat-ui also supports the llama.cpp API server directly without the need for an adapter. You can do this using the `llamacpp` endpoint type.
docs/source/configuration/models/providers/openai.md CHANGED
@@ -146,9 +146,36 @@ MODELS=`[{
 }]`
 ```
 
+_Non-streaming endpoints_
+
+For endpoints that don't support streaming like o1 on Azure, you can pass `streamingSupported: false` in your endpoint config:
+
+```
+MODELS=`[{
+  "id": "o1-preview",
+  "name": "o1-preview",
+  "displayName": "o1-preview",
+  "systemRoleSupported": false,
+  "endpoints": [
+    {
+      "type": "openai",
+      "baseURL": "https://my-deployment.openai.azure.com/openai/deployments/o1-preview",
+      "defaultHeaders": {
+        "api-key": "$SECRET"
+      },
+      "streamingSupported": false,
+    }
+  ]
+}]`
+```
+
 ## Other
 
 Some other providers and their `baseURL` for reference.
 
 [Groq](https://groq.com/): https://api.groq.com/openai/v1
 [Fireworks](https://fireworks.ai/): https://api.fireworks.ai/inference/v1
+
+```
+
+```
src/lib/server/endpoints/openai/endpointOai.ts CHANGED
@@ -1,8 +1,12 @@
 import { z } from "zod";
 import { openAICompletionToTextGenerationStream } from "./openAICompletionToTextGenerationStream";
-import { openAIChatToTextGenerationStream } from "./openAIChatToTextGenerationStream";
+import {
+  openAIChatToTextGenerationSingle,
+  openAIChatToTextGenerationStream,
+} from "./openAIChatToTextGenerationStream";
 import type { CompletionCreateParamsStreaming } from "openai/resources/completions";
 import type {
+  ChatCompletionCreateParamsNonStreaming,
   ChatCompletionCreateParamsStreaming,
   ChatCompletionTool,
 } from "openai/resources/chat/completions";
@@ -113,6 +117,7 @@ export const endpointOAIParametersSchema = z.object({
     .default({}),
   /* enable use of max_completion_tokens in place of max_tokens */
   useCompletionTokens: z.boolean().default(false),
+  streamingSupported: z.boolean().default(true),
 });
 
 export async function endpointOai(
@@ -128,6 +133,7 @@ export async function endpointOai(
     multimodal,
     extraBody,
     useCompletionTokens,
+    streamingSupported,
   } = endpointOAIParametersSchema.parse(input);
 
   let OpenAI;
@@ -249,10 +255,10 @@ export async function endpointOai(
 
       const parameters = { ...model.parameters, ...generateSettings };
       const toolCallChoices = createChatCompletionToolsArray(tools);
-      const body: ChatCompletionCreateParamsStreaming = {
+      const body = {
        model: model.id ?? model.name,
        messages: messagesOpenAI,
-       stream: true,
+       stream: streamingSupported,
        ...(useCompletionTokens
          ? { max_completion_tokens: parameters?.max_new_tokens }
          : { max_tokens: parameters?.max_new_tokens }),
@@ -264,15 +270,31 @@ export async function endpointOai(
        ...(toolCallChoices.length > 0 ? { tools: toolCallChoices, tool_choice: "auto" } : {}),
       };
 
-      const openChatAICompletion = await openai.chat.completions.create(body, {
-        body: { ...body, ...extraBody },
-        headers: {
-          "ChatUI-Conversation-ID": conversationId?.toString() ?? "",
-          "X-use-cache": "false",
-        },
-      });
-
-      return openAIChatToTextGenerationStream(openChatAICompletion);
+      if (streamingSupported) {
+        const openChatAICompletion = await openai.chat.completions.create(
+          body as ChatCompletionCreateParamsStreaming,
+          {
+            body: { ...body, ...extraBody },
+            headers: {
+              "ChatUI-Conversation-ID": conversationId?.toString() ?? "",
+              "X-use-cache": "false",
+            },
+          }
+        );
+        return openAIChatToTextGenerationStream(openChatAICompletion);
+      } else {
+        const openChatAICompletion = await openai.chat.completions.create(
+          body as ChatCompletionCreateParamsNonStreaming,
+          {
+            body: { ...body, ...extraBody },
+            headers: {
+              "ChatUI-Conversation-ID": conversationId?.toString() ?? "",
+              "X-use-cache": "false",
+            },
+          }
+        );
+        return openAIChatToTextGenerationSingle(openChatAICompletion);
+      }
     };
   } else {
     throw new Error("Invalid completion type");
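The new `streamingSupported` flag defaults to `true`, so existing endpoint configs keep the streaming code path; only entries that explicitly set `"streamingSupported": false` (like the Azure o1 example above) build the request with `stream: false`. Below is a minimal sketch of how the flag flows from config into the parsed endpoint parameters. The schema here is a trimmed stand-in for `endpointOAIParametersSchema`, and the field definitions other than `streamingSupported` are assumptions for illustration:

```ts
import { z } from "zod";

// Trimmed stand-in for endpointOAIParametersSchema: only fields relevant to
// this change are shown, and their exact definitions are illustrative.
const endpointSchema = z.object({
  type: z.literal("openai"),
  baseURL: z.string().default("https://api.openai.com/v1"),
  useCompletionTokens: z.boolean().default(false),
  streamingSupported: z.boolean().default(true),
});

// An Azure o1 endpoint entry as it would appear in the MODELS config.
const parsed = endpointSchema.parse({
  type: "openai",
  baseURL: "https://my-deployment.openai.azure.com/openai/deployments/o1-preview",
  streamingSupported: false,
});

// false -> the endpoint sends a single `stream: false` request and wraps the
// result with openAIChatToTextGenerationSingle; omitting the key leaves it true.
console.log(parsed.streamingSupported);
```

Because the default is `true`, configs that never mention the key are unaffected and keep going through `openAIChatToTextGenerationStream`.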
src/lib/server/endpoints/openai/openAIChatToTextGenerationStream.ts CHANGED
@@ -94,3 +94,25 @@ export async function* openAIChatToTextGenerationStream(
     }
   }
 }
+
+/**
+ * Transform a non-streaming OpenAI chat completion into a stream of TextGenerationStreamOutput
+ */
+export async function* openAIChatToTextGenerationSingle(
+  completion: OpenAI.Chat.Completions.ChatCompletion
+) {
+  const content = completion.choices[0]?.message?.content || "";
+  const tokenId = 0;
+
+  // Yield the content as a single token
+  yield {
+    token: {
+      id: tokenId,
+      text: content,
+      logprob: 0,
+      special: false,
+    },
+    generated_text: content,
+    details: null,
+  } as TextGenerationStreamOutput;
+}
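Both branches of the chat-completions endpoint return an async generator, so the caller's `for await` loop works the same whether the provider streamed or not; the non-streaming case simply yields once with the whole answer. A rough consumption sketch of the new helper, using a hand-built `ChatCompletion` object in place of a real response from `openai.chat.completions.create`:

```ts
import type OpenAI from "openai";
import { openAIChatToTextGenerationSingle } from "./openAIChatToTextGenerationStream";

// Hand-built, minimal non-streaming response standing in for what an Azure
// o1 deployment would return with `stream: false`.
const completion = {
  id: "chatcmpl-example",
  object: "chat.completion",
  created: 0,
  model: "o1-preview",
  choices: [
    {
      index: 0,
      message: { role: "assistant", content: "Hello from o1", refusal: null },
      finish_reason: "stop",
      logprobs: null,
    },
  ],
} as OpenAI.Chat.Completions.ChatCompletion;

async function main() {
  // The generator yields exactly one output carrying the full text, so the
  // same loop used for streaming endpoints keeps working unchanged.
  for await (const output of openAIChatToTextGenerationSingle(completion)) {
    console.log(output.token.text); // "Hello from o1"
    console.log(output.generated_text); // "Hello from o1"
  }
}

main();
```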