Stanley Xu committed
Commit 2ae3a0e · 1 Parent(s): 3cc40a3

add a simple vllm api application

Files changed (3):
  1. README.md +15 -13
  2. app.py +27 -0
  3. requirements.txt +63 -0
README.md CHANGED
@@ -1,13 +1,15 @@
- ---
- title: Vllm Test
- emoji:
- colorFrom: yellow
- colorTo: red
- sdk: gradio
- sdk_version: 3.44.4
- app_file: app.py
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # vllm-test
+
+ conda create -n vllm_test python=3.11
+ conda activate vllm_test
+ pip install -r requirements.txt
+
+ ## environment settings
+
+ Please set the following environment variables first
+
+ ```
+ export SHOPPAL_VLLM_API_URL=
+ export SHOPPAL_VLLM_MODEL_NAME=
+ export SHOPPAL_VLLM_API_KEY=
+ ```
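As a quick sanity check before launching the app, a minimal sketch (not part of this commit) that reuses these variables to query the vLLM OpenAI-compatible endpoint with the same openai==0.28 client pinned in requirements.txt:

```
import os
import openai

# Point the openai client at the vLLM OpenAI-compatible server configured above.
openai.api_key = os.environ["SHOPPAL_VLLM_API_KEY"]
openai.api_base = os.environ["SHOPPAL_VLLM_API_URL"]

# One short completion to confirm the URL, key, and model name are all valid.
resp = openai.ChatCompletion.create(
    model=os.environ["SHOPPAL_VLLM_MODEL_NAME"],
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=32,
)
print(resp.choices[0].message.content)
```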
app.py ADDED
@@ -0,0 +1,27 @@
+ import openai
+ import gradio as gr
+ import os
+
+ # Point the OpenAI client (openai==0.28 style) at the vLLM OpenAI-compatible server.
+ openai.api_key = os.environ["SHOPPAL_VLLM_API_KEY"]
+ openai.api_base = os.environ["SHOPPAL_VLLM_API_URL"]
+
+ model_name = os.environ["SHOPPAL_VLLM_MODEL_NAME"]
+
+ def predict(message, history):
+     # Convert Gradio's (user, assistant) history tuples into OpenAI chat messages.
+     history_openai_format = []
+     for human, assistant in history:
+         history_openai_format.append({"role": "user", "content": human})
+         history_openai_format.append({"role": "assistant", "content": assistant})
+     history_openai_format.append({"role": "user", "content": message})
+
+     response = openai.ChatCompletion.create(
+         model=model_name,
+         messages=history_openai_format,
+         stop=[" Human:", " Assistant:"],
+         temperature=0.5,
+         max_tokens=2048,
+     )
+     return response.choices[0].message.content
+
+
+ gr.ChatInterface(predict).queue().launch()
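For reference, the loop in predict flattens the chat history into the message list sent to the server; a runnable illustration with made-up turns (not part of this commit):

```
# Illustrative turns only; predict() performs the same conversion on live chat history.
history = [("Hi", "Hello! How can I help?")]
message = "What does this Space do?"

messages = []
for human, assistant in history:
    messages.append({"role": "user", "content": human})
    messages.append({"role": "assistant", "content": assistant})
messages.append({"role": "user", "content": message})

print(messages)
# [{'role': 'user', 'content': 'Hi'},
#  {'role': 'assistant', 'content': 'Hello! How can I help?'},
#  {'role': 'user', 'content': 'What does this Space do?'}]
```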
requirements.txt ADDED
@@ -0,0 +1,63 @@
+ aiofiles==23.2.1
+ aiohttp==3.8.5
+ aiosignal==1.3.1
+ altair==5.1.1
+ annotated-types==0.5.0
+ anyio==3.7.1
+ async-timeout==4.0.3
+ attrs==23.1.0
+ certifi==2023.7.22
+ charset-normalizer==3.2.0
+ click==8.1.7
+ contourpy==1.1.1
+ cycler==0.11.0
+ fastapi==0.103.1
+ ffmpy==0.3.1
+ filelock==3.12.4
+ fonttools==4.42.1
+ frozenlist==1.4.0
+ fsspec==2023.9.2
+ gradio==3.44.4
+ gradio_client==0.5.1
+ h11==0.14.0
+ httpcore==0.18.0
+ httpx==0.25.0
+ huggingface-hub==0.17.2
+ idna==3.4
+ importlib-resources==6.1.0
+ Jinja2==3.1.2
+ jsonschema==4.19.1
+ jsonschema-specifications==2023.7.1
+ kiwisolver==1.4.5
+ MarkupSafe==2.1.3
+ matplotlib==3.8.0
+ multidict==6.0.4
+ numpy==1.26.0
+ openai==0.28.0
+ orjson==3.9.7
+ packaging==23.1
+ pandas==2.1.1
+ Pillow==10.0.1
+ pydantic==2.3.0
+ pydantic_core==2.6.3
+ pydub==0.25.1
+ pyparsing==3.1.1
+ python-dateutil==2.8.2
+ python-multipart==0.0.6
+ pytz==2023.3.post1
+ PyYAML==6.0.1
+ referencing==0.30.2
+ requests==2.31.0
+ rpds-py==0.10.3
+ semantic-version==2.10.0
+ six==1.16.0
+ sniffio==1.3.0
+ starlette==0.27.0
+ toolz==0.12.0
+ tqdm==4.66.1
+ typing_extensions==4.8.0
+ tzdata==2023.3
+ urllib3==2.0.5
+ uvicorn==0.23.2
+ websockets==11.0.3
+ yarl==1.9.2