# Gradio Web Server
# Source: vllm-project/vllm.
1import argparse
2import json
3
4import gradio as gr
5import requests
6
7
def http_bot(prompt):
    """Stream generated text for *prompt* from the vLLM HTTP server.

    Sends a streaming POST request to ``args.model_url`` (a module-level
    argparse namespace bound in ``__main__``) and yields text each time the
    server emits a chunk, so the Gradio output box updates incrementally.

    Args:
        prompt: The user-entered prompt string.

    Yields:
        str: The text payload of each streamed server chunk.
    """
    headers = {"User-Agent": "vLLM Client"}
    pload = {
        "prompt": prompt,
        "stream": True,
        "max_tokens": 128,
    }
    # timeout bounds the wait for the connection and the gap between
    # received bytes; without it a hung backend blocks this worker forever.
    response = requests.post(args.model_url,
                             headers=headers,
                             json=pload,
                             stream=True,
                             timeout=60)

    # The server delimits JSON payloads with NUL bytes (b"\0").
    for chunk in response.iter_lines(chunk_size=8192,
                                     decode_unicode=False,
                                     delimiter=b"\0"):
        if chunk:
            data = json.loads(chunk.decode("utf-8"))
            # "text" is a list of candidate completions; take the first.
            # NOTE(review): presumably each chunk carries the cumulative
            # text so far — confirm against the server's /generate handler.
            output = data["text"][0]
            yield output
27
28
def build_demo():
    """Construct and return the Gradio Blocks UI for the completion demo."""
    with gr.Blocks() as demo:
        gr.Markdown("# vLLM text completion demo\n")
        input_box = gr.Textbox(
            label="Input",
            placeholder="Enter text and press ENTER",
        )
        output_box = gr.Textbox(
            label="Output",
            placeholder="Generated result from the model",
        )
        # Pressing ENTER in the input box streams http_bot's yields into
        # the output box.
        input_box.submit(http_bot, [input_box], [output_box])
    return demo
38
39
if __name__ == "__main__":
    # Parse CLI options; the resulting ``args`` namespace is read globally
    # by http_bot (for --model-url), so the name must stay ``args``.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default=None)
    parser.add_argument("--port", type=int, default=8001)
    parser.add_argument(
        "--model-url",
        type=str,
        default="http://localhost:8000/generate",
    )
    args = parser.parse_args()

    # queue() enables streaming updates; share=True requests a public
    # Gradio share link in addition to the local server.
    app = build_demo()
    app.queue().launch(server_name=args.host,
                       server_port=args.port,
                       share=True)