Offline Chat With Tools#
Source code: vllm-project/vllm.
# ruff: noqa
import json
import random
import string

from vllm import LLM
from vllm.sampling_params import SamplingParams

# This script is an offline demo for function calling
#
# If you want to run a server/client setup, please follow this code:
#
# - Server:
#
# ```bash
# vllm serve mistralai/Mistral-7B-Instruct-v0.3 --tokenizer-mode mistral --load-format mistral --config-format mistral
# ```
#
# - Client:
#
# ```bash
# curl --location 'http://<your-node-url>:8000/v1/chat/completions' \
# --header 'Content-Type: application/json' \
# --header 'Authorization: Bearer token' \
# --data '{
#     "model": "mistralai/Mistral-7B-Instruct-v0.3",
#     "messages": [
#       {
#         "role": "user",
#         "content": [
#             {"type": "text", "text": "Describe this image in detail please."},
#             {"type": "image_url", "image_url": {"url": "https://s3.amazonaws.com/cms.ipressroom.com/338/files/201808/5b894ee1a138352221103195_A680%7Ejogging-edit/A680%7Ejogging-edit_hero.jpg"}},
#             {"type": "text", "text": "and this one as well. Answer in French."},
#             {"type": "image_url", "image_url": {"url": "https://www.wolframcloud.com/obj/resourcesystem/images/a0e/a0ee3983-46c6-4c92-b85d-059044639928/6af8cfb971db031b.png"}}
#         ]
#       }
#     ]
# }'
# ```
#
# Usage:
#     python demo.py

model_name = "mistralai/Mistral-7B-Instruct-v0.3"
# or switch to "mistralai/Mistral-Nemo-Instruct-2407"
# or "mistralai/Mistral-Large-Instruct-2407"
# or any other mistral model with function calling ability

# greedy decoding (temperature=0.0) keeps the emitted tool-call JSON stable
sampling_params = SamplingParams(max_tokens=8192, temperature=0.0)
# load the model with Mistral-native tokenizer, config, and weight formats
llm = LLM(
    model=model_name,
    tokenizer_mode="mistral",
    config_format="mistral",
    load_format="mistral",
)


# Mistral tool-call ids are short alphanumeric strings; nine random
# characters are used here
def generate_random_id(length=9):
    characters = string.ascii_letters + string.digits
    random_id = ''.join(random.choice(characters) for _ in range(length))
    return random_id


# simulate an API that can be called
def get_current_weather(city: str, state: str, unit: str):
    return (f"The weather in {city}, {state} is 85 degrees {unit}. It is "
            "partly cloudy, with highs in the 90's.")


# map tool names to the Python callables that implement them
tool_functions = {"get_current_weather": get_current_weather}

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description":
                    "The city to find the weather for, e.g. 'San Francisco'",
                },
                "state": {
                    "type": "string",
                    "description":
                    "the two-letter abbreviation for the state that the city"
                    " is in, e.g. 'CA' which would mean 'California'",
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"],
                },
            },
            "required": ["city", "state", "unit"],
        },
    },
}]
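# (the schema above is the OpenAI-style "function" tool format, which is
# passed to vLLM's chat API through the `tools` parameter below)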

messages = [{
    "role": "user",
    "content": "Can you tell me what the temperature will be in Dallas, in fahrenheit?",
}]

# first round: with the tools supplied, the model should reply with a tool
# call encoded as JSON in its text output
outputs = llm.chat(messages, sampling_params=sampling_params, tools=tools)
output = outputs[0].outputs[0].text.strip()

# append the assistant message
messages.append({
    "role": "assistant",
    "content": output,
})

# let's now actually parse and execute the model's output, simulating an API
# call with the function defined above
tool_calls = json.loads(output)
tool_answers = [
    tool_functions[call['name']](**call['arguments']) for call in tool_calls
]
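# note: json.loads(output) assumes the model emitted a bare JSON list of
# calls, e.g. [{"name": "get_current_weather", "arguments": {...}}].
# A more defensive variant (a sketch, not part of the original demo) would be:
#
#   try:
#       tool_calls = json.loads(output)
#   except json.JSONDecodeError:
#       raise RuntimeError(f"expected tool calls, got: {output!r}")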

# append the answer as a tool message and let the LLM give you an answer
messages.append({
    "role": "tool",
    "content": "\n\n".join(tool_answers),
    "tool_call_id": generate_random_id(),
})

# second round: the model now sees the tool result and answers in prose
outputs = llm.chat(messages, sampling_params, tools=tools)

print(outputs[0].outputs[0].text.strip())
# yields
# 'The weather in Dallas, TX is 85 degrees fahrenheit. '
# 'It is partly cloudy, with highs in the 90's.'
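For the server/client setup sketched in the comment block at the top, the same function-calling flow can also go through vLLM's OpenAI-compatible API. The snippet below is an illustrative sketch, not part of the original demo: it assumes a local `vllm serve` instance started with tool-call parsing enabled (recent vLLM releases provide `--enable-auto-tool-choice` and `--tool-call-parser mistral` for this) and uses the official `openai` Python client.

```python
from openai import OpenAI

# assumed local endpoint and dummy key; adjust to your deployment
client = OpenAI(base_url="http://localhost:8000/v1", api_key="token")

# same tool schema as in the offline demo above (descriptions trimmed)
tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "state": {"type": "string"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["city", "state", "unit"],
        },
    },
}]

response = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    messages=[{
        "role": "user",
        "content": "Can you tell me what the temperature will be in Dallas, in fahrenheit?",
    }],
    tools=tools,
)

# with server-side tool-call parsing enabled, the call comes back structured
# instead of as raw text that has to be json.loads()-ed by hand
tool_call = response.choices[0].message.tool_calls[0]
print(tool_call.function.name)       # e.g. "get_current_weather"
print(tool_call.function.arguments)  # JSON-encoded argument string
```

From there, you would execute the tool locally and post its result back as a `tool` message, mirroring the offline loop above.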