跳到主要内容

使用多步提示编写单元测试

nbviewer

复杂的任务(例如编写单元测试)可以从多步提示中受益。与单个提示不同,多步提示先让GPT生成一段文本,再把这段输出文本输入到后续提示中。当您希望GPT在回答之前先进行推理,或在执行之前先集思广益、制定计划时,这种方法会很有帮助。

在这个笔记本中,我们使用一个3步提示来使用以下步骤用Python编写单元测试:

  1. 解释:给定一个Python函数,我们要求GPT解释该函数正在做什么以及为什么。
  2. 计划:我们要求GPT为该函数规划一组单元测试。
    • 如果计划太简短,我们要求GPT用更多的单元测试想法详细说明。
  3. 执行:最后,我们指示GPT编写覆盖计划案例的单元测试。

代码示例还演示了链式多步提示的几种可选增强技巧:

  • 条件分支(例如,仅当第一个计划太短时才要求详细说明)
  • 为不同步骤选择不同的模型
  • 输出检查:如果输出不符合要求(例如,输出的代码无法被Python的ast模块解析),则重新运行函数
  • 流式输出,这样您可以在完全生成输出之前开始阅读输出(对于长的多步输出很方便)
# 运行本笔记本代码所需的导入包
import ast # 用于检测生成的Python代码是否有效
import os
from openai import OpenAI

# Shared API client used by the functions below. The key is read from the
# OPENAI_API_KEY environment variable; if it is unset, the placeholder string
# is passed instead and must be replaced with a real key.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "<your OpenAI API key if not set as env var>"))

# ANSI escape sequence printed in front of each message, keyed by chat role.
color_prefix_by_role = dict(
    system="\033[0m",  # SGR reset: terminal default color
    user="\033[0m",  # SGR reset: terminal default color
    assistant="\033[92m",  # bright green
)


def print_messages(messages, color_prefix_by_role=color_prefix_by_role) -> None:
    """Print chat messages, each preceded by its role's color prefix.

    Args:
        messages: Iterable of mappings that each have a "role" and a
            "content" key.
        color_prefix_by_role: Mapping from role name to the string printed
            before that role's message. A role missing from the mapping
            raises KeyError.
    """
    for entry in messages:
        speaker = entry["role"]
        prefix = color_prefix_by_role[speaker]
        body = entry["content"]
        print(f"{prefix}\n[{speaker}]\n{body}")


def print_message_delta(delta, color_prefix_by_role=color_prefix_by_role) -> None:
    """Print one streamed chunk ("delta") of an assistant message.

    Accepts both delta shapes: the attribute-style objects yielded by
    ``client.chat.completions.create(..., stream=True)`` and plain dicts.
    The previous ``"role" in delta`` / ``delta["role"]`` checks only worked
    for dicts; on attribute-style deltas the membership test is never true,
    so nothing was printed.

    Args:
        delta: A streamed message fragment exposing optional ``role`` and
            ``content`` (as attributes or as dict keys).
        color_prefix_by_role: Mapping from role name to the string printed
            before the role header.
    """
    if isinstance(delta, dict):
        role = delta.get("role")
        content = delta.get("content")
    else:
        role = getattr(delta, "role", None)
        content = getattr(delta, "content", None)

    # A role marks the start of a new message: print its colored header once.
    if role:
        color_prefix = color_prefix_by_role[role]
        print(f"{color_prefix}\n[{role}]\n", end="")
    # Checked independently of the role so that a chunk carrying both a role
    # and text does not lose its text.
    if content:
        print(content, end="")


# Example of a function that uses a multi-step prompt to write unit tests
def _stream_assistant_reply(model: str, messages: list, temperature: float, print_text: bool) -> str:
    """Stream one chat completion and return the assistant's full text."""
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        stream=True,
    )
    pieces = []
    for chunk in response:
        # NOTE(review): some streaming configurations may emit chunks with an
        # empty `choices` list - confirm against the API docs; skipping them
        # is harmless either way.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if print_text:
            print_message_delta(delta)
        # `delta` is an object, not a dict: `"content" in delta` is never
        # true for it, so test the attribute (None/"" chunks are skipped).
        if delta.content:
            pieces.append(delta.content)
    return "".join(pieces)


def _extract_python_block(reply: str) -> str:
    """Return the body of the first ```python fence in `reply`.

    Falls back to the whole stripped reply when no fence is present, so the
    caller's syntax check decides whether the text is usable instead of an
    IndexError being raised here.
    """
    _, fence, remainder = reply.partition("```python")
    if not fence:
        return reply.strip()
    return remainder.partition("```")[0].strip()


def unit_tests_from_function(
    function_to_test: str,  # Python function to test, as a string
    unit_test_package: str = "pytest",  # unit testing package; use the name as it appears in the import statement
    approx_min_cases_to_cover: int = 7,  # minimum number of test case categories to cover (approximate)
    print_text: bool = False,  # optionally prints text; helpful for understanding the function & debugging
    explain_model: str = "gpt-3.5-turbo",  # model used to generate the explanation in step 1
    plan_model: str = "gpt-3.5-turbo",  # model used to generate the text plans in steps 2 and 2b
    execute_model: str = "gpt-3.5-turbo",  # model used to generate the code in step 3
    temperature: float = 0.4,  # temperature = 0 can sometimes get stuck in repetitive loops, so we use 0.4
    reruns_if_fail: int = 1,  # if the output code cannot be parsed, this will re-run the function up to N times
) -> str:
    """Return a unit test suite for a Python function, using a 3-step GPT prompt.

    The steps are: (1) ask the model to explain the function, (2) ask it to
    plan test scenarios (with a follow-up request, step 2b, when the plan has
    fewer top-level bullets than `approx_min_cases_to_cover`), and (3) ask it
    to write the tests. Each step's reply is fed into the next step's
    conversation.

    Args:
        function_to_test: Source code of the function to test.
        unit_test_package: Test package name, as used in an import statement.
        approx_min_cases_to_cover: Bullet-count threshold below which the
            plan is elaborated.
        print_text: When True, print the prompts and the streamed replies.
        explain_model: Model name for step 1.
        plan_model: Model name for steps 2 and 2b.
        execute_model: Model name for step 3.
        temperature: Sampling temperature passed to every request.
        reruns_if_fail: How many times to start over when the generated code
            does not parse.

    Returns:
        The generated test code as a string. If the code still fails to parse
        once the reruns are used up, the unparseable code is returned as-is.
    """

    # Step 1: Generate an explanation of the function

    # Asks GPT to explain the function, formatted as a markdown list
    explain_system_message = {
        "role": "system",
        "content": "You are a world-class Python developer with an eagle eye for unintended bugs and edge cases. You carefully explain code with great detail and accuracy. You organize your explanations in markdown-formatted, bulleted lists.",
    }
    explain_user_message = {
        "role": "user",
        "content": f"""Please explain the following Python function. Review what each element of the function is doing precisely and what the author's intentions may have been. Organize your explanation as a markdown-formatted, bulleted list.

```python
{function_to_test}
```""",
    }
    explain_messages = [explain_system_message, explain_user_message]
    if print_text:
        print_messages(explain_messages)

    explanation = _stream_assistant_reply(explain_model, explain_messages, temperature, print_text)
    explain_assistant_message = {"role": "assistant", "content": explanation}

    # Step 2: Generate a plan to write a unit test

    # Asks GPT to plan out cases the units tests should cover, formatted as a bullet list
    plan_user_message = {
        "role": "user",
        "content": f"""A good unit test suite should aim to:
- Test the function's behavior for a wide range of possible inputs
- Test edge cases that the author may not have foreseen
- Take advantage of the features of `{unit_test_package}` to make the tests easy to write and maintain
- Be easy to read and understand, with clean code and descriptive names
- Be deterministic, so that the tests always pass or fail in the same way

To help unit test the function above, list diverse scenarios that the function should be able to handle (and under each scenario, include a few examples as sub-bullets).""",
    }
    plan_messages = [
        explain_system_message,
        explain_user_message,
        explain_assistant_message,
        plan_user_message,
    ]
    if print_text:
        print_messages([plan_user_message])
    # The reply is stored in `plan` (it was previously appended to
    # `explanation`, leaving the plan empty).
    plan = _stream_assistant_reply(plan_model, plan_messages, temperature, print_text)
    plan_assistant_message = {"role": "assistant", "content": plan}

    # Step 2b: If the plan is short, ask GPT to elaborate further
    # This counts top-level bullets (e.g., categories), but not sub-bullets (e.g., test cases)
    num_bullets = max(plan.count("\n-"), plan.count("\n*"))
    elaboration_needed = num_bullets < approx_min_cases_to_cover
    if elaboration_needed:
        elaboration_user_message = {
            "role": "user",
            "content": """In addition to those scenarios above, list a few rare or unexpected edge cases (and as before, under each edge case, include a few examples as sub-bullets).""",
        }
        elaboration_messages = [
            explain_system_message,
            explain_user_message,
            explain_assistant_message,
            plan_user_message,
            plan_assistant_message,
            elaboration_user_message,
        ]
        if print_text:
            print_messages([elaboration_user_message])
        # Stored in `elaboration` (previously appended to `explanation`).
        elaboration = _stream_assistant_reply(plan_model, elaboration_messages, temperature, print_text)
        elaboration_assistant_message = {"role": "assistant", "content": elaboration}

    # Step 3: Generate the unit test

    # Asks GPT to write the unit tests, following the planned cases, in a fixed code template
    package_comment = ""
    if unit_test_package == "pytest":
        package_comment = "# below, each test case is represented by a tuple passed to the @pytest.mark.parametrize decorator"
    execute_system_message = {
        "role": "system",
        "content": "You are a world-class Python developer with an eagle eye for unintended bugs and edge cases. You write careful, accurate unit tests. When asked to reply only with code, you write all of your code in a single block.",
    }
    execute_user_message = {
        "role": "user",
        "content": f"""Using Python and the `{unit_test_package}` package, write a suite of unit tests for the function, following the cases above. Include helpful comments to explain each line. Reply only with code, formatted as follows:

```python
# imports
import {unit_test_package} # used for our unit tests
{{insert other imports as needed}}

# function to test
{function_to_test}

# unit tests
{package_comment}
{{insert unit test code here}}
```""",
    }
    # The conversation so far is replayed under the code-writing system message.
    execute_messages = [
        execute_system_message,
        explain_user_message,
        explain_assistant_message,
        plan_user_message,
        plan_assistant_message,
    ]
    if elaboration_needed:
        execute_messages += [elaboration_user_message, elaboration_assistant_message]
    execute_messages += [execute_user_message]
    if print_text:
        print_messages([execute_system_message, execute_user_message])

    execution = _stream_assistant_reply(execute_model, execute_messages, temperature, print_text)

    # Check the output for errors
    code = _extract_python_block(execution)
    try:
        ast.parse(code)
    except SyntaxError as e:
        print(f"Syntax error in generated code: {e}")
        if reruns_if_fail > 0:
            print("Rerunning...")
            return unit_tests_from_function(
                function_to_test=function_to_test,
                unit_test_package=unit_test_package,
                approx_min_cases_to_cover=approx_min_cases_to_cover,
                print_text=print_text,
                explain_model=explain_model,
                plan_model=plan_model,
                execute_model=execute_model,
                temperature=temperature,
                reruns_if_fail=reruns_if_fail - 1,  # decrement the rerun counter when calling again
            )

    # Return the unit test as a string
    return code

# Source code of the sample function handed to unit_tests_from_function.
# It is pasted verbatim into the prompts and into the generated test file, so
# it must itself be valid, correctly indented Python.
example_function = """def pig_latin(text):
    def translate(word):
        vowels = 'aeiou'
        if word[0] in vowels:
            return word + 'way'
        else:
            consonants = ''
            for letter in word:
                if letter not in vowels:
                    consonants += letter
                else:
                    break
            return word[len(consonants):] + consonants + 'ay'

    words = text.lower().split()
    translated_words = [translate(word) for word in words]
    return ' '.join(translated_words)
"""

# Run the full pipeline on the sample function: use 10 as the plan's
# bullet-count threshold and echo the prompts and streamed replies.
unit_tests = unit_tests_from_function(
    function_to_test=example_function,
    approx_min_cases_to_cover=10,
    print_text=True,
)


[system]
You are a world-class Python developer with an eagle eye for unintended bugs and edge cases. You carefully explain code with great detail and accuracy. You organize your explanations in markdown-formatted, bulleted lists.

[user]
Please explain the following Python function. Review what each element of the function is doing precisely and what the author's intentions may have been. Organize your explanation as a markdown-formatted, bulleted list.

```python
def pig_latin(text):
def translate(word):
vowels = 'aeiou'
if word[0] in vowels:
return word + 'way'
else:
consonants = ''
for letter in word:
if letter not in vowels:
consonants += letter
else:
break
return word[len(consonants):] + consonants + 'ay'

words = text.lower().split()
translated_words = [translate(word) for word in words]
return ' '.join(translated_words)

```

[user]
A good unit test suite should aim to:
- Test the function's behavior for a wide range of possible inputs
- Test edge cases that the author may not have foreseen
- Take advantage of the features of `pytest` to make the tests easy to write and maintain
- Be easy to read and understand, with clean code and descriptive names
- Be deterministic, so that the tests always pass or fail in the same way

To help unit test the function above, list diverse scenarios that the function should be able to handle (and under each scenario, include a few examples as sub-bullets).

[user]
In addition to those scenarios above, list a few rare or unexpected edge cases (and as before, under each edge case, include a few examples as sub-bullets).

[system]
You are a world-class Python developer with an eagle eye for unintended bugs and edge cases. You write careful, accurate unit tests. When asked to reply only with code, you write all of your code in a single block.

[user]
Using Python and the `pytest` package, write a suite of unit tests for the function, following the cases above. Include helpful comments to explain each line. Reply only with code, formatted as follows:

```python
# imports
import pytest # used for our unit tests
{insert other imports as needed}

# function to test
def pig_latin(text):
def translate(word):
vowels = 'aeiou'
if word[0] in vowels:
return word + 'way'
else:
consonants = ''
for letter in word:
if letter not in vowels:
consonants += letter
else:
break
return word[len(consonants):] + consonants + 'ay'

words = text.lower().split()
translated_words = [translate(word) for word in words]
return ' '.join(translated_words)


# unit tests
# below, each test case is represented by a tuple passed to the @pytest.mark.parametrize decorator
{insert unit test code here}
```
execute messages: [{'role': 'system', 'content': 'You are a world-class Python developer with an eagle eye for unintended bugs and edge cases. You write careful, accurate unit tests. When asked to reply only with code, you write all of your code in a single block.'}, {'role': 'user', 'content': "Please explain the following Python function. Review what each element of the function is doing precisely and what the author's intentions may have been. Organize your explanation as a markdown-formatted, bulleted list.\n\n```python\ndef pig_latin(text):\n def translate(word):\n vowels = 'aeiou'\n if word[0] in vowels:\n return word + 'way'\n else:\n consonants = ''\n for letter in word:\n if letter not in vowels:\n consonants += letter\n else:\n break\n return word[len(consonants):] + consonants + 'ay'\n\n words = text.lower().split()\n translated_words = [translate(word) for word in words]\n return ' '.join(translated_words)\n\n```"}, {'role': 'assistant', 'content': ''}, {'role': 'user', 'content': "A good unit test suite should aim to:\n- Test the function's behavior for a wide range of possible inputs\n- Test edge cases that the author may not have foreseen\n- Take advantage of the features of `pytest` to make the tests easy to write and maintain\n- Be easy to read and understand, with clean code and descriptive names\n- Be deterministic, so that the tests always pass or fail in the same way\n\nTo help unit test the function above, list diverse scenarios that the function should be able to handle (and under each scenario, include a few examples as sub-bullets)."}, {'role': 'assistant', 'content': ''}, {'role': 'user', 'content': 'In addition to those scenarios above, list a few rare or unexpected edge cases (and as before, under each edge case, include a few examples as sub-bullets).'}, {'role': 'assistant', 'content': ''}, {'role': 'user', 'content': "Using Python and the `pytest` package, write a suite of unit tests for the function, following the cases above. 
Include helpful comments to explain each line. Reply only with code, formatted as follows:\n\n```python\n# imports\nimport pytest # used for our unit tests\n{insert other imports as needed}\n\n# function to test\ndef pig_latin(text):\n def translate(word):\n vowels = 'aeiou'\n if word[0] in vowels:\n return word + 'way'\n else:\n consonants = ''\n for letter in word:\n if letter not in vowels:\n consonants += letter\n else:\n break\n return word[len(consonants):] + consonants + 'ay'\n\n words = text.lower().split()\n translated_words = [translate(word) for word in words]\n return ' '.join(translated_words)\n\n\n# unit tests\n# below, each test case is represented by a tuple passed to the @pytest.mark.parametrize decorator\n{insert unit test code here}\n```"}]
# Display the generated test code returned by unit_tests_from_function.
print(unit_tests)

# imports
import pytest

# function to test
def pig_latin(text):
def translate(word):
vowels = 'aeiou'
if word[0] in vowels:
return word + 'way'
else:
consonants = ''
for letter in word:
if letter not in vowels:
consonants += letter
else:
break
return word[len(consonants):] + consonants + 'ay'

words = text.lower().split()
translated_words = [translate(word) for word in words]
return ' '.join(translated_words)


# unit tests
@pytest.mark.parametrize('text, expected', [
('hello world', 'ellohay orldway'), # basic test case
('Python is awesome', 'ythonPay isway awesomeway'), # test case with multiple words
('apple', 'appleway'), # test case with a word starting with a vowel
('', ''), # test case with an empty string
('123', '123'), # test case with non-alphabetic characters
('Hello World!', 'elloHay orldWay!'), # test case with punctuation
('The quick brown fox', 'ethay ickquay ownbray oxfay'), # test case with mixed case words
('a e i o u', 'away eway iway oway uway'), # test case with all vowels
('bcd fgh jkl mnp', 'bcday fghay jklway mnpay'), # test case with all consonants
])
def test_pig_latin(text, expected):
assert pig_latin(text) == expected

在使用生成的代码之前,请务必先仔细检查,因为GPT经常出错(尤其是在像这样基于字符处理的任务中)。为获得最佳结果,请使用能力最强的模型(截至2023年5月为GPT-4)。