如何使用函数自动化任务(S3 存储桶示例)
本笔记本演示了如何使用ChatGPT的函数调用功能,执行与Amazon S3存储桶相关的任务。笔记本涵盖了S3存储桶的关键功能,如运行简单的列出命令、在所有存储桶中搜索特定文件、将文件上传到存储桶以及从存储桶下载文件。OpenAI Chat API能够理解用户的指令,生成自然语言响应,并根据用户的输入提取适当的函数调用。
要求: 要运行该笔记本,需要生成具有S3存储桶写入权限的AWS访问密钥,并将它们与OpenAI密钥一起存储在本地环境文件中。“.env”文件格式如下:
AWS_ACCESS_KEY_ID=<your-key>
AWS_SECRET_ACCESS_KEY=<your-key>
OPENAI_API_KEY=<your-key>
! pip install openai
! pip install boto3
! pip install tenacity
! pip install python-dotenv
from openai import OpenAI
import json
import boto3
import os
import datetime
from urllib.request import urlretrieve
# 加载环境变量
from dotenv import load_dotenv
load_dotenv()
True
初始设置
# Module-level setup: read the OpenAI key from the environment, choose the chat
# model, and build the S3 and OpenAI clients used by the functions in this file.
# NOTE(review): this assigns the key to an attribute on the OpenAI *class*; the
# `OpenAI()` instance created below presumably resolves OPENAI_API_KEY from the
# environment on its own, which would make this line redundant — confirm.
OpenAI.api_key = os.environ.get("OPENAI_API_KEY")
# Default model name used by chat_completion_request.
GPT_MODEL = "gpt-3.5-turbo"
# Optional - if you have trouble loading the environment file, you can set the AWS values with the code below.
# os.environ['AWS_ACCESS_KEY_ID'] = ''
# os.environ['AWS_SECRET_ACCESS_KEY'] = ''
# Create the S3 client
s3_client = boto3.client('s3')
# Create the OpenAI client
client = OpenAI()
实用工具
为了将用户的问题或命令与适当的函数连接起来,我们需要向ChatGPT提供必要的函数细节和预期参数。
# Tool definitions passed to the GPT model so it knows which S3 operations it
# may request and which arguments each one expects. Every entry's "name" matches
# a key of the `available_functions` dispatch table defined later in this file.
functions = [
# list_buckets: takes no arguments.
{
"type": "function",
"function":{
"name": "list_buckets",
"description": "List all available S3 buckets",
"parameters": {
"type": "object",
"properties": {}
}
}
},
# list_objects: "bucket" is required, "prefix" is optional.
{
"type": "function",
"function":{
"name": "list_objects",
"description": "List the objects or files inside a given S3 bucket",
"parameters": {
"type": "object",
"properties": {
"bucket": {"type": "string", "description": "The name of the S3 bucket"},
"prefix": {"type": "string", "description": "The folder path in the S3 bucket"},
},
"required": ["bucket"],
},
}
},
# download_file: all three arguments are required.
{
"type": "function",
"function":{
"name": "download_file",
"description": "Download a specific file from an S3 bucket to a local distribution folder.",
"parameters": {
"type": "object",
"properties": {
"bucket": {"type": "string", "description": "The name of the S3 bucket"},
"key": {"type": "string", "description": "The path to the file inside the bucket"},
"directory": {"type": "string", "description": "The local destination directory to download the file, should be specificed by the user."},
},
"required": ["bucket", "key", "directory"],
}
}
},
# upload_file: all four arguments are required; "is_remote_url" tells the
# handler whether "source" is a URL or a local path.
{
"type": "function",
"function":{
"name": "upload_file",
"description": "Upload a file to an S3 bucket",
"parameters": {
"type": "object",
"properties": {
"source": {"type": "string", "description": "The local source path or remote URL"},
"bucket": {"type": "string", "description": "The name of the S3 bucket"},
"key": {"type": "string", "description": "The path to the file inside the bucket"},
"is_remote_url": {"type": "boolean", "description": "Is the provided source a URL (True) or local path (False)"},
},
"required": ["source", "bucket", "key", "is_remote_url"],
}
}
},
# search_s3_objects: only "search_name" is required; "bucket", "prefix" and
# "exact_match" are optional.
{
"type": "function",
"function":{
"name": "search_s3_objects",
"description": "Search for a specific file name inside an S3 bucket",
"parameters": {
"type": "object",
"properties": {
"search_name": {"type": "string", "description": "The name of the file you want to search for"},
"bucket": {"type": "string", "description": "The name of the S3 bucket"},
"prefix": {"type": "string", "description": "The folder path in the S3 bucket"},
"exact_match": {"type": "boolean", "description": "Set exact_match to True if the search should match the exact file name. Set exact_match to False to compare part of the file name string (the file contains)"}
},
"required": ["search_name"],
},
}
}
]
创建用于与S3服务交互的辅助函数,例如列出存储桶、列出对象、下载和上传文件,以及搜索特定文件。
def datetime_converter(obj):
    """Serialize ``datetime.datetime`` values for ``json.dumps(default=...)``.

    Args:
        obj: The value ``json.dumps`` could not serialize on its own.

    Returns:
        The ISO 8601 string produced by ``obj.isoformat()``.

    Raises:
        TypeError: If ``obj`` is not a ``datetime.datetime`` instance.
    """
    # Guard clause: anything that is not a datetime is rejected up front.
    if not isinstance(obj, datetime.datetime):
        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
    return obj.isoformat()
def list_buckets():
    """Return the S3 client's bucket listing as a JSON string.

    Returns:
        A JSON array built from the ``'Buckets'`` entry of the
        ``s3_client.list_buckets()`` response; datetime values are
        serialized through ``datetime_converter``.
    """
    bucket_entries = s3_client.list_buckets()['Buckets']
    return json.dumps(bucket_entries, default=datetime_converter)
def list_objects(bucket, prefix=''):
    """Return every object under ``prefix`` in ``bucket`` as a JSON string.

    A single ``list_objects_v2`` call only returns one page of results, so
    the listing is walked with the client's paginator to avoid silently
    dropping objects in large buckets.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix ("folder path") to restrict the listing to.
            ``None`` is treated like the empty prefix.

    Returns:
        A JSON array of the object records from all pages (empty array when
        nothing matches); datetime values are serialized through
        ``datetime_converter``.
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    contents = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix or ''):
        # Pages without matching objects carry no 'Contents' entry.
        contents.extend(page.get('Contents', []))
    return json.dumps(contents, default=datetime_converter)
def download_file(bucket, key, directory):
    """Download one S3 object into a local directory.

    Args:
        bucket: Name of the S3 bucket holding the object.
        key: Path of the object inside the bucket.
        directory: Local directory the file is written into.

    Returns:
        A JSON string with ``status``, ``bucket``, ``key`` and the local
        ``destination`` path.
    """
    # The local file keeps only the last path component of the key.
    destination = os.path.join(directory, os.path.basename(key))
    s3_client.download_file(bucket, key, destination)
    outcome = {"status": "success", "bucket": bucket, "key": key, "destination": destination}
    return json.dumps(outcome)
def upload_file(source, bucket, key, is_remote_url=False):
    """Upload a local file, or the content behind a URL, to an S3 bucket.

    Remote sources are first fetched into a temporary directory that is
    removed afterwards, so no downloaded copy is left in the working
    directory and the URL's last path component (which may contain a query
    string or be empty) is never used as a local file name.

    Args:
        source: Local file path, or a URL when ``is_remote_url`` is true.
        bucket: Name of the destination S3 bucket.
        key: Path of the object inside the bucket.
        is_remote_url: Whether ``source`` is a URL to fetch before uploading.

    Returns:
        A JSON string with ``status``, ``source``, ``bucket`` and ``key``.
        For remote sources ``source`` is the last path component of the URL.
    """
    import tempfile

    if is_remote_url:
        # Reported back to the caller in place of the full URL.
        reported_source = os.path.basename(source)
        with tempfile.TemporaryDirectory() as staging_dir:
            staged_path = os.path.join(staging_dir, "download")
            urlretrieve(source, staged_path)
            s3_client.upload_file(staged_path, bucket, key)
    else:
        reported_source = source
        s3_client.upload_file(source, bucket, key)
    return json.dumps({"status": "success", "source": reported_source, "bucket": bucket, "key": key})
def search_s3_objects(search_name, bucket=None, prefix='', exact_match=True):
    """Search one bucket, or every bucket, for objects matching a name.

    Matching is case-insensitive. With ``exact_match`` the search name must
    equal either the full object key or its last ``/``-separated component,
    so a file stored inside a folder is found by its file name alone.
    Without ``exact_match`` the search name only has to occur somewhere in
    the key.

    Args:
        search_name: File name (or key) to look for.
        bucket: Bucket to search; ``None`` searches every bucket returned
            by ``list_buckets``.
        prefix: Key prefix passed on to ``list_objects``.
        exact_match: Require an exact name/key match instead of a substring.

    Returns:
        A JSON array of ``{"Bucket": ..., "Object": ...}`` records, in
        bucket order and then listing order.
    """
    needle = search_name.lower()

    if bucket is None:
        bucket_names = [info["Name"] for info in json.loads(list_buckets())]
    else:
        bucket_names = [bucket]

    def _key_matches(object_key):
        # Decide whether a single object key satisfies the search.
        lowered = object_key.lower()
        if exact_match:
            # S3 keys always use '/' as the folder delimiter.
            return needle == lowered or needle == lowered.rsplit('/', 1)[-1]
        return needle in lowered

    results = []
    for bucket_name in bucket_names:
        for obj in json.loads(list_objects(bucket_name, prefix)):
            if _key_matches(obj['Key']):
                results.append({"Bucket": bucket_name, "Object": obj})
    return json.dumps(results)
下面的字典将函数名称与对应的函数关联起来,以便根据ChatGPT的响应调用并执行相应的函数。
# Dispatch table from the function name the model returns to the Python
# callable that implements it; each key is the callable's own name.
available_functions = {
    handler.__name__: handler
    for handler in (
        list_buckets,
        list_objects,
        download_file,
        upload_file,
        search_s3_objects,
    )
}
ChatGPT
ChatGPT是基于GPT-3.5系列模型(本笔记本使用gpt-3.5-turbo)的对话模型,用于生成自然对话。它可以用于多种应用,如智能助手、聊天机器人、对话系统等。ChatGPT可以生成连贯、有逻辑的对话,并且可以根据上下文进行回复。下面的函数负责向OpenAI Chat API发送聊天补全请求。
def chat_completion_request(messages, functions=None, function_call='auto',
                            model_name=GPT_MODEL):
    """Send a chat completion request through the module-level OpenAI client.

    Args:
        messages: Conversation messages forwarded as ``messages``.
        functions: Optional tool definitions; when given they are sent as
            ``tools``.
        function_call: Value sent as ``tool_choice``; only used when
            ``functions`` is given.
        model_name: Model identifier forwarded as ``model``.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    request_args = {"model": model_name, "messages": messages}
    # Tool settings are attached only when tool definitions were supplied.
    if functions is not None:
        request_args["tools"] = functions
        request_args["tool_choice"] = function_call
    return client.chat.completions.create(**request_args)