Azure 代码解释器

AzureCodeInterpreterToolSpec #

基类: BaseToolSpec

Azure 代码解释器工具规范。

利用Azure动态会话执行Python代码。

Source code in llama-index-integrations/tools/llama-index-tools-azure-code-interpreter/llama_index/tools/azure_code_interpreter/base.py

class AzureCodeInterpreterToolSpec(BaseToolSpec):
    """
    Azure Code Interpreter tool spec.

    Leverages Azure Dynamic Sessions to execute Python code.
    """

    spec_functions = ["code_interpreter", "list_files"]

    def __init__(
        self,
        pool_management_endpoint: Optional[str] = None,
        session_id: Optional[str] = None,
        local_save_path: Optional[str] = None,
        sanitize_input: bool = True,
    ) -> None:
        """
        Initialize with parameters.

        Args:
            pool_management_endpoint: Base URL of the session pool. Falls back
                to the ``AZURE_POOL_MANAGEMENT_ENDPOINT`` environment variable.
            session_id: Session identifier; a random UUID is used when omitted.
            local_save_path: Existing local directory where files returned by
                the interpreter are written.
            sanitize_input: Whether code is passed through ``_sanitize_input``
                before execution.

        Raises:
            Exception: If ``local_save_path`` is given but does not exist.

        """
        # May still be None when neither the argument nor the environment
        # variable is set; _build_url raises in that case.
        self.pool_management_endpoint: Optional[str] = (
            pool_management_endpoint or os.getenv("AZURE_POOL_MANAGEMENT_ENDPOINT")
        )
        self.access_token: Optional[AccessToken] = None

        def _access_token_provider_factory() -> Callable[[], Optional[str]]:
            def access_token_provider() -> Optional[str]:
                """Create a function that returns an access token."""
                # Fetch a new token when none is cached or when the cached one
                # expires within the next five minutes.
                if self.access_token is None or datetime.fromtimestamp(
                    self.access_token.expires_on, timezone.utc
                ) < (datetime.now(timezone.utc) + timedelta(minutes=5)):
                    credential = DefaultAzureCredential()
                    self.access_token = credential.get_token(
                        "https://dynamicsessions.io/.default"
                    )
                return self.access_token.token

            return access_token_provider

        self.access_token_provider: Callable[[], Optional[str]] = (
            _access_token_provider_factory()
        )
        """A function that returns the access token to use for the session pool."""

        self.session_id: str = session_id or str(uuid4())
        """The session ID to use for the session pool. Defaults to a random UUID."""

        self.sanitize_input: bool = sanitize_input
        """Whether to sanitize input before executing it."""

        if local_save_path and not os.path.exists(local_save_path):
            raise Exception(f"Local save path {local_save_path} does not exist.")

        self.local_save_path: Optional[str] = local_save_path
        """The local path to save files generated by Python interpreter."""

        # NOTE(review): confirm this distribution name matches the installed
        # package; when it is not found the version falls back to "0.0.0".
        try:
            _package_version = importlib.metadata.version(
                "llamaindex-azure-code-interpreter"
            )
        except importlib.metadata.PackageNotFoundError:
            _package_version = "0.0.0"

        self.user_agent = (
            f"llamaindex-azure-code-interpreter/{_package_version} (Language=Python)"
        )

    def _build_url(self, path: str) -> str:
        """
        Build the request URL for ``path`` under the pool management endpoint.

        The session id (as ``identifier``) and the API version are appended as
        query parameters. A query string already present on the endpoint is
        kept and merged instead of being mixed into the path.

        Args:
            path: Path relative to the endpoint, e.g. ``"code/execute"``.

        Returns:
            str: The full URL including the query string.

        Raises:
            ValueError: If no pool management endpoint is configured.

        """
        pool_management_endpoint = self.pool_management_endpoint
        if not pool_management_endpoint:
            raise ValueError("pool_management_endpoint is not set")

        # Split off any query string first so the trailing slash and the path
        # are appended to the URL path rather than to the query.
        base_url, _, existing_query = pool_management_endpoint.partition("?")
        if not base_url.endswith("/"):
            base_url += "/"

        encoded_session_id = urllib.parse.quote(self.session_id)
        query = f"identifier={encoded_session_id}&api-version=2024-02-02-preview"
        if existing_query:
            query = f"{existing_query}&{query}"

        return f"{base_url}{path}?{query}"

    def code_interpreter(self, python_code: str) -> dict:
        """
        This tool is used to execute python commands when you need to perform calculations or computations in a Session.
        Input should be a valid python command. The tool returns the result, stdout, and stderr.

        Args:
            python_code (str): Python code to be executed generated by llm.

        """
        # NOTE(review): the docstring above is kept verbatim; it is presumably
        # surfaced as the tool description for this spec function - verify
        # before rewording it.
        if self.sanitize_input:
            python_code = _sanitize_input(python_code)

        access_token = self.access_token_provider()
        api_url = self._build_url("code/execute")
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
            "User-Agent": self.user_agent,
        }
        body = {
            "properties": {
                "codeInputType": "inline",
                "executionType": "synchronous",
                "code": python_code,
            }
        }

        response = requests.post(api_url, headers=headers, json=body)
        response.raise_for_status()
        response_json = response.json()

        # Only responses carrying a dict result with base64 data need
        # post-processing; everything else is returned untouched.
        if "properties" not in response_json:
            return response_json
        properties = response_json["properties"]
        if "result" not in properties or not properties["result"]:
            return response_json
        result = properties["result"]
        if not isinstance(result, dict) or "base64_data" not in result:
            return response_json

        if not self.local_save_path:
            # No save directory configured: blank the payload out.
            result["base64_data"] = ""
            return response_json

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        file_path = (
            f"{self.local_save_path}/{self.session_id}_{timestamp}.{result['format']}"
        )
        decoded_data = base64.b64decode(result["base64_data"])
        with open(file_path, "wb") as f:
            f.write(decoded_data)

        # Replace the raw payload with a flag telling whether the file exists.
        result.pop("base64_data")
        result["saved_to_local_path"] = os.path.exists(file_path)
        return response_json

    def upload_file(
        self,
        data: Optional[Any] = None,
        local_file_path: Optional[str] = None,
        remote_file_path: Optional[str] = None,
    ) -> List[RemoteFileMetadata]:
        """
        Upload a file to the session under the path /mnt/data.

        Args:
            data: The data to upload.
            local_file_path: The path to the local file to upload.
            remote_file_path: Name of the file in the session. Defaults to the
                base name of ``local_file_path``; required when ``data`` is
                uploaded directly.

        Returns:
            List[RemoteFileMetadata]: The list of metadatas for the uploaded files.

        Raises:
            ValueError: If both ``data`` and ``local_file_path`` are given, if
                neither is given, or if ``data`` is given without
                ``remote_file_path``.

        """
        if data and local_file_path:
            raise ValueError("data and local_file_path cannot be provided together")

        if local_file_path:
            if not remote_file_path:
                remote_file_path = f"/mnt/data/{os.path.basename(local_file_path)}"
        elif data is None:
            raise ValueError("either data or local_file_path must be provided")
        elif not remote_file_path:
            # In-memory data has no file name to derive the remote path from.
            raise ValueError(
                "remote_file_path is required when uploading in-memory data"
            )

        if not remote_file_path.startswith("/mnt/data"):
            remote_file_path = f"/mnt/data/{remote_file_path}"

        # Track the handle we open ourselves so it is always closed again.
        opened_file = None
        if local_file_path:
            opened_file = open(local_file_path, "rb")
            data = opened_file
        try:
            access_token = self.access_token_provider()
            api_url = self._build_url("files/upload")
            headers = {
                "Authorization": f"Bearer {access_token}",
            }
            files = [("file", (remote_file_path, data, "application/octet-stream"))]

            response = requests.request("POST", api_url, headers=headers, files=files)
            response.raise_for_status()
        finally:
            if opened_file is not None:
                opened_file.close()

        response_json = response.json()
        # Entries without a "properties" key are skipped.
        return [
            RemoteFileMetadata.from_dict(entry["properties"])
            for entry in response_json["value"]
            if "properties" in entry
        ]

    def download_file_to_local(
        self, remote_file_path: str, local_file_path: Optional[str] = None
    ) -> Optional[BufferedReader]:
        """
        Download a file from the session back to your local environment.

        Args:
            remote_file_path: The path to download the file from, relative to `/mnt/data`.
            local_file_path: The path to save the downloaded file to. If not provided, the file is returned as an in-memory binary stream.

        Returns:
            An in-memory ``BytesIO`` holding the file content when
            ``local_file_path`` is not given; ``None`` after the content has
            been written to ``local_file_path``.

        """
        access_token = self.access_token_provider()
        # In case the file path the LLM provides is absolute, remove the
        # leading /mnt/data/ prefix only; later occurrences are part of the
        # relative path and must be kept.
        prefix = "/mnt/data/"
        if remote_file_path.startswith(prefix):
            remote_file_path = remote_file_path[len(prefix) :]
        api_url = self._build_url(f"files/content/{remote_file_path}")
        headers = {
            "Authorization": f"Bearer {access_token}",
        }

        response = requests.get(api_url, headers=headers)
        response.raise_for_status()

        if local_file_path:
            with open(local_file_path, "wb") as f:
                f.write(response.content)
            return None

        return BytesIO(response.content)

    def list_files(self) -> List[RemoteFileMetadata]:
        """
        List the files in the session.

        Returns:
            List[RemoteFileMetadata]: The metadata for the files in the session

        """
        access_token = self.access_token_provider()
        api_url = self._build_url("files")
        headers = {
            "Authorization": f"Bearer {access_token}",
        }

        response = requests.get(api_url, headers=headers)
        response.raise_for_status()

        response_json = response.json()
        # Each entry of "value" is expected to carry a "properties" mapping.
        return [
            RemoteFileMetadata.from_dict(entry["properties"])
            for entry in response_json["value"]
        ]

access_token_provider `instance-attribute` #

access_token_provider: Callable[[], Optional[str]] = _access_token_provider_factory()

一个返回会话池使用的访问令牌的函数。

session_id `instance-attribute` #

session_id: str = session_id or str(uuid4())

用于会话池的会话ID。默认为随机UUID。

sanitize_input `instance-attribute` #

sanitize_input: bool = sanitize_input

在执行前是否对输入进行清理。

local_save_path `instance-attribute` #

local_save_path: Optional[str] = local_save_path

Python解释器生成文件的本地保存路径。

code_interpreter #

code_interpreter(python_code: str) -> dict

该工具用于在会话中需要进行计算或运算时执行Python命令。输入应为有效的Python命令。工具将返回结果、标准输出和标准错误。

参数:

名称	类型	描述	默认值
`python_code`	`str`	由llm生成的待执行Python代码。	required

Source code in llama-index-integrations/tools/llama-index-tools-azure-code-interpreter/llama_index/tools/azure_code_interpreter/base.py

def code_interpreter(self, python_code: str) -> dict:
    """
    This tool is used to execute python commands when you need to perform calculations or computations in a Session.
    Input should be a valid python command. The tool returns the result, stdout, and stderr.

    Args:
        python_code (str): Python code to be executed generated by llm.

    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # surfaced as the tool description for this spec function - verify before
    # rewording it.
    code = _sanitize_input(python_code) if self.sanitize_input else python_code

    token = self.access_token_provider()
    endpoint = self._build_url("code/execute")
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "User-Agent": self.user_agent,
    }
    payload = {
        "properties": {
            "codeInputType": "inline",
            "executionType": "synchronous",
            "code": code,
        }
    }

    response = requests.post(endpoint, headers=request_headers, json=payload)
    response.raise_for_status()
    response_json = response.json()

    # Guard clauses: only a dict result that carries base64 data needs any
    # post-processing; every other response is returned untouched.
    if "properties" not in response_json:
        return response_json
    properties = response_json["properties"]
    if "result" not in properties or not properties["result"]:
        return response_json
    result = properties["result"]
    if not isinstance(result, dict) or "base64_data" not in result:
        return response_json

    if not self.local_save_path:
        # No save directory configured: blank the payload out.
        result["base64_data"] = ""
        return response_json

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_path = (
        f"{self.local_save_path}/{self.session_id}_{timestamp}.{result['format']}"
    )
    raw_bytes = base64.b64decode(result["base64_data"])
    with open(file_path, "wb") as out_file:
        out_file.write(raw_bytes)

    # Swap the raw payload for a flag telling whether the file now exists.
    result.pop("base64_data")
    result["saved_to_local_path"] = os.path.exists(file_path)
    return response_json

upload_file #

upload_file(data: Optional[Any] = None, local_file_path: Optional[str] = None) -> List[RemoteFileMetadata]

将文件上传到路径 /mnt/data 下的会话中。

参数:

名称	类型	描述	默认值
`data`	`Optional[Any]`	要上传的数据。	`None`
`local_file_path`	`Optional[str]`	要上传的本地文件路径。	`None`

返回：

类型	描述
`List[RemoteFileMetadata]`	List[RemoteFileMetadata]: 上传文件的元数据列表。

Source code in llama-index-integrations/tools/llama-index-tools-azure-code-interpreter/llama_index/tools/azure_code_interpreter/base.py

def upload_file(
    self,
    data: Optional[Any] = None,
    local_file_path: Optional[str] = None,
    remote_file_path: Optional[str] = None,
) -> List[RemoteFileMetadata]:
    """
    Upload a file to the session under the path /mnt/data.

    Args:
        data: The data to upload.
        local_file_path: The path to the local file to upload.
        remote_file_path: Name of the file in the session. Defaults to the
            base name of ``local_file_path``; required when ``data`` is
            uploaded directly.

    Returns:
        List[RemoteFileMetadata]: The list of metadatas for the uploaded files.

    Raises:
        ValueError: If both ``data`` and ``local_file_path`` are given, if
            neither is given, or if ``data`` is given without
            ``remote_file_path``.

    """
    if data and local_file_path:
        raise ValueError("data and local_file_path cannot be provided together")

    if local_file_path:
        if not remote_file_path:
            remote_file_path = f"/mnt/data/{os.path.basename(local_file_path)}"
    elif data is None:
        raise ValueError("either data or local_file_path must be provided")
    elif not remote_file_path:
        # In-memory data has no file name to derive the remote path from.
        raise ValueError("remote_file_path is required when uploading in-memory data")

    if not remote_file_path.startswith("/mnt/data"):
        remote_file_path = f"/mnt/data/{remote_file_path}"

    # Track the handle we open ourselves so it is always closed again.
    opened_file = None
    if local_file_path:
        opened_file = open(local_file_path, "rb")
        data = opened_file
    try:
        access_token = self.access_token_provider()
        api_url = self._build_url("files/upload")
        headers = {
            "Authorization": f"Bearer {access_token}",
        }
        files = [("file", (remote_file_path, data, "application/octet-stream"))]

        response = requests.request("POST", api_url, headers=headers, files=files)
        response.raise_for_status()
    finally:
        if opened_file is not None:
            opened_file.close()

    response_json = response.json()
    # Entries without a "properties" key are skipped.
    return [
        RemoteFileMetadata.from_dict(entry["properties"])
        for entry in response_json["value"]
        if "properties" in entry
    ]

download_file_to_local #

download_file_to_local(remote_file_path: str, local_file_path: Optional[str] = None) -> Optional[BufferedReader]

从会话中下载文件到您的本地环境。

参数:

名称	类型	描述	默认值
`remote_file_path`	`str`	文件下载路径，相对于`/mnt/data`。	required
`local_file_path`	`Optional[str]`	下载文件的保存路径。如果未提供，文件将以BufferedReader形式返回。	`None`

返回：

名称	类型	描述
`BufferedReader`	`Optional[BufferedReader]`	下载文件的数据。

Source code in llama-index-integrations/tools/llama-index-tools-azure-code-interpreter/llama_index/tools/azure_code_interpreter/base.py

def download_file_to_local(
    self, remote_file_path: str, local_file_path: Optional[str] = None
) -> Optional[BufferedReader]:
    """
    Download a file from the session back to your local environment.

    Args:
        remote_file_path: The path to download the file from, relative to `/mnt/data`.
        local_file_path: The path to save the downloaded file to. If not provided, the file is returned as an in-memory binary stream.

    Returns:
        An in-memory ``BytesIO`` holding the file content when
        ``local_file_path`` is not given; ``None`` after the content has been
        written to ``local_file_path``.

    """
    access_token = self.access_token_provider()
    # In case the file path the LLM provides is absolute, remove the leading
    # /mnt/data/ prefix only; later occurrences are part of the relative path
    # and must be kept.
    prefix = "/mnt/data/"
    if remote_file_path.startswith(prefix):
        remote_file_path = remote_file_path[len(prefix) :]
    api_url = self._build_url(f"files/content/{remote_file_path}")
    headers = {
        "Authorization": f"Bearer {access_token}",
    }

    response = requests.get(api_url, headers=headers)
    response.raise_for_status()

    if local_file_path:
        with open(local_file_path, "wb") as f:
            f.write(response.content)
        return None

    return BytesIO(response.content)

list_files #

list_files() -> List[RemoteFileMetadata]

列出会话中的文件。

返回：

类型	描述
`List[RemoteFileMetadata]`	List[RemoteFileMetadata]: 会话中文件的元数据

Source code in llama-index-integrations/tools/llama-index-tools-azure-code-interpreter/llama_index/tools/azure_code_interpreter/base.py

def list_files(self) -> List[RemoteFileMetadata]:
    """
    List the files in the session.

    Returns:
        List[RemoteFileMetadata]: The metadata for the files in the session

    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # surfaced as the tool description for this spec function - verify before
    # rewording it.
    token = self.access_token_provider()
    endpoint = self._build_url("files")

    response = requests.get(endpoint, headers={"Authorization": f"Bearer {token}"})
    response.raise_for_status()

    # Convert each entry's "properties" mapping into a metadata object.
    file_metadatas = []
    for entry in response.json()["value"]:
        file_metadatas.append(RemoteFileMetadata.from_dict(entry["properties"]))
    return file_metadatas

Azure 代码解释器

AzureCodeInterpreterToolSpec #

access_token_provider instance-attribute #

session_id instance-attribute #

sanitize_input instance-attribute #

local_save_path instance-attribute #

code_interpreter #

upload_file #

download_file_to_local #

list_files #

access_token_provider `instance-attribute` #

session_id `instance-attribute` #

sanitize_input `instance-attribute` #

local_save_path `instance-attribute` #