ChatResponseMode #

Bases: str, Enum

Flag toggling waiting/streaming in Agent._chat.

Source code in llama_index/core/chat_engine/types.py
class ChatResponseMode(str, Enum):
    """在`Agent._chat`中切换等待/流式传输标志。"""

    WAIT = "wait"
    STREAM = "stream"

AgentChatResponse dataclass #

Agent chat response.

Source code in llama_index/core/chat_engine/types.py
@dataclass
class AgentChatResponse:
    """代理人聊天回复。"""

    response: str = ""
    sources: List[ToolOutput] = field(default_factory=list)
    source_nodes: List[NodeWithScore] = field(default_factory=list)
    is_dummy_stream: bool = False

    def __post_init__(self) -> None:
        if self.sources and not self.source_nodes:
            for tool_output in self.sources:
                if isinstance(tool_output.raw_output, (Response, StreamingResponse)):
                    self.source_nodes.extend(tool_output.raw_output.source_nodes)

    def __str__(self) -> str:
        return self.response

    @property
    def response_gen(self) -> Generator[str, None, None]:
        """用于虚拟流式传输,即使用工具输出。"""
        if not self.is_dummy_stream:
            raise ValueError(
                "response_gen is only available for streaming responses. "
                "Set is_dummy_stream=True if you still want a generator."
            )

        for token in self.response.split(" "):
            yield token + " "
            time.sleep(0.1)

    async def async_response_gen(self) -> AsyncGenerator[str, None]:
        """用于虚拟流式传输,即使用工具输出。"""
        if not self.is_dummy_stream:
            raise ValueError(
                "response_gen is only available for streaming responses. "
                "Set is_dummy_stream=True if you still want a generator."
            )

        for token in self.response.split(" "):
            yield token + " "
            await asyncio.sleep(0.1)

response_gen property #

response_gen: Generator[str, None, None]

Used for fake streaming, i.e. with tool outputs.

async_response_gen async #

async_response_gen() -> AsyncGenerator[str, None]

Used for fake streaming, i.e. with tool outputs.

Source code in llama_index/core/chat_engine/types.py
async def async_response_gen(self) -> AsyncGenerator[str, None]:
    """用于虚拟流式传输,即使用工具输出。"""
    if not self.is_dummy_stream:
        raise ValueError(
            "response_gen is only available for streaming responses. "
            "Set is_dummy_stream=True if you still want a generator."
        )

    for token in self.response.split(" "):
        yield token + " "
        await asyncio.sleep(0.1)
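
A small usage sketch based on the listing above: with is_dummy_stream=True an already-complete response can still be consumed token by token (the tokens are just whitespace-split words, each emitted with a 0.1 s delay):

from llama_index.core.chat_engine.types import AgentChatResponse

chat_response = AgentChatResponse(
    response="The answer is 42.",
    is_dummy_stream=True,  # without this flag, response_gen raises ValueError
)

print(str(chat_response))  # -> The answer is 42.

for token in chat_response.response_gen:
    print(token, end="", flush=True)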

StreamingAgentChatResponse dataclass #

Streaming chat response to user and writing to chat history.

Source code in llama_index/core/chat_engine/types.py
@dataclass
class StreamingAgentChatResponse:
    """将聊天响应流式传输给用户,并写入聊天历史记录。"""

    response: str = ""
    sources: List[ToolOutput] = field(default_factory=list)
    chat_stream: Optional[ChatResponseGen] = None
    achat_stream: Optional[ChatResponseAsyncGen] = None
    source_nodes: List[NodeWithScore] = field(default_factory=list)
    unformatted_response: str = ""
    queue: Queue = field(default_factory=Queue)
    aqueue: Optional[asyncio.Queue] = None
    # flag when chat message is a function call
    is_function: Optional[bool] = None
    # flag when processing done
    is_done = False
    # signal when a new item is added to the queue
    new_item_event: Optional[asyncio.Event] = None
    # NOTE: async code uses two events rather than one since it yields
    # control when waiting for queue item
    # signal when the OpenAI functions stop executing
    is_function_false_event: Optional[asyncio.Event] = None
    # signal when an OpenAI function is being executed
    is_function_not_none_thread_event: Event = field(default_factory=Event)
    # Track if an exception occurred
    exception: Optional[Exception] = None

    def __post_init__(self) -> None:
        if self.sources and not self.source_nodes:
            for tool_output in self.sources:
                if isinstance(tool_output.raw_output, (Response, StreamingResponse)):
                    self.source_nodes.extend(tool_output.raw_output.source_nodes)

    def __str__(self) -> str:
        if self.is_done and not self.queue.empty() and not self.is_function:
            while self.queue.queue:
                delta = self.queue.queue.popleft()
                self.unformatted_response += delta
            self.response = self.unformatted_response.strip()
        return self.response

    def _ensure_async_setup(self) -> None:
        if self.aqueue is None:
            self.aqueue = asyncio.Queue()
        if self.new_item_event is None:
            self.new_item_event = asyncio.Event()
        if self.is_function_false_event is None:
            self.is_function_false_event = asyncio.Event()

    def put_in_queue(self, delta: Optional[str]) -> None:
        self.queue.put_nowait(delta)
        self.is_function_not_none_thread_event.set()

    def aput_in_queue(self, delta: Optional[str]) -> None:
        self.aqueue.put_nowait(delta)
        self.new_item_event.set()

    @dispatcher.span
    def write_response_to_history(
        self,
        memory: BaseMemory,
        on_stream_end_fn: Optional[callable] = None,
    ) -> None:
        if self.chat_stream is None:
            raise ValueError(
                "chat_stream is None. Cannot write to history without chat_stream."
            )
        dispatch_event = dispatcher.get_dispatch_event()

        # try/except to prevent hanging on error
        dispatch_event(StreamChatStartEvent())
        try:
            final_text = ""
            for chat in self.chat_stream:
                self.is_function = is_function(chat.message)
                if chat.delta:
                    dispatch_event(
                        StreamChatDeltaReceivedEvent(
                            delta=chat.delta,
                        )
                    )
                    self.put_in_queue(chat.delta)
                final_text += chat.delta or ""
            if self.is_function is not None:  # if loop has gone through iteration
                # NOTE: this is to handle the special case where we consume some of the
                # chat stream, but not all of it (e.g. in react agent)
                chat.message.content = final_text.strip()  # final message
                memory.put(chat.message)
        except Exception as e:
            dispatch_event(StreamChatErrorEvent(exception=e))
            self.exception = e

            # This act as is_done events for any consumers waiting
            self.is_function_not_none_thread_event.set()

            # force the queue reader to see the exception
            self.put_in_queue("")
            raise
        dispatch_event(StreamChatEndEvent())

        self.is_done = True

        # This act as is_done events for any consumers waiting
        self.is_function_not_none_thread_event.set()
        if on_stream_end_fn is not None and not self.is_function:
            on_stream_end_fn()

    @dispatcher.span
    async def awrite_response_to_history(
        self,
        memory: BaseMemory,
        on_stream_end_fn: Optional[callable] = None,
    ) -> None:
        self._ensure_async_setup()
        dispatch_event = dispatcher.get_dispatch_event()

        if self.achat_stream is None:
            raise ValueError(
                "achat_stream is None. Cannot asynchronously write to "
                "history without achat_stream."
            )

        # try/except to prevent hanging on error
        dispatch_event(StreamChatStartEvent())
        try:
            final_text = ""
            async for chat in self.achat_stream:
                self.is_function = is_function(chat.message)
                if chat.delta:
                    dispatch_event(
                        StreamChatDeltaReceivedEvent(
                            delta=chat.delta,
                        )
                    )
                    self.aput_in_queue(chat.delta)
                final_text += chat.delta or ""
                self.new_item_event.set()
                if self.is_function is False:
                    self.is_function_false_event.set()
            if self.is_function is not None:  # if loop has gone through iteration
                # NOTE: this is to handle the special case where we consume some of the
                # chat stream, but not all of it (e.g. in react agent)
                chat.message.content = final_text.strip()  # final message
                memory.put(chat.message)
        except Exception as e:
            dispatch_event(StreamChatErrorEvent(exception=e))
            self.exception = e

            # These act as is_done events for any consumers waiting
            self.is_function_false_event.set()
            self.new_item_event.set()

            # force the queue reader to see the exception
            self.aput_in_queue("")
            raise
        dispatch_event(StreamChatEndEvent())
        self.is_done = True

        # These act as is_done events for any consumers waiting
        self.is_function_false_event.set()
        self.new_item_event.set()
        if on_stream_end_fn is not None and not self.is_function:
            on_stream_end_fn()

    @property
    def response_gen(self) -> Generator[str, None, None]:
        while not self.is_done or not self.queue.empty():
            if self.exception is not None:
                raise self.exception

            try:
                delta = self.queue.get(block=False)
                self.unformatted_response += delta
                yield delta
            except Empty:
                # Queue is empty, but we're not done yet. Sleep for 0 secs to release the GIL and allow other threads to run.
                time.sleep(0)
        self.response = self.unformatted_response.strip()

    async def async_response_gen(self) -> AsyncGenerator[str, None]:
        self._ensure_async_setup()
        while True:
            if not self.aqueue.empty() or not self.is_done:
                if self.exception is not None:
                    raise self.exception

                try:
                    delta = await asyncio.wait_for(self.aqueue.get(), timeout=0.1)
                except asyncio.TimeoutError:
                    if self.is_done:
                        break
                    continue
                if delta is not None:
                    self.unformatted_response += delta
                    yield delta
            else:
                break
        self.response = self.unformatted_response.strip()

    def print_response_stream(self) -> None:
        for token in self.response_gen:
            print(token, end="", flush=True)

    async def aprint_response_stream(self) -> None:
        async for token in self.async_response_gen():
            print(token, end="", flush=True)
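
In practice this object is returned by stream_chat() / astream_chat() on a chat engine rather than constructed by hand. A hedged consumption sketch, assuming chat_engine is any concrete BaseChatEngine (for example one built from an index):

streaming_response = chat_engine.stream_chat("What did the author do growing up?")

# Option 1: iterate the deltas yourself.
for token in streaming_response.response_gen:
    print(token, end="", flush=True)

# Option 2 (instead of option 1): let the helper print them.
# streaming_response.print_response_stream()

# Once the generator is exhausted, the assembled text and sources are available.
print(streaming_response.response)
print(streaming_response.source_nodes)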

BaseChatEngine #

Bases: ABC

Base Chat Engine.

Source code in llama_index/core/chat_engine/types.py
class BaseChatEngine(ABC):
    """基础聊天引擎。"""

    @abstractmethod
    def reset(self) -> None:
        """重置对话状态。"""

    @abstractmethod
    def chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AGENT_CHAT_RESPONSE_TYPE:
        """主要聊天界面。"""

    @abstractmethod
    def stream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        """流式聊天界面。"""

    @abstractmethod
    async def achat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AGENT_CHAT_RESPONSE_TYPE:
        """异步版本的主要聊天界面。"""

    @abstractmethod
    async def astream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        """异步版本的主要聊天界面。"""

    def chat_repl(self) -> None:
        """进入交互式聊天REPL。"""
        print("===== Entering Chat REPL =====")
        print('Type "exit" to exit.\n')
        self.reset()
        message = input("Human: ")
        while message != "exit":
            response = self.chat(message)
            print(f"Assistant: {response}\n")
            message = input("Human: ")

    def streaming_chat_repl(self) -> None:
        """进入交互式聊天REPL,并获取实时响应。"""
        print("===== Entering Chat REPL =====")
        print('Type "exit" to exit.\n')
        self.reset()
        message = input("Human: ")
        while message != "exit":
            response = self.stream_chat(message)
            print("Assistant: ", end="", flush=True)
            response.print_response_stream()
            print("\n")
            message = input("Human: ")

    @property
    @abstractmethod
    def chat_history(self) -> List[ChatMessage]:
        pass
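
A toy subclass, shown only to illustrate the abstract surface; it echoes the user message back and is not part of the library (the ChatMessage/MessageRole import path is the commonly documented one and may differ between versions):

from typing import List, Optional

from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.chat_engine.types import (
    AgentChatResponse,
    BaseChatEngine,
    StreamingAgentChatResponse,
)


class EchoChatEngine(BaseChatEngine):
    """Toy engine that echoes the user's message back."""

    def __init__(self) -> None:
        self._history: List[ChatMessage] = []

    def reset(self) -> None:
        self._history = []

    def chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AgentChatResponse:
        reply = f"Echo: {message}"
        self._history.append(ChatMessage(role=MessageRole.USER, content=message))
        self._history.append(ChatMessage(role=MessageRole.ASSISTANT, content=reply))
        return AgentChatResponse(response=reply)

    def stream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        response = StreamingAgentChatResponse()
        # Pre-fill the queue and mark the stream done so response_gen terminates;
        # a real engine would instead attach chat_stream and call write_response_to_history.
        for token in f"Echo: {message}".split(" "):
            response.put_in_queue(token + " ")
        response.is_done = True
        return response

    async def achat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> AgentChatResponse:
        return self.chat(message, chat_history)

    async def astream_chat(
        self, message: str, chat_history: Optional[List[ChatMessage]] = None
    ) -> StreamingAgentChatResponse:
        return self.stream_chat(message, chat_history)

    @property
    def chat_history(self) -> List[ChatMessage]:
        return self._history


engine = EchoChatEngine()
print(engine.chat("hello"))  # -> Echo: hello
engine.stream_chat("hi there").print_response_stream()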

reset abstractmethod #

reset() -> None

Reset conversation state.

Source code in llama_index/core/chat_engine/types.py
@abstractmethod
def reset(self) -> None:
    """重置对话状态。"""

chat abstractmethod #

chat(
    message: str,
    chat_history: Optional[List[ChatMessage]] = None,
) -> AGENT_CHAT_RESPONSE_TYPE

Main chat interface.

Source code in llama_index/core/chat_engine/types.py
@abstractmethod
def chat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> AGENT_CHAT_RESPONSE_TYPE:
    """主要聊天界面。"""

stream_chat abstractmethod #

stream_chat(
    message: str,
    chat_history: Optional[List[ChatMessage]] = None,
) -> StreamingAgentChatResponse

Stream chat interface.

Source code in llama_index/core/chat_engine/types.py
@abstractmethod
def stream_chat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> StreamingAgentChatResponse:
    """流式聊天界面。"""

achat abstractmethod async #

achat(
    message: str,
    chat_history: Optional[List[ChatMessage]] = None,
) -> AGENT_CHAT_RESPONSE_TYPE

Async version of main chat interface.

Source code in llama_index/core/chat_engine/types.py
@abstractmethod
async def achat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> AGENT_CHAT_RESPONSE_TYPE:
    """异步版本的主要聊天界面。"""

astream_chat abstractmethod async #

astream_chat(
    message: str,
    chat_history: Optional[List[ChatMessage]] = None,
) -> StreamingAgentChatResponse

Async version of main chat interface.

Source code in llama_index/core/chat_engine/types.py
@abstractmethod
async def astream_chat(
    self, message: str, chat_history: Optional[List[ChatMessage]] = None
) -> StreamingAgentChatResponse:
    """异步版本的主要聊天界面。"""

chat_repl #

chat_repl() -> None

Enter interactive chat REPL.

Source code in llama_index/core/chat_engine/types.py
def chat_repl(self) -> None:
    """进入交互式聊天REPL。"""
    print("===== Entering Chat REPL =====")
    print('Type "exit" to exit.\n')
    self.reset()
    message = input("Human: ")
    while message != "exit":
        response = self.chat(message)
        print(f"Assistant: {response}\n")
        message = input("Human: ")

streaming_chat_repl #

streaming_chat_repl() -> None

Enter interactive chat REPL with streaming responses.

Source code in llama_index/core/chat_engine/types.py
def streaming_chat_repl(self) -> None:
    """进入交互式聊天REPL,并获取实时响应。"""
    print("===== Entering Chat REPL =====")
    print('Type "exit" to exit.\n')
    self.reset()
    message = input("Human: ")
    while message != "exit":
        response = self.stream_chat(message)
        print("Assistant: ", end="", flush=True)
        response.print_response_stream()
        print("\n")
        message = input("Human: ")
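
Both REPL helpers are intended for quick manual testing from a terminal. Assuming chat_engine is any concrete implementation, the streaming variant is started with a single call:

chat_engine.streaming_chat_repl()
# ===== Entering Chat REPL =====
# Type "exit" to exit.
#
# Human: ...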

ChatMode #

Bases: str, Enum

Chat Engine Modes.

Source code in llama_index/core/chat_engine/types.py
class ChatMode(str, Enum):
    """聊天引擎模式。"""

    SIMPLE = "simple"
    """对应于`SimpleChatEngine`。

与LLM进行聊天,而不使用知识库。"""

    CONDENSE_QUESTION = "condense_question"
    """对应于`CondenseQuestionChatEngine`。

    首先从对话上下文和最后一条消息生成一个独立的问题,
    然后查询查询引擎以获取响应。"""

    CONTEXT = "context"
    """对应于 `ContextChatEngine`。

    首先使用用户的消息从索引中检索文本,然后使用系统提示中的上下文生成响应。"""

    CONDENSE_PLUS_CONTEXT = "condense_plus_context"
    """对应于`CondensePlusContextChatEngine`。

    首先将对话和最新用户消息压缩为独立的问题。
    然后从检索器构建独立问题的上下文,
    然后将上下文与提示和用户消息一起传递给LLM以生成响应。"""

    REACT = "react"
    """对应于`ReActAgent`。

使用ReAct代理循环与查询引擎工具。"""

    OPENAI = "openai"
    """对应于`OpenAIAgent`。

使用一个调用OpenAI函数的代理循环。

注意:仅适用于支持函数调用API的OpenAI模型。"""

    BEST = "best"
    """根据当前的LLM选择最佳的聊天引擎。

如果使用支持函数调用API的OpenAI模型,则对应于`OpenAIAgent`,否则对应于`ReActAgent`。"""

SIMPLE class-attribute instance-attribute #

SIMPLE = 'simple'

Corresponds to SimpleChatEngine.

Chat with LLM, without making use of a knowledge base.

CONDENSE_QUESTION class-attribute instance-attribute #

CONDENSE_QUESTION = 'condense_question'

Corresponds to CondenseQuestionChatEngine.

First generate a standalone question from the conversation context and the last message, then query the query engine for a response.

CONTEXT class-attribute instance-attribute #

CONTEXT = 'context'

Corresponds to ContextChatEngine.

First retrieve text from the index using the user's message, then use the context in the system prompt to generate a response.

CONDENSE_PLUS_CONTEXT class-attribute instance-attribute #

CONDENSE_PLUS_CONTEXT = 'condense_plus_context'

Corresponds to CondensePlusContextChatEngine.

First condense the conversation and the latest user message into a standalone question, then build a context for the standalone question from a retriever, and finally pass the context along with the prompt and user message to the LLM to generate a response.

REACT class-attribute instance-attribute #

REACT = 'react'

Corresponds to ReActAgent.

Use a ReAct agent loop with query engine tools.

OPENAI class-attribute instance-attribute #

OPENAI = 'openai'

Corresponds to OpenAIAgent.

Use an OpenAI function calling agent loop.

NOTE: only works with OpenAI models that support the function calling API.

BEST class-attribute instance-attribute #

BEST = 'best'

Select the best chat engine based on the current LLM.

Corresponds to OpenAIAgent if using an OpenAI model that supports the function calling API, otherwise corresponds to ReActAgent.
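
These values are typically passed to an index's as_chat_engine() factory rather than used directly. A hedged sketch, assuming a VectorStoreIndex named index has already been built and an LLM is configured:

from llama_index.core.chat_engine.types import ChatMode

chat_engine = index.as_chat_engine(
    chat_mode=ChatMode.CONDENSE_PLUS_CONTEXT,  # str-backed enum, so "condense_plus_context" also works
)
response = chat_engine.chat("What is this document about?")
print(response)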

is_function #

is_function(message: ChatMessage) -> bool

Utility for ChatMessage responses from OpenAI models.

Source code in llama_index/core/chat_engine/types.py
def is_function(message: ChatMessage) -> bool:
    """用于OpenAI模型的ChatMessage响应的实用程序。"""
    return "tool_calls" in message.additional_kwargs