Source code for langchain_google_vertexai.callbacks
import threading
from typing import Any, Dict, List
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import LLMResult
[docs]
class VertexAICallbackHandler(BaseCallbackHandler):
    """Callback Handler that tracks VertexAI info.

    Every ``on_llm_end`` call adds the usage figures found in the response's
    ``generation_info["usage_metadata"]`` to the running totals below and
    bumps ``successful_requests`` by one. The totals are updated while
    holding an internal lock.
    """

    # Running totals. They start as class-level zeros and become instance
    # attributes the first time ``on_llm_end`` adds to them.
    prompt_tokens: int = 0
    prompt_characters: int = 0
    completion_tokens: int = 0
    completion_characters: int = 0
    successful_requests: int = 0

    def __init__(self) -> None:
        """Initialise the base handler and create the lock for the counters."""
        super().__init__()
        self._lock = threading.Lock()

    def __repr__(self) -> str:
        """Return the current counters as a multi-line text summary."""
        return (
            f"\tPrompt tokens: {self.prompt_tokens}\n"
            f"\tPrompt characters: {self.prompt_characters}\n"
            f"\tCompletion tokens: {self.completion_tokens}\n"
            f"\tCompletion characters: {self.completion_characters}\n"
            f"Successful requests: {self.successful_requests}\n"
        )

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
        return True

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Runs when LLM starts running. Nothing is recorded here."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Runs on new LLM token. Only available when streaming is enabled.

        Nothing is recorded here.
        """

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Collects token and character usage from a finished LLM call.

        Args:
            response: The LLM result. For each entry of
                ``response.generations`` only the first generation's
                ``generation_info`` is inspected.
            **kwargs: Accepted for interface compatibility; unused.
        """
        completion_tokens = prompt_tokens = 0
        completion_characters = prompt_characters = 0

        for generations in response.generations:
            # Skip entries with no candidates or no generation_info.
            if not generations or not generations[0].generation_info:
                continue

            # ``.get(key, {})`` still returns None when the key is present
            # with a None value, so normalise to an empty mapping to avoid an
            # AttributeError on the lookups below.
            usage_metadata = (
                generations[0].generation_info.get("usage_metadata") or {}
            )
            completion_tokens += usage_metadata.get("candidates_token_count", 0)
            prompt_tokens += usage_metadata.get("prompt_token_count", 0)
            completion_characters += usage_metadata.get(
                "candidates_billable_characters", 0
            )
            prompt_characters += usage_metadata.get("prompt_billable_characters", 0)

        # Apply all increments in one critical section so the counters are
        # updated together.
        with self._lock:
            self.prompt_characters += prompt_characters
            self.prompt_tokens += prompt_tokens
            self.completion_characters += completion_characters
            self.completion_tokens += completion_tokens
            self.successful_requests += 1