Skip to content

Llama guard moderator

LlamaGuardModeratorPack #

Bases: BaseLlamaPack

Source code in llama_index/packs/llama_guard_moderator/base.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
class LlamaGuardModeratorPack(BaseLlamaPack):
    """Llama pack that moderates text with the ``meta-llama/LlamaGuard-7b`` model.

    The tokenizer and model are loaded once in ``__init__``; ``run`` then wraps
    a single message in a moderation prompt and returns the decoded text the
    model generates for it.
    """

    def __init__(
        self,
        custom_taxonomy: str = DEFAULT_TAXONOMY,
    ) -> None:
        """Log in to Hugging Face and load the Llama Guard tokenizer and model.

        Args:
            custom_taxonomy: Text placed between the unsafe-content category
                markers of the moderation prompt.

        Raises:
            ImportError: If ``torch`` or ``transformers`` cannot be imported.
            ValueError: If the ``HUGGINGFACE_ACCESS_TOKEN`` environment
                variable is unset or empty.
        """
        try:
            import torch
            from transformers import AutoModelForCausalLM, AutoTokenizer
        except ImportError as err:
            # Chain the original error so the real missing module stays visible.
            raise ImportError(
                "Dependencies missing, run `pip install torch transformers`"
            ) from err

        import os

        # Read the token once and validate the value that is actually used.
        hf_access_token = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
        if not hf_access_token:
            raise ValueError("Hugging Face access token is missing or invalid.")

        from huggingface_hub import login

        login(token=hf_access_token)

        model_id = "meta-llama/LlamaGuard-7b"
        # The device is fixed to CUDA; the same value is reused in run() to
        # move the tokenized inputs.
        self.device = "cuda"
        dtype = torch.bfloat16

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id, torch_dtype=dtype, device_map=self.device
        )

        self.custom_taxonomy = custom_taxonomy

    def get_modules(self) -> Dict[str, Any]:
        """Return the pack's components keyed by name.

        Returns:
            Mapping with the entries "tokenizer", "model", "device" and
            "custom_taxonomy".
        """
        return {
            "tokenizer": self.tokenizer,
            "model": self.model,
            "device": self.device,
            "custom_taxonomy": self.custom_taxonomy,
        }

    def run(self, message: str, **kwargs: Any) -> Any:
        """Moderate a single message and return the model's decoded output.

        Args:
            message: Text to assess; it is treated as one "user" turn.
            **kwargs: Accepted but not used by this method.

        Returns:
            The text decoded from the tokens generated after the prompt, with
            special tokens skipped.
        """
        # tailored for query engine input/output, using "user" role
        chat = [{"role": "user", "content": message}]

        prompt = self._moderation_prompt_for_chat(chat)
        inputs = self.tokenizer([prompt], return_tensors="pt").to(self.device)
        output = self.model.generate(**inputs, max_new_tokens=100, pad_token_id=0)
        # Skip the first prompt_len positions so only the tokens that follow
        # the prompt are decoded.
        prompt_len = inputs["input_ids"].shape[-1]
        return self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

    def _moderation_prompt_for_chat(self, chat) -> str:
        """Build the moderation prompt for a conversation.

        Args:
            chat: Sequence of turns, each a mapping with a "content" entry.
                Turns are assumed to alternate user/assistant starting with
                the user; the "role" entries themselves are not inspected.

        Returns:
            The complete ``[INST] ... [/INST]`` prompt string.
        """
        conversation = [turn["content"] for turn in chat]
        # The speaker of the last turn is the one being assessed (odd turn
        # count -> "User", even -> "Agent"). Kept in its own variable so the
        # closing sentence no longer relies on the loop variable leaking.
        assessed_role = "Agent" if len(conversation) % 2 == 0 else "User"
        # NOTE: the four-space indentation inside the literals below is part of
        # the emitted prompt and is deliberately left unchanged.
        prompt = f"""[INST] Task: Check if there is unsafe content in '{assessed_role}' messages in conversations according our safety policy with the below categories.

    <BEGIN UNSAFE CONTENT CATEGORIES>
    {self.custom_taxonomy}
    <END UNSAFE CONTENT CATEGORIES>

    <BEGIN CONVERSATION>

    """

        # Alternate User/Agent turns, inserting 2 newlines between each
        prompt += "".join(
            f"{'User' if i % 2 == 0 else 'Agent'}: {m}\n\n"
            for i, m in enumerate(conversation)
        )
        prompt += f"""<END CONVERSATION>

    Provide your safety assessment for {assessed_role} in the above conversation:
    - First line must read 'safe' or 'unsafe'.
    - If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""
        return prompt

get_modules #

get_modules() -> Dict[str, Any]

Get modules.

Source code in llama_index/packs/llama_guard_moderator/base.py
90
91
92
93
94
95
96
97
def get_modules(self) -> Dict[str, Any]:
    """Return the pack's components keyed by name.

    Returns:
        Mapping with the entries "tokenizer", "model", "device" and
        "custom_taxonomy", each holding the attribute of the same name.
    """
    exposed = ("tokenizer", "model", "device", "custom_taxonomy")
    return {attr: getattr(self, attr) for attr in exposed}

run #

run(message: str, **kwargs: Any) -> Any

Run the pipeline.

Source code in llama_index/packs/llama_guard_moderator/base.py
 99
100
101
102
103
104
105
106
107
108
def run(self, message: str, **kwargs: Any) -> Any:
    """Moderate a single message and return the model's decoded output.

    Args:
        message: Text to assess; it is treated as one "user" turn.
        **kwargs: Accepted but not used by this method.

    Returns:
        The text decoded from the tokens generated after the prompt, with
        special tokens skipped.
    """
    # Shaped for query engine input/output: the message is the only "user" turn.
    moderation_prompt = self._moderation_prompt_for_chat(
        [{"role": "user", "content": message}]
    )

    encoded = self.tokenizer([moderation_prompt], return_tensors="pt")
    encoded = encoded.to(self.device)
    generated = self.model.generate(**encoded, max_new_tokens=100, pad_token_id=0)

    # Skip as many leading positions as the prompt has tokens, so only what
    # follows the prompt is decoded.
    prompt_token_count = encoded["input_ids"].shape[-1]
    continuation = generated[0][prompt_token_count:]
    return self.tokenizer.decode(continuation, skip_special_tokens=True)