Source code for langchain_experimental.comprehend_moderation.pii

import asyncio
from typing import Any, Dict, Optional

from langchain_experimental.comprehend_moderation.base_moderation_exceptions import (
    ModerationPiiError,
)


[docs]class ComprehendPII: """处理个人身份信息(PII)审核的类。"""
[docs] def __init__( self, client: Any, callback: Optional[Any] = None, unique_id: Optional[str] = None, chain_id: Optional[str] = None, ) -> None: self.client = client self.moderation_beacon = { "moderation_chain_id": chain_id, "moderation_type": "PII", "moderation_status": "LABELS_NOT_FOUND", } self.callback = callback self.unique_id = unique_id
[docs] def validate(self, prompt_value: str, config: Any = None) -> str: redact = config.get("redact") return ( self._detect_pii(prompt_value=prompt_value, config=config) if redact else self._contains_pii(prompt_value=prompt_value, config=config) )
def _contains_pii(self, prompt_value: str, config: Any = None) -> str: """检查是否存在超过指定阈值的个人身份信息(PII)标签。使用Amazon Comprehend Contains PII Entities API。参见-https://docs.aws.amazon.com/comprehend/latest/APIReference/API_ContainsPiiEntities.html 参数: prompt_value(str):要检查PII标签的输入文本。 config(Dict[str, Any]):PII检查和操作的配置。 返回: str:原始提示 注意: - 提供的客户端应使用有效的AWS凭证进行初始化。 """ pii_identified = self.client.contains_pii_entities( Text=prompt_value, LanguageCode="en" ) if self.callback and self.callback.pii_callback: self.moderation_beacon["moderation_input"] = prompt_value self.moderation_beacon["moderation_output"] = pii_identified threshold = config.get("threshold") pii_labels = config.get("labels") pii_found = False for entity in pii_identified["Labels"]: if (entity["Score"] >= threshold and entity["Name"] in pii_labels) or ( entity["Score"] >= threshold and not pii_labels ): pii_found = True break if self.callback and self.callback.pii_callback: if pii_found: self.moderation_beacon["moderation_status"] = "LABELS_FOUND" asyncio.create_task( self.callback.on_after_pii(self.moderation_beacon, self.unique_id) ) if pii_found: raise ModerationPiiError return prompt_value def _detect_pii(self, prompt_value: str, config: Optional[Dict[str, Any]]) -> str: """检测并处理给定提示文本中的个人身份信息(PII)实体,使用Amazon Comprehend的detect_pii_entities API。该函数提供了基于识别的PII实体和提供的配置来进行编辑或停止处理的选项。使用Amazon Comprehend Detect PII Entities API。 参数: prompt_value(str):要检查PII实体的输入文本。 config(Dict[str, Any]):指定如何处理PII实体的配置。 返回: str:处理后的提示文本,包含编辑后的PII实体或引发的异常。 引发: ValueError:如果提示包含配置的PII实体以停止处理。 注意: - 如果在提示中未找到PII,则返回原始提示。 - 客户端应使用有效的AWS凭据进行初始化。 """ pii_identified = self.client.detect_pii_entities( Text=prompt_value, LanguageCode="en" ) if self.callback and self.callback.pii_callback: self.moderation_beacon["moderation_input"] = prompt_value self.moderation_beacon["moderation_output"] = pii_identified if (pii_identified["Entities"]) == []: if self.callback and self.callback.pii_callback: asyncio.create_task( self.callback.on_after_pii(self.moderation_beacon, self.unique_id) ) return prompt_value pii_found = False if not config and pii_identified["Entities"]: for entity in pii_identified["Entities"]: if entity["Score"] >= 0.5: pii_found = True break if self.callback and self.callback.pii_callback: if pii_found: self.moderation_beacon["moderation_status"] = "LABELS_FOUND" asyncio.create_task( self.callback.on_after_pii(self.moderation_beacon, self.unique_id) ) if pii_found: raise ModerationPiiError else: threshold = config.get("threshold") # type: ignore pii_labels = config.get("labels") # type: ignore mask_marker = config.get("mask_character") # type: ignore pii_found = False for entity in pii_identified["Entities"]: if ( pii_labels and entity["Type"] in pii_labels and entity["Score"] >= threshold ) or (not pii_labels and entity["Score"] >= threshold): pii_found = True char_offset_begin = entity["BeginOffset"] char_offset_end = entity["EndOffset"] mask_length = char_offset_end - char_offset_begin + 1 masked_part = mask_marker * mask_length prompt_value = ( prompt_value[:char_offset_begin] + masked_part + prompt_value[char_offset_end + 1 :] ) if self.callback and self.callback.pii_callback: if pii_found: self.moderation_beacon["moderation_status"] = "LABELS_FOUND" asyncio.create_task( self.callback.on_after_pii(self.moderation_beacon, self.unique_id) ) return prompt_value