Source code for langchain_experimental.comprehend_moderation.pii
import asyncio
from typing import Any, Dict, Optional
from langchain_experimental.comprehend_moderation.base_moderation_exceptions import (
ModerationPiiError,
)
[docs]class ComprehendPII:
"""处理个人身份信息(PII)审核的类。"""
[docs] def __init__(
self,
client: Any,
callback: Optional[Any] = None,
unique_id: Optional[str] = None,
chain_id: Optional[str] = None,
) -> None:
self.client = client
self.moderation_beacon = {
"moderation_chain_id": chain_id,
"moderation_type": "PII",
"moderation_status": "LABELS_NOT_FOUND",
}
self.callback = callback
self.unique_id = unique_id
[docs] def validate(self, prompt_value: str, config: Any = None) -> str:
redact = config.get("redact")
return (
self._detect_pii(prompt_value=prompt_value, config=config)
if redact
else self._contains_pii(prompt_value=prompt_value, config=config)
)
def _contains_pii(self, prompt_value: str, config: Any = None) -> str:
"""检查是否存在超过指定阈值的个人身份信息(PII)标签。使用Amazon Comprehend Contains PII Entities API。参见-https://docs.aws.amazon.com/comprehend/latest/APIReference/API_ContainsPiiEntities.html
参数:
prompt_value(str):要检查PII标签的输入文本。
config(Dict[str, Any]):PII检查和操作的配置。
返回:
str:原始提示
注意:
- 提供的客户端应使用有效的AWS凭证进行初始化。
"""
pii_identified = self.client.contains_pii_entities(
Text=prompt_value, LanguageCode="en"
)
if self.callback and self.callback.pii_callback:
self.moderation_beacon["moderation_input"] = prompt_value
self.moderation_beacon["moderation_output"] = pii_identified
threshold = config.get("threshold")
pii_labels = config.get("labels")
pii_found = False
for entity in pii_identified["Labels"]:
if (entity["Score"] >= threshold and entity["Name"] in pii_labels) or (
entity["Score"] >= threshold and not pii_labels
):
pii_found = True
break
if self.callback and self.callback.pii_callback:
if pii_found:
self.moderation_beacon["moderation_status"] = "LABELS_FOUND"
asyncio.create_task(
self.callback.on_after_pii(self.moderation_beacon, self.unique_id)
)
if pii_found:
raise ModerationPiiError
return prompt_value
def _detect_pii(self, prompt_value: str, config: Optional[Dict[str, Any]]) -> str:
"""检测并处理给定提示文本中的个人身份信息(PII)实体,使用Amazon Comprehend的detect_pii_entities API。该函数提供了基于识别的PII实体和提供的配置来进行编辑或停止处理的选项。使用Amazon Comprehend Detect PII Entities API。
参数:
prompt_value(str):要检查PII实体的输入文本。
config(Dict[str, Any]):指定如何处理PII实体的配置。
返回:
str:处理后的提示文本,包含编辑后的PII实体或引发的异常。
引发:
ValueError:如果提示包含配置的PII实体以停止处理。
注意:
- 如果在提示中未找到PII,则返回原始提示。
- 客户端应使用有效的AWS凭据进行初始化。
"""
pii_identified = self.client.detect_pii_entities(
Text=prompt_value, LanguageCode="en"
)
if self.callback and self.callback.pii_callback:
self.moderation_beacon["moderation_input"] = prompt_value
self.moderation_beacon["moderation_output"] = pii_identified
if (pii_identified["Entities"]) == []:
if self.callback and self.callback.pii_callback:
asyncio.create_task(
self.callback.on_after_pii(self.moderation_beacon, self.unique_id)
)
return prompt_value
pii_found = False
if not config and pii_identified["Entities"]:
for entity in pii_identified["Entities"]:
if entity["Score"] >= 0.5:
pii_found = True
break
if self.callback and self.callback.pii_callback:
if pii_found:
self.moderation_beacon["moderation_status"] = "LABELS_FOUND"
asyncio.create_task(
self.callback.on_after_pii(self.moderation_beacon, self.unique_id)
)
if pii_found:
raise ModerationPiiError
else:
threshold = config.get("threshold") # type: ignore
pii_labels = config.get("labels") # type: ignore
mask_marker = config.get("mask_character") # type: ignore
pii_found = False
for entity in pii_identified["Entities"]:
if (
pii_labels
and entity["Type"] in pii_labels
and entity["Score"] >= threshold
) or (not pii_labels and entity["Score"] >= threshold):
pii_found = True
char_offset_begin = entity["BeginOffset"]
char_offset_end = entity["EndOffset"]
mask_length = char_offset_end - char_offset_begin + 1
masked_part = mask_marker * mask_length
prompt_value = (
prompt_value[:char_offset_begin]
+ masked_part
+ prompt_value[char_offset_end + 1 :]
)
if self.callback and self.callback.pii_callback:
if pii_found:
self.moderation_beacon["moderation_status"] = "LABELS_FOUND"
asyncio.create_task(
self.callback.on_after_pii(self.moderation_beacon, self.unique_id)
)
return prompt_value