PHP code example of lovevivi / hyperf-sensitive-words
1. Go to this page and download the library: Download lovevivi/hyperf-sensitive-words library. Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
lovevivi / hyperf-sensitive-words example snippets
// Configuration options for the sensitive-words component.
return [
// Path to a user-defined sensitive-word library; leave empty to use the default library
'word_path' => '',
// Word-library merge mode: override = overwrite mode, append = append mode
'merge_mode' => 'append',
// Whether to enable automatic filtering by the middleware
'middleware_enable' => false,
// Replacement character
'replace_char' => '*',
// Whether to repeat the replacement character
'repeat_char' => true,
// Which HTTP request parameters to process
'http_params' => ['content', 'text', 'message'],
// Whether to enable the word-library cache
'enable_cache' => true,
// Cache expiry in seconds; default is 86400 seconds (1 day)
'cache_expire' => 86400,
// Directory for cache files; leave empty to let it be decided by priority
'cache_path' => '',
// Whether to warm up the word library when the application starts
'preload' => false,
// Whether to enable the prefix index for faster matching
'enable_prefix_index' => true,
// Emoji handling strategy (ignore, remove, replace,
namespace App\Service;
use SensitiveWords\SensitiveWordsManager;
/**
 * Example service that masks sensitive words in user-supplied content.
 */
class ContentService
{
    /**
     * @var SensitiveWordsManager
     */
    protected $sensitiveWordsManager;

    /**
     * @param SensitiveWordsManager $sensitiveWordsManager Manager used for detection and replacement.
     */
    public function __construct(SensitiveWordsManager $sensitiveWordsManager)
    {
        $this->sensitiveWordsManager = $sensitiveWordsManager;
    }

    /**
     * Return the given content with its sensitive words replaced.
     *
     * @param string $content Text to filter.
     * @return string The replaced text, or the unchanged text when nothing sensitive is found.
     */
    public function filterContent(string $content): string
    {
        // check() is truthy when the content contains sensitive words, so the
        // replacement must run in that case (the condition was previously negated,
        // which left offending content untouched).
        if ($this->sensitiveWordsManager->check($content)) {
            // Replace the sensitive words. Alternatively, getBadWords($content)
            // can be called here to obtain the list of matched words instead.
            $content = $this->sensitiveWordsManager->replace($content);
        }

        return $content;
    }
}
### 1.1. 模糊匹配功能 **[新增]**
namespace App\Controller;
use Hyperf\HttpServer\Annotation\Controller;
use Hyperf\HttpServer\Annotation\RequestMapping;
use SensitiveWords\Annotation\SensitiveCheck;
/**
 * Example controller demonstrating annotation-driven sensitive-word filtering.
 *
 * @Controller
 */
class ContentController
{
    /**
     * Read the submitted text, run it through the annotated helper and return both versions.
     *
     * The request type is written fully qualified because this snippet does not
     * import it; an unqualified RequestInterface would resolve inside the
     * App\Controller namespace and fail to load.
     *
     * @RequestMapping(path="/sensitive/aspect-test", methods={"GET", "POST"})
     *
     * @param \Hyperf\HttpServer\Contract\RequestInterface $request Current HTTP request.
     * @return array Payload with 'code', 'message' and 'data' (original and processed content).
     */
    public function test(\Hyperf\HttpServer\Contract\RequestInterface $request)
    {
        // Fetch the content from the request.
        $content = $request->input('string', '');

        // Call the helper method; it is intercepted by the aspect.
        $filteredContent = $this->filterContent($content);

        // Build the response from the filtered content.
        return [
            'code' => 0,
            'message' => '成功',
            'data' => [
                'original_content' => $content,
                'processed_content' => $filteredContent,
            ],
        ];
    }

    /**
     * Helper method: filter sensitive words.
     *
     * @SensitiveCheck(param="content", replace=true, replaceChar="#")
     *
     * @param string $content Text to filter.
     * @return string The content as received by the method body.
     */
    protected function filterContent(string $content): string
    {
        // The aspect processes the $content argument before the method body runs,
        // so the (possibly already filtered) content is returned as is.
        return $content;
    }
}
// Middleware configuration: lists SensitiveWordsMiddleware under the 'http' key.
// NOTE(review): presumably this belongs in the application's middleware config file — confirm the target path.
return [
'http' => [
SensitiveWords\Middleware\SensitiveWordsMiddleware::class,
],
];
use Hyperf\HttpServer\Annotation\Middleware;
use SensitiveWords\Middleware\SensitiveWordsMiddleware;
/**
* @Middleware(SensitiveWordsMiddleware::class)
*/
class SensitiveMiddlewareTestController
// Warm up the word library (call at application startup to improve first-request performance)
$container->get(SensitiveWordsManager::class)->warmup();
// Clear the word-library cache
$container->get(SensitiveWordsManager::class)->clearCache();
// Set the word library (overwrites the existing library) - this method has been renamed
$container->get(SensitiveWordsManager::class)->setWordLibrary(['新词1', '新词2']);
// Incrementally add sensitive words
$container->get(SensitiveWordsManager::class)->addWords(['新词1', '新词2']);
// Whitelist management through the manager resolved from the container.
$manager = $container->get(SensitiveWordsManager::class);
// Add whitelist words
$manager->addWhitelistWords(['正常词汇', '合法内容']);
// Remove whitelist words
$manager->removeWhitelistWords(['不再需要的白名单']);
// Set the whitelist (overwrites the existing whitelist)
$manager->setWhitelistWords(['新白名单1', '新白名单2']);
// Get all current whitelist words
$whitelistWords = $manager->getWhitelistWords();
// Check whether a word is in the whitelist
$isWhitelisted = $manager->isWhitelisted('某个词语');
// Clear the entire whitelist
$manager->clearWhitelist();
$manager = $container->get(SensitiveWordsManager::class);
// Get every sensitive word in the current word library
$allSensitiveWords = $manager->getAllSensitiveWords();
// Example output: ['敏感词1', '敏感词2', '违禁词', ...]
var_dump($allSensitiveWords);
// Useful for word-library management, statistical analysis and similar scenarios
$wordCount = count($allSensitiveWords);
echo "当前词库包含 {$wordCount} 个敏感词";
// Get detailed position information for the sensitive words
$badWordsDetails = $manager->getBadWords($content, 0, true); // detailed information is returned when the third argument is true
// Return format:
// [
// ['word' => '敏感词', 'offset' => 5, 'len' => 3],
// ['word' => '违禁词', 'offset' => 12, 'len' => 3]
// ]
// Sample text that interleaves other characters and emoji with a sensitive word.
$bypassText = 'a法😊b轮😜c功d相关内容';
// Normal detection: cannot recognize the bypass
$normalWords = $manager->getBadWords($bypassText, 0, false);
// Result: [] (empty array)
// Fuzzy detection: recognizes the bypass
// NOTE(review): elsewhere in this document the third argument is described as
// returning detailed position info — confirm which meaning applies.
$fuzzyWords = $manager->getBadWords($bypassText, 0, true);
// Result: ['法轮功'] (the original sensitive word is found)
$manager->addWhitelistWords(['assessment', 'helloween']);
// "assessment" and "helloween" will not be detected as sensitive words
// The sensitive-word library contains: ['ass', 'hell']
// The whitelist contains: ['assessment', 'helloween']
$text = 'This is an assessment of the helloween party.';
// "ass" and "hell" are not detected because they are part of whitelisted words
// Add at runtime
$manager->addWhitelistWords(['新的合法词汇']);
// Check whether a word is in the whitelist
if ($manager->isWhitelisted('某个词')) {
// Handling logic goes here
}
// Get the whole whitelist
$allWhitelist = $manager->getWhitelistWords();
$details = $manager->getBadWords($content, 0, true);
// Returns: [['word' => '敏感词', 'offset' => 5, 'len' => 3], ...]
bash
php bin/hyperf.php vendor:publish lovevivi/hyperf-sensitive-words
namespace App\Service;
use SensitiveWords\SensitiveWordsManager;
class AdvancedContentService
{
/**
 * Manager used by this service for all sensitive-word checks.
 *
 * @var SensitiveWordsManager
 */
protected $sensitiveWordsManager;
/**
 * Store the injected manager on the instance.
 *
 * @param SensitiveWordsManager $sensitiveWordsManager Manager to use for detection.
 */
public function __construct(SensitiveWordsManager $sensitiveWordsManager)
{
$this->sensitiveWordsManager = $sensitiveWordsManager;
}
/**
 * Run normal and fuzzy detection on the content and report both results side by side.
 *
 * @param string $content Text to inspect.
 * @return array Keys: 'normal_detection' and 'fuzzy_detection' (each with
 *               'has_sensitive' and 'bad_words'), plus 'bypass_detected'.
 */
public function smartFilterContent(string $content): array
{
    $manager = $this->sensitiveWordsManager;
    $report = [];

    // Normal detection: fast, but cannot recognize bypass techniques.
    $report['normal_detection'] = [
        'has_sensitive' => $manager->check($content),
        'bad_words' => $manager->getBadWords($content),
    ];

    // Fuzzy detection: able to uncover various bypass techniques.
    $report['fuzzy_detection'] = [
        'has_sensitive' => $manager->check($content, true),
        'bad_words' => $manager->getBadWords($content, 0, true),
    ];

    // A bypass is flagged when only the fuzzy pass reports a hit.
    $report['bypass_detected'] = $report['fuzzy_detection']['has_sensitive']
        && !$report['normal_detection']['has_sensitive'];

    return $report;
}
/**
 * Reject content in which fuzzy detection finds sensitive words, logging the attempt.
 *
 * @param string $content Text to inspect.
 * @return string The original content when nothing is detected, otherwise a fixed rejection message.
 */
public function handleBypassAttempts(string $content): string
{
    // Example bypass text: 'a法😊b轮😜c功d' (an attempt to sneak "法轮功" past the filter).
    // Normal detection returns [] for such text, so its result was never used;
    // only the fuzzy scan is run here to avoid a redundant pass over the content.
    $fuzzyDetected = $this->sensitiveWordsManager->getBadWords($content, 0, true);

    if (empty($fuzzyDetected)) {
        return $content;
    }

    // A bypass attempt was found: log it (or take other measures) and reject the content.
    logger()->warning('检测到敏感词绕过尝试', [
        'content' => $content,
        'detected_words' => $fuzzyDetected,
    ]);

    return '内容包含不当信息';
}