mirror of
https://github.com/JasonsGong/JasonsGong.github.io.git
synced 2024-11-24 11:49:36 +08:00
281 lines
45 KiB
HTML
281 lines
45 KiB
HTML
<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>DFA算法实现敏感词自管理 | The Blog</title><meta name="author" content="Jason"><meta name="copyright" content="Jason"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="一.需求分析 在审核文本内容的时候,我们可以调用第三方成熟的服务(例如阿里云的内容安全)来实现,但是基于不同的场景,第三方的服务不可能涉及到方方面面的敏感词,比如在游戏类的场景中,开挂一词算敏感词,但是在其它的场景中,开挂一词是一个正常的词汇。这时候需要我们根据不同场景自己维护一套敏感词,在文本审核的时候,需要验证文本中是否包含这些敏感词。 二.可选方案 方案 说明 数据库模糊查询">
|
||
<meta property="og:type" content="article">
|
||
<meta property="og:title" content="DFA算法实现敏感词自管理">
|
||
<meta property="og:url" content="https://qingling.icu/posts/28101.html">
|
||
<meta property="og:site_name" content="The Blog">
|
||
<meta property="og:description" content="一.需求分析 在审核文本内容的时候,我们可以调用第三方成熟的服务(例如阿里云的内容安全)来实现,但是基于不同的场景,第三方的服务不可能涉及到方方面面的敏感词,比如在游戏类的场景中,开挂一词算敏感词,但是在其它的场景中,开挂一词是一个正常的词汇。这时候需要我们根据不同场景自己维护一套敏感词,在文本审核的时候,需要验证文本中是否包含这些敏感词。 二.可选方案 方案 说明 数据库模糊查询">
|
||
<meta property="og:locale" content="zh_CN">
|
||
<meta property="og:image" content="https://qingling.icu/img/9.png">
|
||
<meta property="article:published_time" content="2023-12-03T05:28:05.000Z">
|
||
<meta property="article:modified_time" content="2024-05-10T05:31:11.595Z">
|
||
<meta property="article:author" content="Jason">
|
||
<meta property="article:tag" content="DFA">
|
||
<meta name="twitter:card" content="summary">
|
||
<meta name="twitter:image" content="https://qingling.icu/img/9.png"><link rel="shortcut icon" href="/img/%E5%9B%BE%E6%A0%87.png"><link rel="canonical" href="https://qingling.icu/posts/28101.html"><link rel="preconnect" href="//fastly.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="/cdn/icon/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/snackbar.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/fancybox.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
|
||
root: '/',
|
||
algolia: undefined,
|
||
localSearch: {"path":"/search.xml","preload":true,"top_n_per_article":1,"unescape":false,"languages":{"hits_empty":"找不到您查询的内容:${query}","hits_stats":"共找到 ${hits} 篇文章"}},
|
||
translate: undefined,
|
||
noticeOutdate: undefined,
|
||
highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":400},
|
||
copy: {
|
||
success: '复制成功',
|
||
error: '复制错误',
|
||
noSupport: '浏览器不支持'
|
||
},
|
||
relativeDate: {
|
||
homepage: true,
|
||
post: true
|
||
},
|
||
runtime: '天',
|
||
dateSuffix: {
|
||
just: '刚刚',
|
||
min: '分钟前',
|
||
hour: '小时前',
|
||
day: '天前',
|
||
month: '个月前'
|
||
},
|
||
copyright: undefined,
|
||
lightbox: 'mediumZoom',
|
||
Snackbar: {"chs_to_cht":"你已切换为繁体","cht_to_chs":"你已切换为简体","day_to_night":"你已切换为深色模式","night_to_day":"你已切换为浅色模式","bgLight":"#006650","bgDark":"#006650","position":"top-center"},
|
||
source: {
|
||
justifiedGallery: {
|
||
js: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.js',
|
||
css: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.css'
|
||
}
|
||
},
|
||
isPhotoFigcaption: false,
|
||
islazyload: false,
|
||
isAnchor: true,
|
||
percent: {
|
||
toc: true,
|
||
rightside: false,
|
||
},
|
||
autoDarkmode: true
|
||
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
|
||
title: 'DFA算法实现敏感词自管理',
|
||
isPost: true,
|
||
isHome: false,
|
||
isHighlightShrink: false,
|
||
isToc: true,
|
||
postUpdate: '2024-05-10 13:31:11'
|
||
}</script><noscript><style type="text/css">
|
||
#nav {
|
||
opacity: 1
|
||
}
|
||
.justified-gallery img {
|
||
opacity: 1
|
||
}
|
||
|
||
#recent-posts time,
|
||
#post-meta time {
|
||
display: inline !important
|
||
}
|
||
</style></noscript><script>(win=>{
|
||
win.saveToLocal = {
|
||
set: function setWithExpiry(key, value, ttl) {
|
||
if (ttl === 0) return
|
||
const now = new Date()
|
||
const expiryDay = ttl * 86400000
|
||
const item = {
|
||
value: value,
|
||
expiry: now.getTime() + expiryDay,
|
||
}
|
||
localStorage.setItem(key, JSON.stringify(item))
|
||
},
|
||
|
||
get: function getWithExpiry(key) {
|
||
const itemStr = localStorage.getItem(key)
|
||
|
||
if (!itemStr) {
|
||
return undefined
|
||
}
|
||
const item = JSON.parse(itemStr)
|
||
const now = new Date()
|
||
|
||
if (now.getTime() > item.expiry) {
|
||
localStorage.removeItem(key)
|
||
return undefined
|
||
}
|
||
return item.value
|
||
}
|
||
}
|
||
|
||
win.getScript = url => new Promise((resolve, reject) => {
|
||
const script = document.createElement('script')
|
||
script.src = url
|
||
script.async = true
|
||
script.onerror = reject
|
||
script.onload = script.onreadystatechange = function() {
|
||
const loadState = this.readyState
|
||
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
|
||
script.onload = script.onreadystatechange = null
|
||
resolve()
|
||
}
|
||
document.head.appendChild(script)
|
||
})
|
||
|
||
win.getCSS = (url,id = false) => new Promise((resolve, reject) => {
|
||
const link = document.createElement('link')
|
||
link.rel = 'stylesheet'
|
||
link.href = url
|
||
if (id) link.id = id
|
||
link.onerror = reject
|
||
link.onload = link.onreadystatechange = function() {
|
||
const loadState = this.readyState
|
||
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
|
||
link.onload = link.onreadystatechange = null
|
||
resolve()
|
||
}
|
||
document.head.appendChild(link)
|
||
})
|
||
|
||
win.activateDarkMode = function () {
|
||
document.documentElement.setAttribute('data-theme', 'dark')
|
||
if (document.querySelector('meta[name="theme-color"]') !== null) {
|
||
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#0d0d0d')
|
||
}
|
||
}
|
||
win.activateLightMode = function () {
|
||
document.documentElement.setAttribute('data-theme', 'light')
|
||
if (document.querySelector('meta[name="theme-color"]') !== null) {
|
||
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#ffffff')
|
||
}
|
||
}
|
||
const t = saveToLocal.get('theme')
|
||
|
||
const isDarkMode = window.matchMedia('(prefers-color-scheme: dark)').matches
|
||
const isLightMode = window.matchMedia('(prefers-color-scheme: light)').matches
|
||
const isNotSpecified = window.matchMedia('(prefers-color-scheme: no-preference)').matches
|
||
const hasNoSupport = !isDarkMode && !isLightMode && !isNotSpecified
|
||
|
||
if (t === undefined) {
|
||
if (isLightMode) activateLightMode()
|
||
else if (isDarkMode) activateDarkMode()
|
||
else if (isNotSpecified || hasNoSupport) {
|
||
const now = new Date()
|
||
const hour = now.getHours()
|
||
const isNight = hour <= 8 || hour >= 22
|
||
isNight ? activateDarkMode() : activateLightMode()
|
||
}
|
||
window.matchMedia('(prefers-color-scheme: dark)').addListener(function (e) {
|
||
if (saveToLocal.get('theme') === undefined) {
|
||
e.matches ? activateDarkMode() : activateLightMode()
|
||
}
|
||
})
|
||
} else if (t === 'light') activateLightMode()
|
||
else activateDarkMode()
|
||
|
||
const asideStatus = saveToLocal.get('aside-status')
|
||
if (asideStatus !== undefined) {
|
||
if (asideStatus === 'hide') {
|
||
document.documentElement.classList.add('hide-aside')
|
||
} else {
|
||
document.documentElement.classList.remove('hide-aside')
|
||
}
|
||
}
|
||
|
||
const detectApple = () => {
|
||
if(/iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)){
|
||
document.documentElement.classList.add('apple')
|
||
}
|
||
}
|
||
detectApple()
|
||
})(window)</script><script src="https://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js"></script><script type="text/javascript" src ="/js/welcome.js" ></script><script src="/js/sweetalert.js"></script><link rel="stylesheet" href="/css/sweetalert.css"><!-- hexo injector head_end start --><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiperstyle.css" media="print" onload="this.media='all'"><!-- hexo injector head_end end --><meta name="generator" content="Hexo 6.3.0"></head><body><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="/img/avatar.jpg" onerror="onerror=null;src='/img/loading.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><br/><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div></div></div><div class="post" id="body-wrap"><header class="not-top-img" id="page-header"><nav id="nav"><span id="blog-info"><a href="/" title="The Blog"><img class="site-icon" src="/img/logo.png"/><span class="site-name">The Blog</span></a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search" href="javascript:void(0);"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div><div id="toggle-menu"><a class="site-page" href="javascript:void(0);"><i class="fas fa-bars fa-fw"></i></a></div></div></nav></header><main class="layout" id="content-inner"><div id="post"><div id="post-info"><h1 class="post-title">DFA算法实现敏感词自管理</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2023-12-03T05:28:05.000Z" title="发表于 2023-12-03 13:28:05">2023-12-03</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2024-05-10T05:31:11.595Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E5%90%8E%E7%AB%AF/">后端</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-wordcount"><i class="far fa-file-word fa-fw post-meta-icon"></i><span class="post-meta-label">字数总计:</span><span class="word-count">965</span><span class="post-meta-separator">|</span><i class="far fa-clock fa-fw post-meta-icon"></i><span class="post-meta-label">阅读时长:</span><span>4分钟</span></span><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="DFA算法实现敏感词自管理"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div><article class="post-content" id="article-container"><h2 id="一-需求分析"><a href="#一-需求分析" class="headerlink" title="一.需求分析"></a>一.需求分析</h2><p> 在审核文本内容的时候,我们可以调用第三方成熟的服务(例如阿里云的内容安全)来实现,但是基于不同的场景,第三方的服务不可能涉及到方方面面的敏感词,比如在游戏类的场景中,开挂一词算敏感词,但是在其它的场景中,开挂一词是一个正常的词汇。这时候需要我们根据不同场景自己维护一套敏感词,在文本审核的时候,需要验证文本中是否包含这些敏感词。</p>
|
||
<h2 id="二-可选方案"><a href="#二-可选方案" class="headerlink" title="二.可选方案"></a>二.可选方案</h2><table>
|
||
<thead>
|
||
<tr>
|
||
<th><strong>方案</strong></th>
|
||
<th><strong>说明</strong></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody><tr>
|
||
<td>数据库模糊查询</td>
|
||
<td>效率太低</td>
|
||
</tr>
|
||
<tr>
|
||
<td>String.indexOf(“”)查找</td>
|
||
<td>数据库量大的话也是比较慢</td>
|
||
</tr>
|
||
<tr>
|
||
<td>全文检索</td>
|
||
<td>分词再匹配</td>
|
||
</tr>
|
||
<tr>
|
||
<td>DFA算法</td>
|
||
<td>确定有穷自动机(一种数据结构)</td>
|
||
</tr>
|
||
</tbody></table>
|
||
<h2 id="三-DFA算法"><a href="#三-DFA算法" class="headerlink" title="三. DFA算法"></a>三. DFA算法</h2><h3 id="1-简介"><a href="#1-简介" class="headerlink" title="1.简介"></a>1.简介</h3><p>DFA全称为:Deterministic Finite Automaton,即确定有穷自动机。</p>
|
||
<p>存储:一次性的把所有的敏感词存储到了多个map中,就是下图表示这种结构</p>
|
||
<p>敏感词:冰毒、大麻、大坏蛋</p>
|
||
<p><img src="/pictures/image-20231203134106231.png" alt="image-20231203134106231"></p>
|
||
<h3 id="2-检索过程"><a href="#2-检索过程" class="headerlink" title="2.检索过程"></a>2.检索过程</h3><p><img src="/pictures/image-20231203134335076.png" alt="image-20231203134335076"></p>
|
||
<h2 id="四-工具类"><a href="#四-工具类" class="headerlink" title="四.工具类"></a>四.工具类</h2><figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br><span class="line">95</span><br><span class="line">96</span><br><span class="line">97</span><br><span class="line">98</span><br><span class="line">99</span><br><span class="line">100</span><br><span class="line">101</span><br><span class="line">102</span><br><span class="line">103</span><br><span class="line">104</span><br><span class="line">105</span><br><span class="line">106</span><br><span class="line">107</span><br><span class="line">108</span><br><span class="line">109</span><br><span class="line">110</span><br><span class="line">111</span><br><span class="line">112</span><br><span class="line">113</span><br><span class="line">114</span><br><span class="line">115</span><br><span class="line">116</span><br><span class="line">117</span><br><span class="line">118</span><br><span class="line">119</span><br><span class="line">120</span><br><span class="line">121</span><br><span class="line">122</span><br><span class="line">123</span><br><span class="line">124</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.heima.utils.common;</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> java.util.*;</span><br><span class="line"></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">SensitiveWordUtil</span> {</span><br><span class="line"></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> Map<String, Object> dictionaryMap = <span class="keyword">new</span> <span class="title class_">HashMap</span><>();</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 生成关键词字典库</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> words</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@return</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title function_">initMap</span><span class="params">(Collection<String> words)</span> {</span><br><span class="line"> <span class="keyword">if</span> (words == <span class="literal">null</span>) {</span><br><span class="line"> System.out.println(<span class="string">"敏感词列表不能为空"</span>);</span><br><span class="line"> <span class="keyword">return</span> ;</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> <span class="comment">// map初始长度words.size(),整个字典库的入口字数(小于words.size(),因为不同的词可能会有相同的首字)</span></span><br><span class="line"> Map<String, Object> map = <span class="keyword">new</span> <span class="title class_">HashMap</span><>(words.size());</span><br><span class="line"> <span class="comment">// 遍历过程中当前层次的数据</span></span><br><span class="line"> Map<String, Object> curMap = <span class="literal">null</span>;</span><br><span class="line"> Iterator<String> iterator = words.iterator();</span><br><span class="line"></span><br><span class="line"> <span class="keyword">while</span> (iterator.hasNext()) {</span><br><span class="line"> <span class="type">String</span> <span class="variable">word</span> <span class="operator">=</span> iterator.next();</span><br><span class="line"> curMap = map;</span><br><span class="line"> <span class="type">int</span> <span class="variable">len</span> <span class="operator">=</span> word.length();</span><br><span class="line"> <span class="keyword">for</span> (<span class="type">int</span> <span class="variable">i</span> <span class="operator">=</span><span class="number">0</span>; i < len; i++) {</span><br><span class="line"> <span class="comment">// 遍历每个词的字</span></span><br><span class="line"> <span class="type">String</span> <span class="variable">key</span> <span class="operator">=</span> String.valueOf(word.charAt(i));</span><br><span class="line"> <span class="comment">// 当前字在当前层是否存在, 不存在则新建, 当前层数据指向下一个节点, 继续判断是否存在数据</span></span><br><span class="line"> Map<String, Object> wordMap = (Map<String, Object>) curMap.get(key);</span><br><span class="line"> <span class="keyword">if</span> (wordMap == <span class="literal">null</span>) {</span><br><span class="line"> <span class="comment">// 每个节点存在两个数据: 下一个节点和isEnd(是否结束标志)</span></span><br><span class="line"> wordMap = <span class="keyword">new</span> <span class="title class_">HashMap</span><>(<span class="number">2</span>);</span><br><span class="line"> wordMap.put(<span class="string">"isEnd"</span>, <span class="string">"0"</span>);</span><br><span class="line"> curMap.put(key, wordMap);</span><br><span class="line"> }</span><br><span class="line"> curMap = wordMap;</span><br><span class="line"> <span class="comment">// 如果当前字是词的最后一个字,则将isEnd标志置1</span></span><br><span class="line"> <span class="keyword">if</span> (i == len -<span class="number">1</span>) {</span><br><span class="line"> curMap.put(<span class="string">"isEnd"</span>, <span class="string">"1"</span>);</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> dictionaryMap = map;</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 搜索文本中某个文字是否匹配关键词</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> text</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> beginIndex</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@return</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">private</span> <span class="keyword">static</span> <span class="type">int</span> <span class="title function_">checkWord</span><span class="params">(String text, <span class="type">int</span> beginIndex)</span> {</span><br><span class="line"> <span class="keyword">if</span> (dictionaryMap == <span class="literal">null</span>) {</span><br><span class="line"> <span class="keyword">throw</span> <span class="keyword">new</span> <span class="title class_">RuntimeException</span>(<span class="string">"字典不能为空"</span>);</span><br><span class="line"> }</span><br><span class="line"> <span class="type">boolean</span> <span class="variable">isEnd</span> <span class="operator">=</span> <span class="literal">false</span>;</span><br><span class="line"> <span class="type">int</span> <span class="variable">wordLength</span> <span class="operator">=</span> <span class="number">0</span>;</span><br><span class="line"> Map<String, Object> curMap = dictionaryMap;</span><br><span class="line"> <span class="type">int</span> <span class="variable">len</span> <span class="operator">=</span> text.length();</span><br><span class="line"> <span class="comment">// 从文本的第beginIndex开始匹配</span></span><br><span class="line"> <span class="keyword">for</span> (<span class="type">int</span> <span class="variable">i</span> <span class="operator">=</span> beginIndex; i < len; i++) {</span><br><span class="line"> <span class="type">String</span> <span class="variable">key</span> <span class="operator">=</span> String.valueOf(text.charAt(i));</span><br><span class="line"> <span class="comment">// 获取当前key的下一个节点</span></span><br><span class="line"> curMap = (Map<String, Object>) curMap.get(key);</span><br><span class="line"> <span class="keyword">if</span> (curMap == <span class="literal">null</span>) {</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> } <span class="keyword">else</span> {</span><br><span class="line"> wordLength ++;</span><br><span class="line"> <span class="keyword">if</span> (<span class="string">"1"</span>.equals(curMap.get(<span class="string">"isEnd"</span>))) {</span><br><span class="line"> isEnd = <span class="literal">true</span>;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span> (!isEnd) {</span><br><span class="line"> wordLength = <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">return</span> wordLength;</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 获取匹配的关键词和命中次数</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> text</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@return</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> Map<String, Integer> <span class="title function_">matchWords</span><span class="params">(String text)</span> {</span><br><span class="line"> Map<String, Integer> wordMap = <span class="keyword">new</span> <span class="title class_">HashMap</span><>();</span><br><span class="line"> <span class="type">int</span> <span class="variable">len</span> <span class="operator">=</span> text.length();</span><br><span class="line"> <span class="keyword">for</span> (<span class="type">int</span> <span class="variable">i</span> <span class="operator">=</span> <span class="number">0</span>; i < len; i++) {</span><br><span class="line"> <span class="type">int</span> <span class="variable">wordLength</span> <span class="operator">=</span> checkWord(text, i);</span><br><span class="line"> <span class="keyword">if</span> (wordLength > <span class="number">0</span>) {</span><br><span class="line"> <span class="type">String</span> <span class="variable">word</span> <span class="operator">=</span> text.substring(i, i + wordLength);</span><br><span class="line"> <span class="comment">// 添加关键词匹配次数</span></span><br><span class="line"> <span class="keyword">if</span> (wordMap.containsKey(word)) {</span><br><span class="line"> wordMap.put(word, wordMap.get(word) + <span class="number">1</span>);</span><br><span class="line"> } <span class="keyword">else</span> {</span><br><span class="line"> wordMap.put(word, <span class="number">1</span>);</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> i += wordLength - <span class="number">1</span>;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">return</span> wordMap;</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title function_">main</span><span class="params">(String[] args)</span> {</span><br><span class="line"> List<String> list = <span class="keyword">new</span> <span class="title class_">ArrayList</span><>();</span><br><span class="line"> list.add(<span class="string">"法轮"</span>);</span><br><span class="line"> list.add(<span class="string">"法轮功"</span>);</span><br><span class="line"> list.add(<span class="string">"冰毒"</span>);</span><br><span class="line"> <span class="comment">//初始化敏感词库</span></span><br><span class="line"> initMap(list);</span><br><span class="line"> String content=<span class="string">"我是一个好人,并不会卖冰毒,也不操练法轮功,我真的不卖冰毒"</span>;</span><br><span class="line"> Map<String, Integer> map = matchWords(content);</span><br><span class="line"> System.out.println(map);</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
|
||
|
||
<p><strong>结果:</strong></p>
|
||
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">{冰毒=2, 法轮功=1}</span><br></pre></td></tr></table></figure>
|
||
|
||
<p><strong>最佳实践:</strong> <a href="https://qingling.icu/posts/64695.html">项目实战-黑马头条 | The Blog (qingling.icu)</a></p>
|
||
</article><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/DFA/">DFA</a></div><div class="post_share"><div class="social-share" data-image="/img/9.png" data-sites="wechat,weibo,qq"></div><link rel="stylesheet" href="/cdn/css/share.min.css" media="print" onload="this.media='all'"><script src="/cdn/js/social-share.min.js" defer></script></div></div><div class="post-reward"><div class="reward-button"><i class="fas fa-qrcode"></i> 打赏</div><div class="reward-main"><ul class="reward-all"><li class="reward-item"><a href="/img/wechat.jpg" target="_blank"><img class="post-qr-code-img" src="/img/wechat.jpg" alt="微信"/></a><div class="post-qr-code-desc">微信</div></li><li class="reward-item"><a href="/img/alipay.jpg" target="_blank"><img class="post-qr-code-img" src="/img/alipay.jpg" alt="支付宝"/></a><div class="post-qr-code-desc">支付宝</div></li></ul></div></div><br/><div id="post-comment"><div class="comment-head"><div class="comment-headline"><i class="far fa-comment-alt fa-fw"></i><span> 评论</span></div></div><div class="comment-wrap"><div><div id="gitalk-container"></div></div></div></div></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="/img/avatar.jpg" onerror="this.onerror=null;this.src='/img/loading.gif'" alt="avatar"/></div><div class="author-info__name">Jason</div><div class="author-info__description">Debug the World!</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><a id="card-info-btn"><i class="fab fa-microsoft"></i><span>Ctrl + D 收藏</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/JasonsGong" target="_blank" title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="tencent://AddContact/?fromId=45&fromSubId=1&subcmd=all&uin=2602183349&website=www.oicqzone.com" target="_blank" title="QQ"><i class="fab fa-qq"></i></a><a class="social-icon" href="mailto:2602183349@qq.com" target="_blank" title="Email"><i class="fas fa-envelope-open-text"></i></a><a class="social-icon" href="https://github.com/JasonsGong?tab=repositories" target="_blank" title="代码仓库"><i class="fas fa-database"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">本网站是静态网站,更新页面资源请使用Ctrl+F5;若网站内文章对你有帮助,请使用Ctrl+D收藏该网站!</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div class="toc-content is-expand"><ol class="toc"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%80-%E9%9C%80%E6%B1%82%E5%88%86%E6%9E%90"><span class="toc-text">一.需求分析</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%BA%8C-%E5%8F%AF%E9%80%89%E6%96%B9%E6%A1%88"><span class="toc-text">二.可选方案</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%89-DFA%E7%AE%97%E6%B3%95"><span class="toc-text">三. DFA算法</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#1-%E7%AE%80%E4%BB%8B"><span class="toc-text">1.简介</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#2-%E6%A3%80%E7%B4%A2%E8%BF%87%E7%A8%8B"><span class="toc-text">2.检索过程</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%9B%9B-%E5%B7%A5%E5%85%B7%E7%B1%BB"><span class="toc-text">四.工具类</span></a></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最近更新</span></div><div class="aside-list"><div class="aside-list-item"><a class="thumbnail" href="/posts/8957.html" title="Linux从入门到进阶"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux从入门到进阶"/></a><div class="content"><a class="title" href="/posts/8957.html" title="Linux从入门到进阶">Linux从入门到进阶</a><time datetime="2024-05-10T05:31:11.691Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/35630.html" title="接口测试工具"><img src="/img/2.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="接口测试工具"/></a><div class="content"><a class="title" href="/posts/35630.html" title="接口测试工具">接口测试工具</a><time datetime="2024-05-10T05:31:11.686Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/32246.html" title="SpringBoot中整合Swagger2"><img src="/img/3.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="SpringBoot中整合Swagger2"/></a><div class="content"><a class="title" href="/posts/32246.html" title="SpringBoot中整合Swagger2">SpringBoot中整合Swagger2</a><time datetime="2024-05-10T05:31:11.681Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/19306.html" title="Docker容器化技术"><img src="/img/1.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Docker容器化技术"/></a><div class="content"><a class="title" href="/posts/19306.html" title="Docker容器化技术">Docker容器化技术</a><time datetime="2024-05-10T05:31:11.675Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/20683.html" title="Linux中开发环境的搭建"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux中开发环境的搭建"/></a><div class="content"><a class="title" href="/posts/20683.html" title="Linux中开发环境的搭建">Linux中开发环境的搭建</a><time datetime="2024-05-10T05:31:11.669Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div></div></div></div></div></main><footer id="footer"><div id="footer-wrap"></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><a id="to_comment" href="#post-comment" title="直达评论"><i class="fas fa-comment-alt"></i></a><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="/cdn/js/medium-zoom.min.js"></script><script src="/cdn/js/instantpage.min.js" type="module"></script><script src="/cdn/js/snackbar.min.js"></script><div class="js-pjax"><script>function loadGitalk () {
|
||
function initGitalk () {
|
||
var gitalk = new Gitalk(Object.assign({
|
||
clientID: '00fb27b1e484536359c2',
|
||
clientSecret: 'be41a12281c68b6e228d1a27e8d08aeb91541145',
|
||
repo: 'BlogComment',
|
||
owner: 'JasonsGong',
|
||
admin: ['JasonsGong'],
|
||
id: '1897c7098c83201b519df9b057bfe77a',
|
||
updateCountCallback: commentCount
|
||
},null))
|
||
|
||
gitalk.render('gitalk-container')
|
||
}
|
||
|
||
if (typeof Gitalk === 'function') initGitalk()
|
||
else {
|
||
getCSS('/cdn/css/gitalk.min.css')
|
||
getScript('/cdn/js/gitalk.min.js').then(initGitalk)
|
||
}
|
||
}
|
||
|
||
function commentCount(n){
|
||
let isCommentCount = document.querySelector('#post-meta .gitalk-comment-count')
|
||
if (isCommentCount) {
|
||
isCommentCount.textContent= n
|
||
}
|
||
}
|
||
|
||
if ('Gitalk' === 'Gitalk' || !true) {
|
||
if (true) btf.loadComment(document.getElementById('gitalk-container'), loadGitalk)
|
||
else loadGitalk()
|
||
} else {
|
||
function loadOtherComment () {
|
||
loadGitalk()
|
||
}
|
||
}</script></div><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span> 数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><br/><div class="no-result" id="local-search-results"></div><div id="local-search-stats-wrap"></div></div></div><div id="search-mask"></div><script src="/js/search/local-search.js"></script></div></div><!-- hexo injector body_end start --><script data-pjax>
|
||
function butterfly_swiper_injector_config(){
|
||
var parent_div_git = document.getElementById('recent-posts');
|
||
var item_html = '<div class="recent-post-item" style="height: auto;width: 100%"><div class="blog-slider swiper-container-fade swiper-container-horizontal" id="swiper_container"><div class="blog-slider__wrp swiper-wrapper" style="transition-duration: 0ms;"><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/19306.html" alt=""><img width="48" height="48" src="/img/1.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-04-21</span><a class="blog-slider__title" href="posts/19306.html" alt="">Docker容器化技术</a><div class="blog-slider__text">Docker</div><a class="blog-slider__button" href="posts/19306.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/47003.html" alt=""><img width="48" height="48" src="/img/5.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-03-10</span><a class="blog-slider__title" href="posts/47003.html" alt="">常用正则表达式大全</a><div class="blog-slider__text">正则表达式</div><a class="blog-slider__button" href="posts/47003.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/20683.html" alt=""><img width="48" height="48" src="/img/8.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-06-05</span><a class="blog-slider__title" href="posts/20683.html" alt="">Linux中开发环境的搭建</a><div class="blog-slider__text">环境搭建</div><a class="blog-slider__button" href="posts/20683.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/63333.html" alt=""><img width="48" height="48" src="/img/10.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-06-03</span><a class="blog-slider__title" href="posts/63333.html" alt="">开发环境的搭建</a><div class="blog-slider__text">环境搭建</div><a class="blog-slider__button" href="posts/63333.html" alt="">详情 </a></div></div></div><div class="blog-slider__pagination swiper-pagination-clickable swiper-pagination-bullets"></div></div></div>';
|
||
if (parent_div_git !== null && typeof parent_div_git !== 'undefined') {
|
||
parent_div_git.insertAdjacentHTML("afterbegin",item_html)
|
||
}
|
||
}
|
||
var elist = 'undefined'.split(',');
|
||
var cpage = location.pathname;
|
||
var epage = 'all';
|
||
var flag = 0;
|
||
|
||
for (var i=0;i<elist.length;i++){
|
||
if (cpage.includes(elist[i])){
|
||
flag++;
|
||
}
|
||
}
|
||
|
||
if ((epage ==='all')&&(flag == 0)){
|
||
butterfly_swiper_injector_config();
|
||
}
|
||
else if (epage === cpage){
|
||
butterfly_swiper_injector_config();
|
||
}
|
||
</script><script defer src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.js"></script><script defer data-pjax src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper_init.js"></script><!-- hexo injector body_end end --></body></html> |