JasonsGong.github.io/posts/28101.html
2024-06-14 22:00:25 +08:00

281 lines
45 KiB
HTML
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>DFA算法实现敏感词自管理 | The Blog</title><meta name="author" content="Jason"><meta name="copyright" content="Jason"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="一.需求分析​ 在审核文本内容的时候,我们可以调用第三方成熟的服务(例如阿里云的内容安全)来实现,但是基于不同的场景,第三方的服务不可能涉及到方方面面的敏感词,比如在游戏类的场景中,开挂一词算敏感词,但是在其它的场景中,开挂一词是一个正常的词汇。这时候需要我们根据不同场景自己维护一套敏感词,在文本审核的时候,需要验证文本中是否包含这些敏感词。 二.可选方案 方案 说明 数据库模糊查询">
<meta property="og:type" content="article">
<meta property="og:title" content="DFA算法实现敏感词自管理">
<meta property="og:url" content="https://qingling.icu/posts/28101.html">
<meta property="og:site_name" content="The Blog">
<meta property="og:description" content="一.需求分析​ 在审核文本内容的时候,我们可以调用第三方成熟的服务(例如阿里云的内容安全)来实现,但是基于不同的场景,第三方的服务不可能涉及到方方面面的敏感词,比如在游戏类的场景中,开挂一词算敏感词,但是在其它的场景中,开挂一词是一个正常的词汇。这时候需要我们根据不同场景自己维护一套敏感词,在文本审核的时候,需要验证文本中是否包含这些敏感词。 二.可选方案 方案 说明 数据库模糊查询">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://qingling.icu/img/9.png">
<meta property="article:published_time" content="2023-12-03T05:28:05.000Z">
<meta property="article:modified_time" content="2024-05-10T05:31:11.595Z">
<meta property="article:author" content="Jason">
<meta property="article:tag" content="DFA">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://qingling.icu/img/9.png"><link rel="shortcut icon" href="/img/%E5%9B%BE%E6%A0%87.png"><link rel="canonical" href="https://qingling.icu/posts/28101.html"><link rel="preconnect" href="//fastly.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="/cdn/icon/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/snackbar.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/fancybox.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
root: '/',
algolia: undefined,
localSearch: {"path":"/search.xml","preload":true,"top_n_per_article":1,"unescape":false,"languages":{"hits_empty":"找不到您查询的内容:${query}","hits_stats":"共找到 ${hits} 篇文章"}},
translate: undefined,
noticeOutdate: undefined,
highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":400},
copy: {
success: '复制成功',
error: '复制错误',
noSupport: '浏览器不支持'
},
relativeDate: {
homepage: true,
post: true
},
runtime: '天',
dateSuffix: {
just: '刚刚',
min: '分钟前',
hour: '小时前',
day: '天前',
month: '个月前'
},
copyright: undefined,
lightbox: 'mediumZoom',
Snackbar: {"chs_to_cht":"你已切换为繁体","cht_to_chs":"你已切换为简体","day_to_night":"你已切换为深色模式","night_to_day":"你已切换为浅色模式","bgLight":"#006650","bgDark":"#006650","position":"top-center"},
source: {
justifiedGallery: {
js: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.js',
css: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.css'
}
},
isPhotoFigcaption: false,
islazyload: false,
isAnchor: true,
percent: {
toc: true,
rightside: false,
},
autoDarkmode: true
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
title: 'DFA算法实现敏感词自管理',
isPost: true,
isHome: false,
isHighlightShrink: false,
isToc: true,
postUpdate: '2024-05-10 13:31:11'
}</script><noscript><style type="text/css">
#nav {
opacity: 1
}
.justified-gallery img {
opacity: 1
}
#recent-posts time,
#post-meta time {
display: inline !important
}
</style></noscript><script>(win=>{
win.saveToLocal = {
set: function setWithExpiry(key, value, ttl) {
if (ttl === 0) return
const now = new Date()
const expiryDay = ttl * 86400000
const item = {
value: value,
expiry: now.getTime() + expiryDay,
}
localStorage.setItem(key, JSON.stringify(item))
},
get: function getWithExpiry(key) {
const itemStr = localStorage.getItem(key)
if (!itemStr) {
return undefined
}
const item = JSON.parse(itemStr)
const now = new Date()
if (now.getTime() > item.expiry) {
localStorage.removeItem(key)
return undefined
}
return item.value
}
}
win.getScript = url => new Promise((resolve, reject) => {
const script = document.createElement('script')
script.src = url
script.async = true
script.onerror = reject
script.onload = script.onreadystatechange = function() {
const loadState = this.readyState
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
script.onload = script.onreadystatechange = null
resolve()
}
document.head.appendChild(script)
})
win.getCSS = (url,id = false) => new Promise((resolve, reject) => {
const link = document.createElement('link')
link.rel = 'stylesheet'
link.href = url
if (id) link.id = id
link.onerror = reject
link.onload = link.onreadystatechange = function() {
const loadState = this.readyState
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
link.onload = link.onreadystatechange = null
resolve()
}
document.head.appendChild(link)
})
win.activateDarkMode = function () {
document.documentElement.setAttribute('data-theme', 'dark')
if (document.querySelector('meta[name="theme-color"]') !== null) {
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#0d0d0d')
}
}
win.activateLightMode = function () {
document.documentElement.setAttribute('data-theme', 'light')
if (document.querySelector('meta[name="theme-color"]') !== null) {
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#ffffff')
}
}
const t = saveToLocal.get('theme')
const isDarkMode = window.matchMedia('(prefers-color-scheme: dark)').matches
const isLightMode = window.matchMedia('(prefers-color-scheme: light)').matches
const isNotSpecified = window.matchMedia('(prefers-color-scheme: no-preference)').matches
const hasNoSupport = !isDarkMode && !isLightMode && !isNotSpecified
if (t === undefined) {
if (isLightMode) activateLightMode()
else if (isDarkMode) activateDarkMode()
else if (isNotSpecified || hasNoSupport) {
const now = new Date()
const hour = now.getHours()
const isNight = hour <= 8 || hour >= 22
isNight ? activateDarkMode() : activateLightMode()
}
window.matchMedia('(prefers-color-scheme: dark)').addListener(function (e) {
if (saveToLocal.get('theme') === undefined) {
e.matches ? activateDarkMode() : activateLightMode()
}
})
} else if (t === 'light') activateLightMode()
else activateDarkMode()
const asideStatus = saveToLocal.get('aside-status')
if (asideStatus !== undefined) {
if (asideStatus === 'hide') {
document.documentElement.classList.add('hide-aside')
} else {
document.documentElement.classList.remove('hide-aside')
}
}
const detectApple = () => {
if(/iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)){
document.documentElement.classList.add('apple')
}
}
detectApple()
})(window)</script><script src="https://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js"></script><script type="text/javascript" src ="/js/welcome.js" ></script><script src="/js/sweetalert.js"></script><link rel="stylesheet" href="/css/sweetalert.css"><!-- hexo injector head_end start --><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiperstyle.css" media="print" onload="this.media='all'"><!-- hexo injector head_end end --><meta name="generator" content="Hexo 6.3.0"></head><body><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="/img/avatar.jpg" onerror="onerror=null;src='/img/loading.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><br/><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div></div></div><div class="post" id="body-wrap"><header class="not-top-img" id="page-header"><nav id="nav"><span id="blog-info"><a href="/" title="The Blog"><img class="site-icon" src="/img/logo.png"/><span class="site-name">The Blog</span></a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search" href="javascript:void(0);"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div><div id="toggle-menu"><a class="site-page" href="javascript:void(0);"><i class="fas fa-bars fa-fw"></i></a></div></div></nav></header><main class="layout" id="content-inner"><div id="post"><div id="post-info"><h1 class="post-title">DFA算法实现敏感词自管理</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2023-12-03T05:28:05.000Z" title="发表于 2023-12-03 13:28:05">2023-12-03</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2024-05-10T05:31:11.595Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E5%90%8E%E7%AB%AF/">后端</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-wordcount"><i class="far fa-file-word fa-fw post-meta-icon"></i><span class="post-meta-label">字数总计:</span><span class="word-count">965</span><span class="post-meta-separator">|</span><i class="far fa-clock fa-fw post-meta-icon"></i><span class="post-meta-label">阅读时长:</span><span>4分钟</span></span><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="DFA算法实现敏感词自管理"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div><article class="post-content" id="article-container"><h2 id="一-需求分析"><a href="#一-需求分析" class="headerlink" title="一.需求分析"></a>一.需求分析</h2><p> 在审核文本内容的时候,我们可以调用第三方成熟的服务(例如阿里云的内容安全)来实现,但是基于不同的场景,第三方的服务不可能涉及到方方面面的敏感词,比如在游戏类的场景中,开挂一词算敏感词,但是在其它的场景中,开挂一词是一个正常的词汇。这时候需要我们根据不同场景自己维护一套敏感词,在文本审核的时候,需要验证文本中是否包含这些敏感词。</p>
<h2 id="二-可选方案"><a href="#二-可选方案" class="headerlink" title="二.可选方案"></a>二.可选方案</h2><table>
<thead>
<tr>
<th><strong>方案</strong></th>
<th><strong>说明</strong></th>
</tr>
</thead>
<tbody><tr>
<td>数据库模糊查询</td>
<td>效率太低</td>
</tr>
<tr>
<td>String.indexOf(“”)查找</td>
<td>数据库量大的话也是比较慢</td>
</tr>
<tr>
<td>全文检索</td>
<td>分词再匹配</td>
</tr>
<tr>
<td>DFA算法</td>
<td>确定有穷自动机(一种数据结构)</td>
</tr>
</tbody></table>
<h2 id="三-DFA算法"><a href="#三-DFA算法" class="headerlink" title="三. DFA算法"></a>三. DFA算法</h2><h3 id="1-简介"><a href="#1-简介" class="headerlink" title="1.简介"></a>1.简介</h3><p>DFA全称为Deterministic Finite Automaton,即确定有穷自动机。</p>
<p>存储一次性的把所有的敏感词存储到了多个map中就是下图表示这种结构</p>
<p>敏感词:冰毒、大麻、大坏蛋</p>
<p><img src="/pictures/image-20231203134106231.png" alt="image-20231203134106231"></p>
<h3 id="2-检索过程"><a href="#2-检索过程" class="headerlink" title="2.检索过程"></a>2.检索过程</h3><p><img src="/pictures/image-20231203134335076.png" alt="image-20231203134335076"></p>
<h2 id="四-工具类"><a href="#四-工具类" class="headerlink" title="四.工具类"></a>四.工具类</h2><figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br><span class="line">95</span><br><span class="line">96</span><br><span class="line">97</span><br><span class="line">98</span><br><span class="line">99</span><br><span class="line">100</span><br><span class="line">101</span><br><span class="line">102</span><br><span class="line">103</span><br><span class="line">104</span><br><span class="line">105</span><br><span class="line">106</span><br><span class="line">107</span><br><span class="line">108</span><br><span class="line">109</span><br><span class="line">110</span><br><span class="line">111</span><br><span class="line">112</span><br><span class="line">113</span><br><span class="line">114</span><br><span class="line">115</span><br><span class="line">116</span><br><span class="line">117</span><br><span class="line">118</span><br><span class="line">119</span><br><span class="line">120</span><br><span class="line">121</span><br><span class="line">122</span><br><span class="line">123</span><br><span class="line">124</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.heima.utils.common;</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> java.util.*;</span><br><span class="line"></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">SensitiveWordUtil</span> &#123;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> Map&lt;String, Object&gt; dictionaryMap = <span class="keyword">new</span> <span class="title class_">HashMap</span>&lt;&gt;();</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 生成关键词字典库</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> words</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@return</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title function_">initMap</span><span class="params">(Collection&lt;String&gt; words)</span> &#123;</span><br><span class="line"> <span class="keyword">if</span> (words == <span class="literal">null</span>) &#123;</span><br><span class="line"> System.out.println(<span class="string">&quot;敏感词列表不能为空&quot;</span>);</span><br><span class="line"> <span class="keyword">return</span> ;</span><br><span class="line"> &#125;</span><br><span class="line"></span><br><span class="line"> <span class="comment">// map初始长度words.size(),整个字典库的入口字数(小于words.size(),因为不同的词可能会有相同的首字)</span></span><br><span class="line"> Map&lt;String, Object&gt; map = <span class="keyword">new</span> <span class="title class_">HashMap</span>&lt;&gt;(words.size());</span><br><span class="line"> <span class="comment">// 遍历过程中当前层次的数据</span></span><br><span class="line"> Map&lt;String, Object&gt; curMap = <span class="literal">null</span>;</span><br><span class="line"> Iterator&lt;String&gt; iterator = words.iterator();</span><br><span class="line"></span><br><span class="line"> <span class="keyword">while</span> (iterator.hasNext()) &#123;</span><br><span class="line"> <span class="type">String</span> <span class="variable">word</span> <span class="operator">=</span> iterator.next();</span><br><span class="line"> curMap = map;</span><br><span class="line"> <span class="type">int</span> <span class="variable">len</span> <span class="operator">=</span> word.length();</span><br><span class="line"> <span class="keyword">for</span> (<span class="type">int</span> <span class="variable">i</span> <span class="operator">=</span><span class="number">0</span>; i &lt; len; i++) &#123;</span><br><span class="line"> <span class="comment">// 遍历每个词的字</span></span><br><span class="line"> <span class="type">String</span> <span class="variable">key</span> <span class="operator">=</span> String.valueOf(word.charAt(i));</span><br><span class="line"> <span class="comment">// 当前字在当前层是否存在, 不存在则新建, 当前层数据指向下一个节点, 继续判断是否存在数据</span></span><br><span class="line"> Map&lt;String, Object&gt; wordMap = (Map&lt;String, Object&gt;) curMap.get(key);</span><br><span class="line"> <span class="keyword">if</span> (wordMap == <span class="literal">null</span>) &#123;</span><br><span class="line"> <span class="comment">// 每个节点存在两个数据: 下一个节点和isEnd(是否结束标志)</span></span><br><span class="line"> wordMap = <span class="keyword">new</span> <span class="title class_">HashMap</span>&lt;&gt;(<span class="number">2</span>);</span><br><span class="line"> wordMap.put(<span class="string">&quot;isEnd&quot;</span>, <span class="string">&quot;0&quot;</span>);</span><br><span class="line"> curMap.put(key, wordMap);</span><br><span class="line"> &#125;</span><br><span class="line"> curMap = wordMap;</span><br><span class="line"> <span class="comment">// 如果当前字是词的最后一个字则将isEnd标志置1</span></span><br><span class="line"> <span class="keyword">if</span> (i == len -<span class="number">1</span>) &#123;</span><br><span class="line"> curMap.put(<span class="string">&quot;isEnd&quot;</span>, <span class="string">&quot;1&quot;</span>);</span><br><span class="line"> &#125;</span><br><span class="line"> &#125;</span><br><span class="line"> &#125;</span><br><span class="line"></span><br><span class="line"> dictionaryMap = map;</span><br><span class="line"> &#125;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 搜索文本中某个文字是否匹配关键词</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> text</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> beginIndex</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@return</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">private</span> <span class="keyword">static</span> <span class="type">int</span> <span class="title function_">checkWord</span><span class="params">(String text, <span class="type">int</span> beginIndex)</span> &#123;</span><br><span class="line"> <span class="keyword">if</span> (dictionaryMap == <span class="literal">null</span>) &#123;</span><br><span class="line"> <span class="keyword">throw</span> <span class="keyword">new</span> <span class="title class_">RuntimeException</span>(<span class="string">&quot;字典不能为空&quot;</span>);</span><br><span class="line"> &#125;</span><br><span class="line"> <span class="type">boolean</span> <span class="variable">isEnd</span> <span class="operator">=</span> <span class="literal">false</span>;</span><br><span class="line"> <span class="type">int</span> <span class="variable">wordLength</span> <span class="operator">=</span> <span class="number">0</span>;</span><br><span class="line"> Map&lt;String, Object&gt; curMap = dictionaryMap;</span><br><span class="line"> <span class="type">int</span> <span class="variable">len</span> <span class="operator">=</span> text.length();</span><br><span class="line"> <span class="comment">// 从文本的第beginIndex开始匹配</span></span><br><span class="line"> <span class="keyword">for</span> (<span class="type">int</span> <span class="variable">i</span> <span class="operator">=</span> beginIndex; i &lt; len; i++) &#123;</span><br><span class="line"> <span class="type">String</span> <span class="variable">key</span> <span class="operator">=</span> String.valueOf(text.charAt(i));</span><br><span class="line"> <span class="comment">// 获取当前key的下一个节点</span></span><br><span class="line"> curMap = (Map&lt;String, Object&gt;) curMap.get(key);</span><br><span class="line"> <span class="keyword">if</span> (curMap == <span class="literal">null</span>) &#123;</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> &#125; <span class="keyword">else</span> &#123;</span><br><span class="line"> wordLength ++;</span><br><span class="line"> <span class="keyword">if</span> (<span class="string">&quot;1&quot;</span>.equals(curMap.get(<span class="string">&quot;isEnd&quot;</span>))) &#123;</span><br><span class="line"> isEnd = <span class="literal">true</span>;</span><br><span class="line"> &#125;</span><br><span class="line"> &#125;</span><br><span class="line"> &#125;</span><br><span class="line"> <span class="keyword">if</span> (!isEnd) &#123;</span><br><span class="line"> wordLength = <span class="number">0</span>;</span><br><span class="line"> &#125;</span><br><span class="line"> <span class="keyword">return</span> wordLength;</span><br><span class="line"> &#125;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 获取匹配的关键词和命中次数</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> text</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@return</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> Map&lt;String, Integer&gt; <span class="title function_">matchWords</span><span class="params">(String text)</span> &#123;</span><br><span class="line"> Map&lt;String, Integer&gt; wordMap = <span class="keyword">new</span> <span class="title class_">HashMap</span>&lt;&gt;();</span><br><span class="line"> <span class="type">int</span> <span class="variable">len</span> <span class="operator">=</span> text.length();</span><br><span class="line"> <span class="keyword">for</span> (<span class="type">int</span> <span class="variable">i</span> <span class="operator">=</span> <span class="number">0</span>; i &lt; len; i++) &#123;</span><br><span class="line"> <span class="type">int</span> <span class="variable">wordLength</span> <span class="operator">=</span> checkWord(text, i);</span><br><span class="line"> <span class="keyword">if</span> (wordLength &gt; <span class="number">0</span>) &#123;</span><br><span class="line"> <span class="type">String</span> <span class="variable">word</span> <span class="operator">=</span> text.substring(i, i + wordLength);</span><br><span class="line"> <span class="comment">// 添加关键词匹配次数</span></span><br><span class="line"> <span class="keyword">if</span> (wordMap.containsKey(word)) &#123;</span><br><span class="line"> wordMap.put(word, wordMap.get(word) + <span class="number">1</span>);</span><br><span class="line"> &#125; <span class="keyword">else</span> &#123;</span><br><span class="line"> wordMap.put(word, <span class="number">1</span>);</span><br><span class="line"> &#125;</span><br><span class="line"></span><br><span class="line"> i += wordLength - <span class="number">1</span>;</span><br><span class="line"> &#125;</span><br><span class="line"> &#125;</span><br><span class="line"> <span class="keyword">return</span> wordMap;</span><br><span class="line"> &#125;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title function_">main</span><span class="params">(String[] args)</span> &#123;</span><br><span class="line"> List&lt;String&gt; list = <span class="keyword">new</span> <span class="title class_">ArrayList</span>&lt;&gt;();</span><br><span class="line"> list.add(<span class="string">&quot;法轮&quot;</span>);</span><br><span class="line"> list.add(<span class="string">&quot;法轮功&quot;</span>);</span><br><span class="line"> list.add(<span class="string">&quot;冰毒&quot;</span>);</span><br><span class="line"> <span class="comment">//初始化敏感词库</span></span><br><span class="line"> initMap(list);</span><br><span class="line"> String content=<span class="string">&quot;我是一个好人,并不会卖冰毒,也不操练法轮功,我真的不卖冰毒&quot;</span>;</span><br><span class="line"> Map&lt;String, Integer&gt; map = matchWords(content);</span><br><span class="line"> System.out.println(map);</span><br><span class="line"> &#125;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>
<p><strong>结果:</strong></p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">&#123;冰毒=2, 法轮功=1&#125;</span><br></pre></td></tr></table></figure>
<p><strong>最佳实践:</strong> <a href="https://qingling.icu/posts/64695.html">项目实战-黑马头条 | The Blog (qingling.icu)</a></p>
</article><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/DFA/">DFA</a></div><div class="post_share"><div class="social-share" data-image="/img/9.png" data-sites="wechat,weibo,qq"></div><link rel="stylesheet" href="/cdn/css/share.min.css" media="print" onload="this.media='all'"><script src="/cdn/js/social-share.min.js" defer></script></div></div><div class="post-reward"><div class="reward-button"><i class="fas fa-qrcode"></i> 打赏</div><div class="reward-main"><ul class="reward-all"><li class="reward-item"><a href="/img/wechat.jpg" target="_blank"><img class="post-qr-code-img" src="/img/wechat.jpg" alt="微信"/></a><div class="post-qr-code-desc">微信</div></li><li class="reward-item"><a href="/img/alipay.jpg" target="_blank"><img class="post-qr-code-img" src="/img/alipay.jpg" alt="支付宝"/></a><div class="post-qr-code-desc">支付宝</div></li></ul></div></div><br/><div id="post-comment"><div class="comment-head"><div class="comment-headline"><i class="far fa-comment-alt fa-fw"></i><span> 评论</span></div></div><div class="comment-wrap"><div><div id="gitalk-container"></div></div></div></div></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="/img/avatar.jpg" onerror="this.onerror=null;this.src='/img/loading.gif'" alt="avatar"/></div><div class="author-info__name">Jason</div><div class="author-info__description">Debug the World</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><a id="card-info-btn"><i class="fab fa-microsoft"></i><span>Ctrl + D 收藏</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/JasonsGong" target="_blank" title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="tencent://AddContact/?fromId=45&amp;fromSubId=1&amp;subcmd=all&amp;uin=2602183349&amp;website=www.oicqzone.com" target="_blank" title="QQ"><i class="fab fa-qq"></i></a><a class="social-icon" href="mailto:2602183349@qq.com" target="_blank" title="Email"><i class="fas fa-envelope-open-text"></i></a><a class="social-icon" href="https://github.com/JasonsGong?tab=repositories" target="_blank" title="代码仓库"><i class="fas fa-database"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">本网站是静态网站,更新页面资源请使用Ctrl+F5;若网站内文章对你有帮助,请使用Ctrl+D收藏该网站</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div class="toc-content is-expand"><ol class="toc"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%80-%E9%9C%80%E6%B1%82%E5%88%86%E6%9E%90"><span class="toc-text">一.需求分析</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%BA%8C-%E5%8F%AF%E9%80%89%E6%96%B9%E6%A1%88"><span class="toc-text">二.可选方案</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%89-DFA%E7%AE%97%E6%B3%95"><span class="toc-text">三. DFA算法</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#1-%E7%AE%80%E4%BB%8B"><span class="toc-text">1.简介</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#2-%E6%A3%80%E7%B4%A2%E8%BF%87%E7%A8%8B"><span class="toc-text">2.检索过程</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%9B%9B-%E5%B7%A5%E5%85%B7%E7%B1%BB"><span class="toc-text">四.工具类</span></a></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最近更新</span></div><div class="aside-list"><div class="aside-list-item"><a class="thumbnail" href="/posts/8957.html" title="Linux从入门到进阶"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux从入门到进阶"/></a><div class="content"><a class="title" href="/posts/8957.html" title="Linux从入门到进阶">Linux从入门到进阶</a><time datetime="2024-05-10T05:31:11.691Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/35630.html" title="接口测试工具"><img src="/img/2.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="接口测试工具"/></a><div class="content"><a class="title" href="/posts/35630.html" title="接口测试工具">接口测试工具</a><time datetime="2024-05-10T05:31:11.686Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/32246.html" title="SpringBoot中整合Swagger2"><img src="/img/3.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="SpringBoot中整合Swagger2"/></a><div class="content"><a class="title" href="/posts/32246.html" title="SpringBoot中整合Swagger2">SpringBoot中整合Swagger2</a><time datetime="2024-05-10T05:31:11.681Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/19306.html" title="Docker容器化技术"><img src="/img/1.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Docker容器化技术"/></a><div class="content"><a class="title" href="/posts/19306.html" title="Docker容器化技术">Docker容器化技术</a><time datetime="2024-05-10T05:31:11.675Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/20683.html" title="Linux中开发环境的搭建"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux中开发环境的搭建"/></a><div class="content"><a class="title" href="/posts/20683.html" title="Linux中开发环境的搭建">Linux中开发环境的搭建</a><time datetime="2024-05-10T05:31:11.669Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div></div></div></div></div></main><footer id="footer"><div id="footer-wrap"></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><a id="to_comment" href="#post-comment" title="直达评论"><i class="fas fa-comment-alt"></i></a><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="/cdn/js/medium-zoom.min.js"></script><script src="/cdn/js/instantpage.min.js" type="module"></script><script src="/cdn/js/snackbar.min.js"></script><div class="js-pjax"><script>function loadGitalk () {
function initGitalk () {
var gitalk = new Gitalk(Object.assign({
clientID: '00fb27b1e484536359c2',
clientSecret: 'be41a12281c68b6e228d1a27e8d08aeb91541145',
repo: 'BlogComment',
owner: 'JasonsGong',
admin: ['JasonsGong'],
id: '1897c7098c83201b519df9b057bfe77a',
updateCountCallback: commentCount
},null))
gitalk.render('gitalk-container')
}
if (typeof Gitalk === 'function') initGitalk()
else {
getCSS('/cdn/css/gitalk.min.css')
getScript('/cdn/js/gitalk.min.js').then(initGitalk)
}
}
function commentCount(n){
let isCommentCount = document.querySelector('#post-meta .gitalk-comment-count')
if (isCommentCount) {
isCommentCount.textContent= n
}
}
if ('Gitalk' === 'Gitalk' || !true) {
if (true) btf.loadComment(document.getElementById('gitalk-container'), loadGitalk)
else loadGitalk()
} else {
function loadOtherComment () {
loadGitalk()
}
}</script></div><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span> 数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><br/><div class="no-result" id="local-search-results"></div><div id="local-search-stats-wrap"></div></div></div><div id="search-mask"></div><script src="/js/search/local-search.js"></script></div></div><!-- hexo injector body_end start --><script data-pjax>
function butterfly_swiper_injector_config(){
var parent_div_git = document.getElementById('recent-posts');
var item_html = '<div class="recent-post-item" style="height: auto;width: 100%"><div class="blog-slider swiper-container-fade swiper-container-horizontal" id="swiper_container"><div class="blog-slider__wrp swiper-wrapper" style="transition-duration: 0ms;"><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/19306.html" alt=""><img width="48" height="48" src="/img/1.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-04-21</span><a class="blog-slider__title" href="posts/19306.html" alt="">Docker容器化技术</a><div class="blog-slider__text">Docker</div><a class="blog-slider__button" href="posts/19306.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/47003.html" alt=""><img width="48" height="48" src="/img/5.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-03-10</span><a class="blog-slider__title" href="posts/47003.html" alt="">常用正则表达式大全</a><div class="blog-slider__text">正则表达式</div><a class="blog-slider__button" href="posts/47003.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/20683.html" alt=""><img width="48" height="48" src="/img/8.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-06-05</span><a class="blog-slider__title" href="posts/20683.html" alt="">Linux中开发环境的搭建</a><div class="blog-slider__text">环境搭建</div><a class="blog-slider__button" href="posts/20683.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/63333.html" alt=""><img width="48" height="48" src="/img/10.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-06-03</span><a class="blog-slider__title" href="posts/63333.html" alt="">开发环境的搭建</a><div class="blog-slider__text">环境搭建</div><a class="blog-slider__button" href="posts/63333.html" alt="">详情 </a></div></div></div><div class="blog-slider__pagination swiper-pagination-clickable swiper-pagination-bullets"></div></div></div>';
if (parent_div_git !== null && typeof parent_div_git !== 'undefined') {
parent_div_git.insertAdjacentHTML("afterbegin",item_html)
}
}
var elist = 'undefined'.split(',');
var cpage = location.pathname;
var epage = 'all';
var flag = 0;
for (var i=0;i<elist.length;i++){
if (cpage.includes(elist[i])){
flag++;
}
}
if ((epage ==='all')&&(flag == 0)){
butterfly_swiper_injector_config();
}
else if (epage === cpage){
butterfly_swiper_injector_config();
}
</script><script defer src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.js"></script><script defer data-pjax src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper_init.js"></script><!-- hexo injector body_end end --></body></html>