JasonsGong.github.io/posts/58456.html
2024-06-14 22:00:25 +08:00

294 lines
41 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>OCR-图片文字识别 | The Blog</title><meta name="author" content="Jason"><meta name="copyright" content="Jason"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="一.什么是OCROCR Optical Character Recognition光学字符识别是指电子设备例如扫描仪或数码相机检查纸上打印的字符通过检测暗、亮的模式确定其形状然后用字符识别方法将形状翻译成计算机文字的过程 方案 说明 百度OCR 收费 Tesseract-OCR Google维护的开源OCR引擎支持JavaPython等语言调用 Tess4J">
<meta property="og:type" content="article">
<meta property="og:title" content="OCR-图片文字识别">
<meta property="og:url" content="https://qingling.icu/posts/58456.html">
<meta property="og:site_name" content="The Blog">
<meta property="og:description" content="一.什么是OCROCR Optical Character Recognition光学字符识别是指电子设备例如扫描仪或数码相机检查纸上打印的字符通过检测暗、亮的模式确定其形状然后用字符识别方法将形状翻译成计算机文字的过程 方案 说明 百度OCR 收费 Tesseract-OCR Google维护的开源OCR引擎支持JavaPython等语言调用 Tess4J">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://qingling.icu/img/2.png">
<meta property="article:published_time" content="2023-12-08T12:36:43.000Z">
<meta property="article:modified_time" content="2023-12-08T13:23:52.255Z">
<meta property="article:author" content="Jason">
<meta property="article:tag" content="OCR">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://qingling.icu/img/2.png"><link rel="shortcut icon" href="/img/%E5%9B%BE%E6%A0%87.png"><link rel="canonical" href="https://qingling.icu/posts/58456.html"><link rel="preconnect" href="//fastly.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="/cdn/icon/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/snackbar.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/fancybox.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
root: '/',
algolia: undefined,
localSearch: {"path":"/search.xml","preload":true,"top_n_per_article":1,"unescape":false,"languages":{"hits_empty":"找不到您查询的内容:${query}","hits_stats":"共找到 ${hits} 篇文章"}},
translate: undefined,
noticeOutdate: undefined,
highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":400},
copy: {
success: '复制成功',
error: '复制错误',
noSupport: '浏览器不支持'
},
relativeDate: {
homepage: true,
post: true
},
runtime: '天',
dateSuffix: {
just: '刚刚',
min: '分钟前',
hour: '小时前',
day: '天前',
month: '个月前'
},
copyright: undefined,
lightbox: 'mediumZoom',
Snackbar: {"chs_to_cht":"你已切换为繁体","cht_to_chs":"你已切换为简体","day_to_night":"你已切换为深色模式","night_to_day":"你已切换为浅色模式","bgLight":"#006650","bgDark":"#006650","position":"top-center"},
source: {
justifiedGallery: {
js: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.js',
css: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.css'
}
},
isPhotoFigcaption: false,
islazyload: false,
isAnchor: true,
percent: {
toc: true,
rightside: false,
},
autoDarkmode: true
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
title: 'OCR-图片文字识别',
isPost: true,
isHome: false,
isHighlightShrink: false,
isToc: true,
postUpdate: '2023-12-08 21:23:52'
}</script><noscript><style type="text/css">
#nav {
opacity: 1
}
.justified-gallery img {
opacity: 1
}
#recent-posts time,
#post-meta time {
display: inline !important
}
</style></noscript><script>(win=>{
win.saveToLocal = {
set: function setWithExpiry(key, value, ttl) {
if (ttl === 0) return
const now = new Date()
const expiryDay = ttl * 86400000
const item = {
value: value,
expiry: now.getTime() + expiryDay,
}
localStorage.setItem(key, JSON.stringify(item))
},
get: function getWithExpiry(key) {
const itemStr = localStorage.getItem(key)
if (!itemStr) {
return undefined
}
const item = JSON.parse(itemStr)
const now = new Date()
if (now.getTime() > item.expiry) {
localStorage.removeItem(key)
return undefined
}
return item.value
}
}
win.getScript = url => new Promise((resolve, reject) => {
const script = document.createElement('script')
script.src = url
script.async = true
script.onerror = reject
script.onload = script.onreadystatechange = function() {
const loadState = this.readyState
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
script.onload = script.onreadystatechange = null
resolve()
}
document.head.appendChild(script)
})
win.getCSS = (url,id = false) => new Promise((resolve, reject) => {
const link = document.createElement('link')
link.rel = 'stylesheet'
link.href = url
if (id) link.id = id
link.onerror = reject
link.onload = link.onreadystatechange = function() {
const loadState = this.readyState
if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
link.onload = link.onreadystatechange = null
resolve()
}
document.head.appendChild(link)
})
win.activateDarkMode = function () {
document.documentElement.setAttribute('data-theme', 'dark')
if (document.querySelector('meta[name="theme-color"]') !== null) {
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#0d0d0d')
}
}
win.activateLightMode = function () {
document.documentElement.setAttribute('data-theme', 'light')
if (document.querySelector('meta[name="theme-color"]') !== null) {
document.querySelector('meta[name="theme-color"]').setAttribute('content', '#ffffff')
}
}
const t = saveToLocal.get('theme')
const isDarkMode = window.matchMedia('(prefers-color-scheme: dark)').matches
const isLightMode = window.matchMedia('(prefers-color-scheme: light)').matches
const isNotSpecified = window.matchMedia('(prefers-color-scheme: no-preference)').matches
const hasNoSupport = !isDarkMode && !isLightMode && !isNotSpecified
if (t === undefined) {
if (isLightMode) activateLightMode()
else if (isDarkMode) activateDarkMode()
else if (isNotSpecified || hasNoSupport) {
const now = new Date()
const hour = now.getHours()
const isNight = hour <= 8 || hour >= 22
isNight ? activateDarkMode() : activateLightMode()
}
window.matchMedia('(prefers-color-scheme: dark)').addListener(function (e) {
if (saveToLocal.get('theme') === undefined) {
e.matches ? activateDarkMode() : activateLightMode()
}
})
} else if (t === 'light') activateLightMode()
else activateDarkMode()
const asideStatus = saveToLocal.get('aside-status')
if (asideStatus !== undefined) {
if (asideStatus === 'hide') {
document.documentElement.classList.add('hide-aside')
} else {
document.documentElement.classList.remove('hide-aside')
}
}
const detectApple = () => {
if(/iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)){
document.documentElement.classList.add('apple')
}
}
detectApple()
})(window)</script><script src="https://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js"></script><script type="text/javascript" src ="/js/welcome.js" ></script><script src="/js/sweetalert.js"></script><link rel="stylesheet" href="/css/sweetalert.css"><!-- hexo injector head_end start --><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiperstyle.css" media="print" onload="this.media='all'"><!-- hexo injector head_end end --><meta name="generator" content="Hexo 6.3.0"></head><body><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="/img/avatar.jpg" onerror="onerror=null;src='/img/loading.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><br/><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div></div></div><div class="post" id="body-wrap"><header class="not-top-img" id="page-header"><nav id="nav"><span id="blog-info"><a href="/" title="The Blog"><img class="site-icon" src="/img/logo.png"/><span class="site-name">The Blog</span></a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search" href="javascript:void(0);"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div><div id="toggle-menu"><a class="site-page" href="javascript:void(0);"><i class="fas fa-bars fa-fw"></i></a></div></div></nav></header><main class="layout" id="content-inner"><div id="post"><div id="post-info"><h1 class="post-title">OCR-图片文字识别</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2023-12-08T12:36:43.000Z" title="发表于 2023-12-08 20:36:43">2023-12-08</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2023-12-08T13:23:52.255Z" title="更新于 2023-12-08 21:23:52">2023-12-08</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E5%90%8E%E7%AB%AF/">后端</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-wordcount"><i class="far fa-file-word fa-fw post-meta-icon"></i><span class="post-meta-label">字数总计:</span><span class="word-count">652</span><span class="post-meta-separator">|</span><i class="far fa-clock fa-fw post-meta-icon"></i><span class="post-meta-label">阅读时长:</span><span>2分钟</span></span><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="OCR-图片文字识别"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div><article class="post-content" id="article-container"><h2 id="一-什么是OCR"><a href="#一-什么是OCR" class="headerlink" title="一.什么是OCR"></a>一.什么是OCR</h2><p>OCR Optical Character Recognition光学字符识别是指电子设备例如扫描仪或数码相机检查纸上打印的字符通过检测暗、亮的模式确定其形状然后用字符识别方法将形状翻译成计算机文字的过程</p>
<table>
<thead>
<tr>
<th><strong>方案</strong></th>
<th><strong>说明</strong></th>
</tr>
</thead>
<tbody><tr>
<td>百度OCR</td>
<td>收费</td>
</tr>
<tr>
<td>Tesseract-OCR</td>
<td>Google维护的开源OCR引擎支持JavaPython等语言调用</td>
</tr>
<tr>
<td>Tess4J</td>
<td>封装了Tesseract-OCR 支持Java调用</td>
</tr>
</tbody></table>
<h2 id="二-Tesseract-OCR-的特点"><a href="#二-Tesseract-OCR-的特点" class="headerlink" title="二.Tesseract-OCR 的特点"></a>二.Tesseract-OCR 的特点</h2><ul>
<li><p>Tesseract支持UTF-8编码格式并且可以“开箱即用”地识别100多种语言</p>
</li>
<li><p>Tesseract支持多种输出格式纯文本hOCRHTMLPDF等</p>
</li>
<li><p>官方建议为了获得更好的OCR结果最好提供给高质量的图像</p>
</li>
<li><p>Tesseract进行识别其他语言的训练具体的训练方式请参考官方提供的文档<a target="_blank" rel="noopener" href="https://tesseract-ocr.github.io/tessdoc/">https://tesseract-ocr.github.io/tessdoc/</a></p>
</li>
</ul>
<h2 id="三-使用案例"><a href="#三-使用案例" class="headerlink" title="三.使用案例"></a>三.使用案例</h2><h3 id="1-导入相关的依赖"><a href="#1-导入相关的依赖" class="headerlink" title="1.导入相关的依赖"></a>1.导入相关的依赖</h3><figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line"> <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>net.sourceforge.tess4j<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line"> <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>tess4j<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line"> <span class="tag">&lt;<span class="name">version</span>&gt;</span>4.1.1<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line"><span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br></pre></td></tr></table></figure>
<h3 id="2-导入中文字体库"><a href="#2-导入中文字体库" class="headerlink" title="2.导入中文字体库"></a>2.导入中文字体库</h3><p>地址: <a target="_blank" rel="noopener" href="https://wwvc.lanzouj.com/iuPhc1h7j46f">https://wwvc.lanzouj.com/iuPhc1h7j46f</a></p>
<blockquote>
<p>chi_sim.traineddata</p>
</blockquote>
<p><img src="/pictures/image-20231208212352206.png" alt="image-20231208212352206"></p>
<h3 id="3-编写测试类进行测试"><a href="#3-编写测试类进行测试" class="headerlink" title="3.编写测试类进行测试"></a>3.编写测试类进行测试</h3><p><strong>待识别的图片</strong></p>
<p><img src="/pictures/image-20231208205943762.png" alt="image-20231208205943762"></p>
<p><strong>测试程序</strong></p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.heima;</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.Tesseract;</span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.TesseractException;</span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> java.io.File;</span><br><span class="line"></span><br><span class="line"><span class="comment">/**</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@author</span> Jason Gong</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@version</span> 1.0</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@Date</span> 2023/12/8</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@Description</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">Main</span> &#123;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 识别图片中的文字</span></span><br><span class="line"><span class="comment"> *</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> args</span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title function_">main</span><span class="params">(String[] args)</span> <span class="keyword">throws</span> TesseractException &#123;</span><br><span class="line"> <span class="comment">//创建实例</span></span><br><span class="line"> <span class="type">Tesseract</span> <span class="variable">tesseract</span> <span class="operator">=</span> <span class="keyword">new</span> <span class="title class_">Tesseract</span>();</span><br><span class="line"> <span class="comment">//设置字体库的路径</span></span><br><span class="line"> tesseract.setDatapath(<span class="string">&quot;C:\\Gong\\data\\tess4j&quot;</span>);</span><br><span class="line"> <span class="comment">//设置语言</span></span><br><span class="line"> <span class="comment">//字体库为chi_sim.traineddata,语言取.前面的内容,即文件名</span></span><br><span class="line"> <span class="comment">//简体中文</span></span><br><span class="line"> tesseract.setLanguage(<span class="string">&quot;chi_sim&quot;</span>);</span><br><span class="line"> <span class="comment">//识别图片</span></span><br><span class="line"> <span class="type">String</span> <span class="variable">ocr</span> <span class="operator">=</span> tesseract.doOCR(<span class="keyword">new</span> <span class="title class_">File</span>(<span class="string">&quot;C:\\Gong\\data\\tess4j\\tess4j.png&quot;</span>));</span><br><span class="line"> <span class="comment">//打印识别的结果</span></span><br><span class="line"> <span class="comment">//打印的时候可以去除回车和tab空格</span></span><br><span class="line"> <span class="comment">//System.out.println(ocr.replaceAll(&quot;\\n|\\r&quot;,&quot;-&quot;));</span></span><br><span class="line"> System.out.println(ocr);</span><br><span class="line"> &#125;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>
<p><strong>识别的结果</strong></p>
<p><img src="/pictures/image-20231208211200859.png" alt="image-20231208211200859"></p>
<h2 id="四-封装成工具类使用"><a href="#四-封装成工具类使用" class="headerlink" title="四.封装成工具类使用"></a>四.封装成工具类使用</h2><h3 id="1-创建工具类"><a href="#1-创建工具类" class="headerlink" title="1.创建工具类"></a>1.创建工具类</h3><figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.heima.common.tess4j;</span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> lombok.Getter;</span><br><span class="line"><span class="keyword">import</span> lombok.Setter;</span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.ITesseract;</span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.Tesseract;</span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.TesseractException;</span><br><span class="line"><span class="keyword">import</span> org.springframework.boot.context.properties.ConfigurationProperties;</span><br><span class="line"><span class="keyword">import</span> org.springframework.stereotype.Component;</span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> java.awt.image.BufferedImage;</span><br><span class="line"></span><br><span class="line"><span class="meta">@Getter</span></span><br><span class="line"><span class="meta">@Setter</span></span><br><span class="line"><span class="meta">@Component</span></span><br><span class="line"><span class="meta">@ConfigurationProperties(prefix = &quot;tess4j&quot;)</span></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">Tess4jClient</span> &#123;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">private</span> String dataPath;</span><br><span class="line"> <span class="keyword">private</span> String language;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">public</span> String <span class="title function_">doOCR</span><span class="params">(BufferedImage image)</span> <span class="keyword">throws</span> TesseractException &#123;</span><br><span class="line"> <span class="comment">//创建Tesseract对象</span></span><br><span class="line"> <span class="type">ITesseract</span> <span class="variable">tesseract</span> <span class="operator">=</span> <span class="keyword">new</span> <span class="title class_">Tesseract</span>();</span><br><span class="line"> <span class="comment">//设置字体库路径</span></span><br><span class="line"> tesseract.setDatapath(dataPath);</span><br><span class="line"> <span class="comment">//中文识别</span></span><br><span class="line"> tesseract.setLanguage(language);</span><br><span class="line"> <span class="comment">//执行ocr识别</span></span><br><span class="line"> <span class="type">String</span> <span class="variable">result</span> <span class="operator">=</span> tesseract.doOCR(image);</span><br><span class="line"> <span class="comment">//替换回车和tal键 使结果为一行</span></span><br><span class="line"> result = result.replaceAll(<span class="string">&quot;\\r|\\n&quot;</span>, <span class="string">&quot;-&quot;</span>).replaceAll(<span class="string">&quot; &quot;</span>, <span class="string">&quot;&quot;</span>);</span><br><span class="line"> <span class="keyword">return</span> result;</span><br><span class="line"> &#125;</span><br><span class="line"></span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>
<h3 id="2-配置文件中添加配置"><a href="#2-配置文件中添加配置" class="headerlink" title="2.配置文件中添加配置"></a>2.配置文件中添加配置</h3><figure class="highlight yaml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="attr">tess4j:</span></span><br><span class="line"> <span class="attr">data-path:</span> <span class="string">C:\workspace\tessdata</span> <span class="comment"># 字体库的路径</span></span><br><span class="line"> <span class="attr">language:</span> <span class="string">chi_sim</span> <span class="comment"># 识别的字体d</span></span><br></pre></td></tr></table></figure>
</article><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/OCR/">OCR</a></div><div class="post_share"><div class="social-share" data-image="/img/2.png" data-sites="wechat,weibo,qq"></div><link rel="stylesheet" href="/cdn/css/share.min.css" media="print" onload="this.media='all'"><script src="/cdn/js/social-share.min.js" defer></script></div></div><div class="post-reward"><div class="reward-button"><i class="fas fa-qrcode"></i> 打赏</div><div class="reward-main"><ul class="reward-all"><li class="reward-item"><a href="/img/wechat.jpg" target="_blank"><img class="post-qr-code-img" src="/img/wechat.jpg" alt="微信"/></a><div class="post-qr-code-desc">微信</div></li><li class="reward-item"><a href="/img/alipay.jpg" target="_blank"><img class="post-qr-code-img" src="/img/alipay.jpg" alt="支付宝"/></a><div class="post-qr-code-desc">支付宝</div></li></ul></div></div><br/><div id="post-comment"><div class="comment-head"><div class="comment-headline"><i class="far fa-comment-alt fa-fw"></i><span> 评论</span></div></div><div class="comment-wrap"><div><div id="gitalk-container"></div></div></div></div></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="/img/avatar.jpg" onerror="this.onerror=null;this.src='/img/loading.gif'" alt="avatar"/></div><div class="author-info__name">Jason</div><div class="author-info__description">Debug the World</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><a id="card-info-btn"><i class="fab fa-microsoft"></i><span>Ctrl + D 收藏</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/JasonsGong" target="_blank" title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="tencent://AddContact/?fromId=45&amp;fromSubId=1&amp;subcmd=all&amp;uin=2602183349&amp;website=www.oicqzone.com" target="_blank" title="QQ"><i class="fab fa-qq"></i></a><a class="social-icon" href="mailto:2602183349@qq.com" target="_blank" title="Email"><i class="fas fa-envelope-open-text"></i></a><a class="social-icon" href="https://github.com/JasonsGong?tab=repositories" target="_blank" title="代码仓库"><i class="fas fa-database"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">本网站是静态网站,更新页面资源请使用Ctrl+F5;若网站内文章对你有帮助,请使用Ctrl+D收藏该网站</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div class="toc-content is-expand"><ol class="toc"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%80-%E4%BB%80%E4%B9%88%E6%98%AFOCR"><span class="toc-text">一.什么是OCR</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%BA%8C-Tesseract-OCR-%E7%9A%84%E7%89%B9%E7%82%B9"><span class="toc-text">二.Tesseract-OCR 的特点</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%89-%E4%BD%BF%E7%94%A8%E6%A1%88%E4%BE%8B"><span class="toc-text">三.使用案例</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#1-%E5%AF%BC%E5%85%A5%E7%9B%B8%E5%85%B3%E7%9A%84%E4%BE%9D%E8%B5%96"><span class="toc-text">1.导入相关的依赖</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#2-%E5%AF%BC%E5%85%A5%E4%B8%AD%E6%96%87%E5%AD%97%E4%BD%93%E5%BA%93"><span class="toc-text">2.导入中文字体库</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#3-%E7%BC%96%E5%86%99%E6%B5%8B%E8%AF%95%E7%B1%BB%E8%BF%9B%E8%A1%8C%E6%B5%8B%E8%AF%95"><span class="toc-text">3.编写测试类进行测试</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%9B%9B-%E5%B0%81%E8%A3%85%E6%88%90%E5%B7%A5%E5%85%B7%E7%B1%BB%E4%BD%BF%E7%94%A8"><span class="toc-text">四.封装成工具类使用</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#1-%E5%88%9B%E5%BB%BA%E5%B7%A5%E5%85%B7%E7%B1%BB"><span class="toc-text">1.创建工具类</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#2-%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6%E4%B8%AD%E6%B7%BB%E5%8A%A0%E9%85%8D%E7%BD%AE"><span class="toc-text">2.配置文件中添加配置</span></a></li></ol></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最近更新</span></div><div class="aside-list"><div class="aside-list-item"><a class="thumbnail" href="/posts/8957.html" title="Linux从入门到进阶"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux从入门到进阶"/></a><div class="content"><a class="title" href="/posts/8957.html" title="Linux从入门到进阶">Linux从入门到进阶</a><time datetime="2024-05-10T05:31:11.691Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/35630.html" title="接口测试工具"><img src="/img/2.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="接口测试工具"/></a><div class="content"><a class="title" href="/posts/35630.html" title="接口测试工具">接口测试工具</a><time datetime="2024-05-10T05:31:11.686Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/32246.html" title="SpringBoot中整合Swagger2"><img src="/img/3.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="SpringBoot中整合Swagger2"/></a><div class="content"><a class="title" href="/posts/32246.html" title="SpringBoot中整合Swagger2">SpringBoot中整合Swagger2</a><time datetime="2024-05-10T05:31:11.681Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/19306.html" title="Docker容器化技术"><img src="/img/1.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Docker容器化技术"/></a><div class="content"><a class="title" href="/posts/19306.html" title="Docker容器化技术">Docker容器化技术</a><time datetime="2024-05-10T05:31:11.675Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/20683.html" title="Linux中开发环境的搭建"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux中开发环境的搭建"/></a><div class="content"><a class="title" href="/posts/20683.html" title="Linux中开发环境的搭建">Linux中开发环境的搭建</a><time datetime="2024-05-10T05:31:11.669Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div></div></div></div></div></main><footer id="footer"><div id="footer-wrap"></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><a id="to_comment" href="#post-comment" title="直达评论"><i class="fas fa-comment-alt"></i></a><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="/cdn/js/medium-zoom.min.js"></script><script src="/cdn/js/instantpage.min.js" type="module"></script><script src="/cdn/js/snackbar.min.js"></script><div class="js-pjax"><script>function loadGitalk () {
function initGitalk () {
var gitalk = new Gitalk(Object.assign({
clientID: '00fb27b1e484536359c2',
clientSecret: 'be41a12281c68b6e228d1a27e8d08aeb91541145',
repo: 'BlogComment',
owner: 'JasonsGong',
admin: ['JasonsGong'],
id: 'df0537ff8bb88a23d412a84e879cee27',
updateCountCallback: commentCount
},null))
gitalk.render('gitalk-container')
}
if (typeof Gitalk === 'function') initGitalk()
else {
getCSS('/cdn/css/gitalk.min.css')
getScript('/cdn/js/gitalk.min.js').then(initGitalk)
}
}
function commentCount(n){
let isCommentCount = document.querySelector('#post-meta .gitalk-comment-count')
if (isCommentCount) {
isCommentCount.textContent= n
}
}
if ('Gitalk' === 'Gitalk' || !true) {
if (true) btf.loadComment(document.getElementById('gitalk-container'), loadGitalk)
else loadGitalk()
} else {
function loadOtherComment () {
loadGitalk()
}
}</script></div><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span> 数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><br/><div class="no-result" id="local-search-results"></div><div id="local-search-stats-wrap"></div></div></div><div id="search-mask"></div><script src="/js/search/local-search.js"></script></div></div><!-- hexo injector body_end start --><script data-pjax>
function butterfly_swiper_injector_config(){
var parent_div_git = document.getElementById('recent-posts');
var item_html = '<div class="recent-post-item" style="height: auto;width: 100%"><div class="blog-slider swiper-container-fade swiper-container-horizontal" id="swiper_container"><div class="blog-slider__wrp swiper-wrapper" style="transition-duration: 0ms;"><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/19306.html" alt=""><img width="48" height="48" src="/img/1.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-04-21</span><a class="blog-slider__title" href="posts/19306.html" alt="">Docker容器化技术</a><div class="blog-slider__text">Docker</div><a class="blog-slider__button" href="posts/19306.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/47003.html" alt=""><img width="48" height="48" src="/img/5.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-03-10</span><a class="blog-slider__title" href="posts/47003.html" alt="">常用正则表达式大全</a><div class="blog-slider__text">正则表达式</div><a class="blog-slider__button" href="posts/47003.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/20683.html" alt=""><img width="48" height="48" src="/img/8.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-06-05</span><a class="blog-slider__title" href="posts/20683.html" alt="">Linux中开发环境的搭建</a><div class="blog-slider__text">环境搭建</div><a class="blog-slider__button" href="posts/20683.html" alt="">详情 </a></div></div><div class="blog-slider__item swiper-slide" style="width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;"><a class="blog-slider__img" href="posts/63333.html" alt=""><img width="48" height="48" src="/img/10.png" alt="" onerror="this.src=https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif; this.onerror = null;"/></a><div class="blog-slider__content"><span class="blog-slider__code">2023-06-03</span><a class="blog-slider__title" href="posts/63333.html" alt="">开发环境的搭建</a><div class="blog-slider__text">环境搭建</div><a class="blog-slider__button" href="posts/63333.html" alt="">详情 </a></div></div></div><div class="blog-slider__pagination swiper-pagination-clickable swiper-pagination-bullets"></div></div></div>';
if (parent_div_git !== null && typeof parent_div_git !== 'undefined') {
parent_div_git.insertAdjacentHTML("afterbegin",item_html)
}
}
var elist = 'undefined'.split(',');
var cpage = location.pathname;
var epage = 'all';
var flag = 0;
for (var i=0;i<elist.length;i++){
if (cpage.includes(elist[i])){
flag++;
}
}
if ((epage ==='all')&&(flag == 0)){
butterfly_swiper_injector_config();
}
else if (epage === cpage){
butterfly_swiper_injector_config();
}
</script><script defer src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.js"></script><script defer data-pjax src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper_init.js"></script><!-- hexo injector body_end end --></body></html>