mirror of
https://github.com/JasonsGong/JasonsGong.github.io.git
synced 2024-11-24 09:09:35 +08:00
294 lines
41 KiB
HTML
294 lines
41 KiB
HTML
<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover"><title>OCR-图片文字识别 | The Blog</title><meta name="author" content="Jason"><meta name="copyright" content="Jason"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="一.什么是OCROCR (Optical Character Recognition,光学字符识别)是指电子设备(例如扫描仪或数码相机)检查纸上打印的字符,通过检测暗、亮的模式确定其形状,然后用字符识别方法将形状翻译成计算机文字的过程 方案 说明 百度OCR 收费 Tesseract-OCR Google维护的开源OCR引擎,支持Java,Python等语言调用 Tess4J">
|
||
<meta property="og:type" content="article">
|
||
<meta property="og:title" content="OCR-图片文字识别">
|
||
<meta property="og:url" content="https://qingling.icu/posts/58456.html">
|
||
<meta property="og:site_name" content="The Blog">
|
||
<meta property="og:description" content="一.什么是OCROCR (Optical Character Recognition,光学字符识别)是指电子设备(例如扫描仪或数码相机)检查纸上打印的字符,通过检测暗、亮的模式确定其形状,然后用字符识别方法将形状翻译成计算机文字的过程 方案 说明 百度OCR 收费 Tesseract-OCR Google维护的开源OCR引擎,支持Java,Python等语言调用 Tess4J">
|
||
<meta property="og:locale" content="zh_CN">
|
||
<meta property="og:image" content="https://qingling.icu/img/2.png">
|
||
<meta property="article:published_time" content="2023-12-08T12:36:43.000Z">
|
||
<meta property="article:modified_time" content="2023-12-08T13:23:52.255Z">
|
||
<meta property="article:author" content="Jason">
|
||
<meta property="article:tag" content="OCR">
|
||
<meta name="twitter:card" content="summary">
|
||
<meta name="twitter:image" content="https://qingling.icu/img/2.png"><link rel="shortcut icon" href="/img/%E5%9B%BE%E6%A0%87.png"><link rel="canonical" href="https://qingling.icu/posts/58456.html"><link rel="preconnect" href="//fastly.jsdelivr.net"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="/cdn/icon/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/snackbar.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="/cdn/css/fancybox.min.css" media="print" onload="this.media='all'"><script>const GLOBAL_CONFIG = {
|
||
root: '/',
|
||
algolia: undefined,
|
||
localSearch: {"path":"/search.xml","preload":true,"top_n_per_article":1,"unescape":false,"languages":{"hits_empty":"找不到您查询的内容:${query}","hits_stats":"共找到 ${hits} 篇文章"}},
|
||
translate: undefined,
|
||
noticeOutdate: undefined,
|
||
highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true,"highlightHeightLimit":400},
|
||
copy: {
|
||
success: '复制成功',
|
||
error: '复制错误',
|
||
noSupport: '浏览器不支持'
|
||
},
|
||
relativeDate: {
|
||
homepage: true,
|
||
post: true
|
||
},
|
||
runtime: '天',
|
||
dateSuffix: {
|
||
just: '刚刚',
|
||
min: '分钟前',
|
||
hour: '小时前',
|
||
day: '天前',
|
||
month: '个月前'
|
||
},
|
||
copyright: undefined,
|
||
lightbox: 'mediumZoom',
|
||
Snackbar: {"chs_to_cht":"你已切换为繁体","cht_to_chs":"你已切换为简体","day_to_night":"你已切换为深色模式","night_to_day":"你已切换为浅色模式","bgLight":"#006650","bgDark":"#006650","position":"top-center"},
|
||
source: {
|
||
justifiedGallery: {
|
||
js: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.js',
|
||
css: 'https://fastly.jsdelivr.net/npm/flickr-justified-gallery/dist/fjGallery.min.css'
|
||
}
|
||
},
|
||
isPhotoFigcaption: false,
|
||
islazyload: false,
|
||
isAnchor: true,
|
||
percent: {
|
||
toc: true,
|
||
rightside: false,
|
||
},
|
||
autoDarkmode: true
|
||
}</script><script id="config-diff">var GLOBAL_CONFIG_SITE = {
|
||
title: 'OCR-图片文字识别',
|
||
isPost: true,
|
||
isHome: false,
|
||
isHighlightShrink: false,
|
||
isToc: true,
|
||
postUpdate: '2023-12-08 21:23:52'
|
||
}</script><noscript><style type="text/css">
|
||
#nav {
|
||
opacity: 1
|
||
}
|
||
.justified-gallery img {
|
||
opacity: 1
|
||
}
|
||
|
||
#recent-posts time,
|
||
#post-meta time {
|
||
display: inline !important
|
||
}
|
||
</style></noscript><script>(win=>{
|
||
// Small localStorage wrapper that stores each value together with an
// absolute expiry timestamp (milliseconds since the epoch).
win.saveToLocal = {
  /**
   * Persist `value` under `key` for `ttl` days.
   * A `ttl` of exactly 0 means "do not persist": nothing is written.
   */
  set: function setWithExpiry(key, value, ttl) {
    if (ttl === 0) return
    const expiryDay = ttl * 86400000 // days -> milliseconds
    const item = {
      value: value,
      expiry: Date.now() + expiryDay,
    }
    try {
      localStorage.setItem(key, JSON.stringify(item))
    } catch (err) {
      // Storage may be full or blocked; a failed write must not break the page.
      console.warn(err)
    }
  },

  /**
   * Read the value stored under `key`.
   * Returns `undefined` when nothing is stored, when the entry is unreadable
   * or malformed, or when it has expired (expired entries are removed).
   */
  get: function getWithExpiry(key) {
    let item
    try {
      const itemStr = localStorage.getItem(key)
      if (!itemStr) {
        return undefined
      }
      item = JSON.parse(itemStr)
    } catch (err) {
      // This runs during the inline <head> bootstrap: a corrupt entry or a
      // blocked storage must not abort theme initialisation.
      return undefined
    }

    // JSON.parse('null') yields null, which has no `expiry` to read.
    if (item == null) {
      return undefined
    }

    if (Date.now() > item.expiry) {
      localStorage.removeItem(key)
      return undefined
    }
    return item.value
  }
}
|
||
|
||
/**
 * Load an external script by appending an async <script> element to <head>.
 * The returned promise resolves once the script has loaded and rejects with
 * the error event when loading fails.
 */
win.getScript = url => new Promise((resolve, reject) => {
  const script = document.createElement('script')
  script.src = url
  script.async = true
  script.onerror = reject

  const handleLoad = function () {
    // Legacy engines report progress through readyState; wait for a final state.
    const loadState = this.readyState
    if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
    // Detach both handlers so the promise is settled only once.
    script.onload = script.onreadystatechange = null
    resolve()
  }
  script.onload = script.onreadystatechange = handleLoad

  document.head.appendChild(script)
})
|
||
|
||
/**
 * Load a stylesheet by appending a <link rel="stylesheet"> element to <head>.
 * When `id` is truthy it is set as the element id. The returned promise
 * resolves once the stylesheet has loaded and rejects with the error event
 * when loading fails.
 */
win.getCSS = (url, id = false) => new Promise((resolve, reject) => {
  const link = document.createElement('link')
  link.rel = 'stylesheet'
  link.href = url
  if (id) link.id = id
  link.onerror = reject

  const handleLoad = function () {
    // Legacy engines report progress through readyState; wait for a final state.
    const loadState = this.readyState
    if (loadState && loadState !== 'loaded' && loadState !== 'complete') return
    // Detach both handlers so the promise is settled only once.
    link.onload = link.onreadystatechange = null
    resolve()
  }
  link.onload = link.onreadystatechange = handleLoad

  document.head.appendChild(link)
})
|
||
|
||
/**
 * Switch the page to the dark theme: sets data-theme="dark" on <html> and,
 * when a <meta name="theme-color"> tag exists, updates its colour.
 */
win.activateDarkMode = function () {
  document.documentElement.setAttribute('data-theme', 'dark')
  // Look the tag up once instead of querying the DOM twice.
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta !== null) {
    themeColorMeta.setAttribute('content', '#0d0d0d')
  }
}
|
||
/**
 * Switch the page to the light theme: sets data-theme="light" on <html> and,
 * when a <meta name="theme-color"> tag exists, updates its colour.
 */
win.activateLightMode = function () {
  document.documentElement.setAttribute('data-theme', 'light')
  // Look the tag up once instead of querying the DOM twice.
  const themeColorMeta = document.querySelector('meta[name="theme-color"]')
  if (themeColorMeta !== null) {
    themeColorMeta.setAttribute('content', '#ffffff')
  }
}
|
||
// Initial theme selection. A theme saved by the visitor wins; otherwise the
// OS colour-scheme preference is followed, with a time-of-day fallback.
const t = saveToLocal.get('theme')

const darkSchemeQuery = window.matchMedia('(prefers-color-scheme: dark)')
const isDarkMode = darkSchemeQuery.matches
const isLightMode = window.matchMedia('(prefers-color-scheme: light)').matches
const isNotSpecified = window.matchMedia('(prefers-color-scheme: no-preference)').matches
const hasNoSupport = !isDarkMode && !isLightMode && !isNotSpecified

if (t === undefined) {
  if (isLightMode) activateLightMode()
  else if (isDarkMode) activateDarkMode()
  else if (isNotSpecified || hasNoSupport) {
    // No usable preference: dark from 22:00 through 08:59, light otherwise.
    const hour = new Date().getHours()
    const isNight = hour <= 8 || hour >= 22
    isNight ? activateDarkMode() : activateLightMode()
  }

  // Follow later OS theme changes for as long as no explicit choice is saved.
  const onSchemeChange = function (e) {
    if (saveToLocal.get('theme') === undefined) {
      e.matches ? activateDarkMode() : activateLightMode()
    }
  }
  // MediaQueryList.addListener is deprecated; prefer the standard 'change'
  // event and keep the legacy call only for engines that lack it.
  if (typeof darkSchemeQuery.addEventListener === 'function') {
    darkSchemeQuery.addEventListener('change', onSchemeChange)
  } else {
    darkSchemeQuery.addListener(onSchemeChange)
  }
} else if (t === 'light') activateLightMode()
else activateDarkMode()
|
||
|
||
// Restore the saved sidebar state: 'hide' adds the hide-aside class on <html>,
// any other saved value removes it. With nothing saved the class is untouched.
const asideStatus = saveToLocal.get('aside-status')
if (asideStatus !== undefined) {
  document.documentElement.classList.toggle('hide-aside', asideStatus === 'hide')
}
|
||
|
||
/**
 * Add the "apple" class to <html> when the user agent string mentions
 * iPad, iPhone, iPod or Macintosh.
 */
const detectApple = () => {
  const isAppleDevice = /iPad|iPhone|iPod|Macintosh/.test(navigator.userAgent)
  if (isAppleDevice) {
    document.documentElement.classList.add('apple')
  }
}
|
||
detectApple()
|
||
})(window)</script><script src="https://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js"></script><script type="text/javascript" src ="/js/welcome.js" ></script><script src="/js/sweetalert.js"></script><link rel="stylesheet" href="/css/sweetalert.css"><!-- hexo injector head_end start --><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.css" media="print" onload="this.media='all'"><link rel="stylesheet" href="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiperstyle.css" media="print" onload="this.media='all'"><!-- hexo injector head_end end --><meta name="generator" content="Hexo 6.3.0"></head><body><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="avatar-img is-center"><img src="/img/avatar.jpg" onerror="onerror=null;src='/img/loading.gif'" alt="avatar"/></div><div class="sidebar-site-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><br/><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div></div></div><div class="post" id="body-wrap"><header class="not-top-img" id="page-header"><nav id="nav"><span id="blog-info"><a href="/" title="The Blog"><img class="site-icon" src="/img/logo.png"/><span class="site-name">The 
Blog</span></a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search" href="javascript:void(0);"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" target="_blank" rel="noopener" href="https://www.tutorialspoint.com/compile_java8_online.php"><i class="fa-fw fas fa-code"></i><span> 代码</span></a></div><div class="menus_item"><a class="site-page" href="/notice/"><i class="fa-fw fas fa-stream"></i><span> 公告</span></a></div><div class="menus_item"><a class="site-page" href="/website/"><i class="fa-fw fas fa-list"></i><span> 网址</span></a></div><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div></div><div id="toggle-menu"><a class="site-page" href="javascript:void(0);"><i class="fas fa-bars fa-fw"></i></a></div></div></nav></header><main class="layout" id="content-inner"><div id="post"><div id="post-info"><h1 class="post-title">OCR-图片文字识别</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2023-12-08T12:36:43.000Z" title="发表于 2023-12-08 20:36:43">2023-12-08</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2023-12-08T13:23:52.255Z" title="更新于 2023-12-08 21:23:52">2023-12-08</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E5%90%8E%E7%AB%AF/">后端</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-wordcount"><i class="far fa-file-word fa-fw post-meta-icon"></i><span 
class="post-meta-label">字数总计:</span><span class="word-count">652</span><span class="post-meta-separator">|</span><i class="far fa-clock fa-fw post-meta-icon"></i><span class="post-meta-label">阅读时长:</span><span>2分钟</span></span><span class="post-meta-separator">|</span><span class="post-meta-pv-cv" id="" data-flag-title="OCR-图片文字识别"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"><i class="fa-solid fa-spinner fa-spin"></i></span></span></div></div></div><article class="post-content" id="article-container"><h2 id="一-什么是OCR"><a href="#一-什么是OCR" class="headerlink" title="一.什么是OCR"></a>一.什么是OCR</h2><p>OCR (Optical Character Recognition,光学字符识别)是指电子设备(例如扫描仪或数码相机)检查纸上打印的字符,通过检测暗、亮的模式确定其形状,然后用字符识别方法将形状翻译成计算机文字的过程</p>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th><strong>方案</strong></th>
|
||
<th><strong>说明</strong></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody><tr>
|
||
<td>百度OCR</td>
|
||
<td>收费</td>
|
||
</tr>
|
||
<tr>
|
||
<td>Tesseract-OCR</td>
|
||
<td>Google维护的开源OCR引擎,支持Java,Python等语言调用</td>
|
||
</tr>
|
||
<tr>
|
||
<td>Tess4J</td>
|
||
<td>封装了Tesseract-OCR ,支持Java调用</td>
|
||
</tr>
|
||
</tbody></table>
|
||
<h2 id="二-Tesseract-OCR-的特点"><a href="#二-Tesseract-OCR-的特点" class="headerlink" title="二.Tesseract-OCR 的特点"></a>二.Tesseract-OCR 的特点</h2><ul>
|
||
<li><p>Tesseract支持UTF-8编码格式,并且可以“开箱即用”地识别100多种语言</p>
|
||
</li>
|
||
<li><p>Tesseract支持多种输出格式:纯文本,hOCR(HTML),PDF等</p>
|
||
</li>
|
||
<li><p>官方建议,为了获得更好的OCR结果,最好提供高质量的图像</p>
|
||
</li>
|
||
<li><p>Tesseract可以通过训练来识别其他语言,具体的训练方式请参考官方提供的文档:<a target="_blank" rel="noopener" href="https://tesseract-ocr.github.io/tessdoc/">https://tesseract-ocr.github.io/tessdoc/</a></p>
|
||
</li>
|
||
</ul>
|
||
<h2 id="三-使用案例"><a href="#三-使用案例" class="headerlink" title="三.使用案例"></a>三.使用案例</h2><h3 id="1-导入相关的依赖"><a href="#1-导入相关的依赖" class="headerlink" title="1.导入相关的依赖"></a>1.导入相关的依赖</h3><figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag"><<span class="name">dependency</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">groupId</span>></span>net.sourceforge.tess4j<span class="tag"></<span class="name">groupId</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">artifactId</span>></span>tess4j<span class="tag"></<span class="name">artifactId</span>></span></span><br><span class="line"> <span class="tag"><<span class="name">version</span>></span>4.1.1<span class="tag"></<span class="name">version</span>></span></span><br><span class="line"><span class="tag"></<span class="name">dependency</span>></span></span><br></pre></td></tr></table></figure>
|
||
|
||
<h3 id="2-导入中文字体库"><a href="#2-导入中文字体库" class="headerlink" title="2.导入中文字体库"></a>2.导入中文字体库</h3><p>地址: <a target="_blank" rel="noopener" href="https://wwvc.lanzouj.com/iuPhc1h7j46f">https://wwvc.lanzouj.com/iuPhc1h7j46f</a></p>
|
||
<blockquote>
|
||
<p>chi_sim.traineddata</p>
|
||
</blockquote>
|
||
<p><img src="/pictures/image-20231208212352206.png" alt="image-20231208212352206"></p>
|
||
<h3 id="3-编写测试类进行测试"><a href="#3-编写测试类进行测试" class="headerlink" title="3.编写测试类进行测试"></a>3.编写测试类进行测试</h3><p><strong>待识别的图片</strong></p>
|
||
<p><img src="/pictures/image-20231208205943762.png" alt="image-20231208205943762"></p>
|
||
<p><strong>测试程序</strong></p>
|
||
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.heima;</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.Tesseract;</span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.TesseractException;</span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> java.io.File;</span><br><span class="line"></span><br><span class="line"><span class="comment">/**</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@author</span> Jason Gong</span></span><br><span class="line"><span 
class="comment"> * <span class="doctag">@version</span> 1.0</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@Date</span> 2023/12/8</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@Description</span></span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">Main</span> {</span><br><span class="line"></span><br><span class="line"> <span class="comment">/**</span></span><br><span class="line"><span class="comment"> * 识别图片中的文字</span></span><br><span class="line"><span class="comment"> *</span></span><br><span class="line"><span class="comment"> * <span class="doctag">@param</span> args</span></span><br><span class="line"><span class="comment"> */</span></span><br><span class="line"> <span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title function_">main</span><span class="params">(String[] args)</span> <span class="keyword">throws</span> TesseractException {</span><br><span class="line"> <span class="comment">//创建实例</span></span><br><span class="line"> <span class="type">Tesseract</span> <span class="variable">tesseract</span> <span class="operator">=</span> <span class="keyword">new</span> <span class="title class_">Tesseract</span>();</span><br><span class="line"> <span class="comment">//设置字体库的路径</span></span><br><span class="line"> tesseract.setDatapath(<span class="string">"C:\\Gong\\data\\tess4j"</span>);</span><br><span class="line"> <span class="comment">//设置语言</span></span><br><span class="line"> <span class="comment">//字体库为chi_sim.traineddata,语言取.前面的内容,即文件名</span></span><br><span class="line"> <span class="comment">//简体中文</span></span><br><span class="line"> tesseract.setLanguage(<span class="string">"chi_sim"</span>);</span><br><span class="line"> <span 
class="comment">//识别图片</span></span><br><span class="line"> <span class="type">String</span> <span class="variable">ocr</span> <span class="operator">=</span> tesseract.doOCR(<span class="keyword">new</span> <span class="title class_">File</span>(<span class="string">"C:\\Gong\\data\\tess4j\\tess4j.png"</span>));</span><br><span class="line"> <span class="comment">//打印识别的结果</span></span><br><span class="line"> <span class="comment">//打印的时候可以去除回车和tab空格</span></span><br><span class="line"> <span class="comment">//System.out.println(ocr.replaceAll("\\n|\\r","-"));</span></span><br><span class="line"> System.out.println(ocr);</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
|
||
|
||
<p><strong>识别的结果</strong></p>
|
||
<p><img src="/pictures/image-20231208211200859.png" alt="image-20231208211200859"></p>
|
||
<h2 id="四-封装成工具类使用"><a href="#四-封装成工具类使用" class="headerlink" title="四.封装成工具类使用"></a>四.封装成工具类使用</h2><h3 id="1-创建工具类"><a href="#1-创建工具类" class="headerlink" title="1.创建工具类"></a>1.创建工具类</h3><figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.heima.common.tess4j;</span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> lombok.Getter;</span><br><span class="line"><span class="keyword">import</span> lombok.Setter;</span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.ITesseract;</span><br><span class="line"><span class="keyword">import</span> net.sourceforge.tess4j.Tesseract;</span><br><span class="line"><span class="keyword">import</span> 
net.sourceforge.tess4j.TesseractException;</span><br><span class="line"><span class="keyword">import</span> org.springframework.boot.context.properties.ConfigurationProperties;</span><br><span class="line"><span class="keyword">import</span> org.springframework.stereotype.Component;</span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> java.awt.image.BufferedImage;</span><br><span class="line"></span><br><span class="line"><span class="meta">@Getter</span></span><br><span class="line"><span class="meta">@Setter</span></span><br><span class="line"><span class="meta">@Component</span></span><br><span class="line"><span class="meta">@ConfigurationProperties(prefix = "tess4j")</span></span><br><span class="line"><span class="keyword">public</span> <span class="keyword">class</span> <span class="title class_">Tess4jClient</span> {</span><br><span class="line"></span><br><span class="line"> <span class="keyword">private</span> String dataPath;</span><br><span class="line"> <span class="keyword">private</span> String language;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">public</span> String <span class="title function_">doOCR</span><span class="params">(BufferedImage image)</span> <span class="keyword">throws</span> TesseractException {</span><br><span class="line"> <span class="comment">//创建Tesseract对象</span></span><br><span class="line"> <span class="type">ITesseract</span> <span class="variable">tesseract</span> <span class="operator">=</span> <span class="keyword">new</span> <span class="title class_">Tesseract</span>();</span><br><span class="line"> <span class="comment">//设置字体库路径</span></span><br><span class="line"> tesseract.setDatapath(dataPath);</span><br><span class="line"> <span class="comment">//中文识别</span></span><br><span class="line"> tesseract.setLanguage(language);</span><br><span class="line"> <span class="comment">//执行ocr识别</span></span><br><span class="line"> <span 
class="type">String</span> <span class="variable">result</span> <span class="operator">=</span> tesseract.doOCR(image);</span><br><span class="line"> <span class="comment">//替换回车和tal键 使结果为一行</span></span><br><span class="line"> result = result.replaceAll(<span class="string">"\\r|\\n"</span>, <span class="string">"-"</span>).replaceAll(<span class="string">" "</span>, <span class="string">""</span>);</span><br><span class="line"> <span class="keyword">return</span> result;</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line">}</span><br></pre></td></tr></table></figure>
|
||
|
||
<h3 id="2-配置文件中添加配置"><a href="#2-配置文件中添加配置" class="headerlink" title="2.配置文件中添加配置"></a>2.配置文件中添加配置</h3><figure class="highlight yaml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="attr">tess4j:</span></span><br><span class="line"> <span class="attr">data-path:</span> <span class="string">C:\workspace\tessdata</span> <span class="comment"># 字体库的路径</span></span><br><span class="line"> <span class="attr">language:</span> <span class="string">chi_sim</span> <span class="comment"># 识别的字体d</span></span><br></pre></td></tr></table></figure>
|
||
|
||
</article><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/OCR/">OCR</a></div><div class="post_share"><div class="social-share" data-image="/img/2.png" data-sites="wechat,weibo,qq"></div><link rel="stylesheet" href="/cdn/css/share.min.css" media="print" onload="this.media='all'"><script src="/cdn/js/social-share.min.js" defer></script></div></div><div class="post-reward"><div class="reward-button"><i class="fas fa-qrcode"></i> 打赏</div><div class="reward-main"><ul class="reward-all"><li class="reward-item"><a href="/img/wechat.jpg" target="_blank"><img class="post-qr-code-img" src="/img/wechat.jpg" alt="微信"/></a><div class="post-qr-code-desc">微信</div></li><li class="reward-item"><a href="/img/alipay.jpg" target="_blank"><img class="post-qr-code-img" src="/img/alipay.jpg" alt="支付宝"/></a><div class="post-qr-code-desc">支付宝</div></li></ul></div></div><br/><div id="post-comment"><div class="comment-head"><div class="comment-headline"><i class="far fa-comment-alt fa-fw"></i><span> 评论</span></div></div><div class="comment-wrap"><div><div id="gitalk-container"></div></div></div></div></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="is-center"><div class="avatar-img"><img src="/img/avatar.jpg" onerror="this.onerror=null;this.src='/img/loading.gif'" alt="avatar"/></div><div class="author-info__name">Jason</div><div class="author-info__description">Debug the World!</div></div><div class="card-info-data site-data is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">60</div></a><a href="/tags/"><div class="headline">标签</div><div class="length-num">39</div></a><a href="/categories/"><div class="headline">分类</div><div class="length-num">10</div></a></div><a id="card-info-btn"><i class="fab fa-microsoft"></i><span>Ctrl + D 收藏</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/JasonsGong" target="_blank" 
title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="tencent://AddContact/?fromId=45&fromSubId=1&subcmd=all&uin=2602183349&website=www.oicqzone.com" target="_blank" title="QQ"><i class="fab fa-qq"></i></a><a class="social-icon" href="mailto:2602183349@qq.com" target="_blank" title="Email"><i class="fas fa-envelope-open-text"></i></a><a class="social-icon" href="https://github.com/JasonsGong?tab=repositories" target="_blank" title="代码仓库"><i class="fas fa-database"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn fa-shake"></i><span>公告</span></div><div class="announcement_content">本网站是静态网站,更新页面资源请使用Ctrl+F5;若网站内文章对你有帮助,请使用Ctrl+D收藏该网站!</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span><span class="toc-percentage"></span></div><div class="toc-content is-expand"><ol class="toc"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%80-%E4%BB%80%E4%B9%88%E6%98%AFOCR"><span class="toc-text">一.什么是OCR</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%BA%8C-Tesseract-OCR-%E7%9A%84%E7%89%B9%E7%82%B9"><span class="toc-text">二.Tesseract-OCR 的特点</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E4%B8%89-%E4%BD%BF%E7%94%A8%E6%A1%88%E4%BE%8B"><span class="toc-text">三.使用案例</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#1-%E5%AF%BC%E5%85%A5%E7%9B%B8%E5%85%B3%E7%9A%84%E4%BE%9D%E8%B5%96"><span class="toc-text">1.导入相关的依赖</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#2-%E5%AF%BC%E5%85%A5%E4%B8%AD%E6%96%87%E5%AD%97%E4%BD%93%E5%BA%93"><span class="toc-text">2.导入中文字体库</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#3-%E7%BC%96%E5%86%99%E6%B5%8B%E8%AF%95%E7%B1%BB%E8%BF%9B%E8%A1%8C%E6%B5%8B%E8%AF%95"><span 
class="toc-text">3.编写测试类进行测试</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%9B%9B-%E5%B0%81%E8%A3%85%E6%88%90%E5%B7%A5%E5%85%B7%E7%B1%BB%E4%BD%BF%E7%94%A8"><span class="toc-text">四.封装成工具类使用</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#1-%E5%88%9B%E5%BB%BA%E5%B7%A5%E5%85%B7%E7%B1%BB"><span class="toc-text">1.创建工具类</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#2-%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6%E4%B8%AD%E6%B7%BB%E5%8A%A0%E9%85%8D%E7%BD%AE"><span class="toc-text">2.配置文件中添加配置</span></a></li></ol></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最近更新</span></div><div class="aside-list"><div class="aside-list-item"><a class="thumbnail" href="/posts/8957.html" title="Linux从入门到进阶"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux从入门到进阶"/></a><div class="content"><a class="title" href="/posts/8957.html" title="Linux从入门到进阶">Linux从入门到进阶</a><time datetime="2024-05-10T05:31:11.691Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/35630.html" title="接口测试工具"><img src="/img/2.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="接口测试工具"/></a><div class="content"><a class="title" href="/posts/35630.html" title="接口测试工具">接口测试工具</a><time datetime="2024-05-10T05:31:11.686Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/32246.html" title="SpringBoot中整合Swagger2"><img src="/img/3.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="SpringBoot中整合Swagger2"/></a><div class="content"><a class="title" href="/posts/32246.html" title="SpringBoot中整合Swagger2">SpringBoot中整合Swagger2</a><time datetime="2024-05-10T05:31:11.681Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div 
class="aside-list-item"><a class="thumbnail" href="/posts/19306.html" title="Docker容器化技术"><img src="/img/1.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Docker容器化技术"/></a><div class="content"><a class="title" href="/posts/19306.html" title="Docker容器化技术">Docker容器化技术</a><time datetime="2024-05-10T05:31:11.675Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div><div class="aside-list-item"><a class="thumbnail" href="/posts/20683.html" title="Linux中开发环境的搭建"><img src="/img/8.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="Linux中开发环境的搭建"/></a><div class="content"><a class="title" href="/posts/20683.html" title="Linux中开发环境的搭建">Linux中开发环境的搭建</a><time datetime="2024-05-10T05:31:11.669Z" title="更新于 2024-05-10 13:31:11">2024-05-10</time></div></div></div></div></div></div></main><footer id="footer"><div id="footer-wrap"></div></footer></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><a id="to_comment" href="#post-comment" title="直达评论"><i class="fas fa-comment-alt"></i></a><button id="go-up" type="button" title="回到顶部"><span class="scroll-percent"></span><i class="fas fa-arrow-up"></i></button></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="/cdn/js/medium-zoom.min.js"></script><script src="/cdn/js/instantpage.min.js" type="module"></script><script src="/cdn/js/snackbar.min.js"></script><div class="js-pjax"><script>function loadGitalk () {
|
||
/**
 * Creates the Gitalk widget with this post's settings and renders it into
 * the element whose id is 'gitalk-container'.
 */
function initGitalk () {
  // NOTE(review): the OAuth client secret below is shipped in page source and
  // is readable by every visitor. Confirm the GitHub OAuth app's callback URL
  // is locked to this site's domain, and rotate the secret if it is reused
  // anywhere else.
  const gitalk = new Gitalk({
    clientID: '00fb27b1e484536359c2',
    clientSecret: 'be41a12281c68b6e228d1a27e8d08aeb91541145',
    repo: 'BlogComment',
    owner: 'JasonsGong',
    admin: ['JasonsGong'],
    id: 'df0537ff8bb88a23d412a84e879cee27',
    // Handed to Gitalk so the page's comment counter can be refreshed.
    updateCountCallback: commentCount
  })

  gitalk.render('gitalk-container')
}
|
||
|
||
// Initialise straight away when the Gitalk constructor is already defined;
// otherwise request its stylesheet and script, then initialise once the
// script promise resolves.
if (typeof Gitalk === 'function') {
  initGitalk()
} else {
  getCSS('/cdn/css/gitalk.min.css')
  getScript('/cdn/js/gitalk.min.js')
    .then(initGitalk)
    .catch(function (err) {
      // Surface a failed download or init instead of leaving the promise floating.
      console.error('Failed to load Gitalk', err)
    })
}
|
||
}
|
||
|
||
/**
 * Writes a value into the element matching
 * '#post-meta .gitalk-comment-count'; does nothing when no such element exists.
 *
 * @param {*} n - value assigned to the element's textContent as-is
 *   (presumably the comment total supplied by Gitalk).
 */
function commentCount(n){
  const counter = document.querySelector('#post-meta .gitalk-comment-count')
  if (!counter) return

  counter.textContent = n
}
|
||
|
||
if ('Gitalk' === 'Gitalk' || !true) {
|
||
// The generated `if (true) ... else loadGitalk()` had an unreachable else;
// only the taken call remains. btf.loadComment receives the comment container
// and the loader callback (presumably deferring the load until the container
// is needed; confirm in /js/utils.js).
btf.loadComment(document.getElementById('gitalk-container'), loadGitalk)
|
||
} else {
|
||
// Alias that simply delegates to loadGitalk. Kept as a function declaration
// so its name and scope stay exactly as the theme generated them.
function loadOtherComment () {
  loadGitalk()
}
|
||
}</script></div><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script><div id="local-search"><div class="search-dialog"><nav class="search-nav"><span class="search-dialog-title">搜索</span><span id="loading-status"></span><button class="search-close-button"><i class="fas fa-times"></i></button></nav><div class="is-center" id="loading-database"><i class="fas fa-spinner fa-pulse"></i><span> 数据库加载中</span></div><div class="search-wrap"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div><br/><div class="no-result" id="local-search-results"></div><div id="local-search-stats-wrap"></div></div></div><div id="search-mask"></div><script src="/js/search/local-search.js"></script></div></div><!-- hexo injector body_end start --><script data-pjax>
|
||
/**
 * Builds the featured-post slider markup and prepends it to the element with
 * id 'recent-posts'. Pages without that element are left untouched.
 */
function butterfly_swiper_injector_config(){
  const container = document.getElementById('recent-posts');
  if (!container) return;

  // Image swapped in when a slide cover fails to load.
  const fallbackCover = 'https://unpkg.zhimg.com/akilar-candyassets/image/loading.gif';
  const slideStyle = 'width: 750px; opacity: 1; transform: translate3d(0px, 0px, 0px); transition-duration: 0ms;';

  const slides = [
    { href: 'posts/19306.html', cover: '/img/1.png', date: '2023-04-21', title: 'Docker容器化技术', summary: 'Docker' },
    { href: 'posts/47003.html', cover: '/img/5.png', date: '2023-03-10', title: '常用正则表达式大全', summary: '正则表达式' },
    { href: 'posts/20683.html', cover: '/img/8.png', date: '2023-06-05', title: 'Linux中开发环境的搭建', summary: '环境搭建' },
    { href: 'posts/63333.html', cover: '/img/10.png', date: '2023-06-03', title: '开发环境的搭建', summary: '环境搭建' },
  ];

  // Renders one slide. The onerror handler previously assigned an unquoted URL
  // (`this.src=https://...`), which is a syntax error inside the inline handler,
  // so the fallback never loaded. The URL is now a quoted string and the handler
  // is cleared first so a failing fallback cannot retrigger itself.
  const renderSlide = ({ href, cover, date, title, summary }) => [
    `<div class="blog-slider__item swiper-slide" style="${slideStyle}">`,
    `<a class="blog-slider__img" href="${href}" alt="">`,
    `<img width="48" height="48" src="${cover}" alt="" onerror="this.onerror=null;this.src='${fallbackCover}'"/></a>`,
    `<div class="blog-slider__content"><span class="blog-slider__code">${date}</span>`,
    `<a class="blog-slider__title" href="${href}" alt="">${title}</a>`,
    `<div class="blog-slider__text">${summary}</div>`,
    `<a class="blog-slider__button" href="${href}" alt="">详情 </a></div></div>`,
  ].join('');

  const item_html = [
    '<div class="recent-post-item" style="height: auto;width: 100%">',
    '<div class="blog-slider swiper-container-fade swiper-container-horizontal" id="swiper_container">',
    '<div class="blog-slider__wrp swiper-wrapper" style="transition-duration: 0ms;">',
    slides.map(renderSlide).join(''),
    '</div><div class="blog-slider__pagination swiper-pagination-clickable swiper-pagination-bullets"></div></div></div>',
  ].join('');

  container.insertAdjacentHTML("afterbegin", item_html);
}
|
||
// Decides whether the slider is injected on the current page.
// NOTE(review): these stay top-level `var`s on purpose. The enclosing tag is
// `<script data-pjax>`, which is presumably re-executed on pjax navigation,
// and redeclaring a top-level let/const in a classic script throws.

// Path fragments on which the slider is suppressed. The list is the single
// literal string 'undefined' here, which looks like an unset config value
// rendered by the generator; confirm against the plugin config.
var elist = 'undefined'.split(',');
var cpage = location.pathname;
// 'all' means every page that is not excluded; otherwise one exact path.
var epage = 'all';
// Number of exclusion fragments found in the current path.
var flag = elist.filter(function (fragment) {
  return cpage.includes(fragment);
}).length;

if ((epage === 'all' && flag === 0) || epage === cpage) {
  butterfly_swiper_injector_config();
}
|
||
</script><script defer src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper.min.js"></script><script defer data-pjax src="https://npm.elemecdn.com/hexo-butterfly-swiper/lib/swiper_init.js"></script><!-- hexo injector body_end end --></body></html> |