Added ability to automatically fix defined recognition errors.
This commit is contained in:
parent
fd78dde837
commit
0fd694787a
@ -8,10 +8,11 @@
|
||||
#include "Settings.h"
|
||||
#include "ImageProcessing.h"
|
||||
#include "StAssert.h"
|
||||
#include "RecognizerHelper.h"
|
||||
|
||||
Recognizer::Recognizer (QObject *parent) :
|
||||
QObject (parent),
|
||||
engine_ (NULL), imageScale_ (0) {
|
||||
engine_ (NULL), recognizerHelper_ (new RecognizerHelper), imageScale_ (0) {
|
||||
applySettings ();
|
||||
}
|
||||
|
||||
@ -19,6 +20,8 @@ void Recognizer::applySettings () {
|
||||
QSettings settings;
|
||||
settings.beginGroup (settings_names::recogntionGroup);
|
||||
|
||||
recognizerHelper_->load ();
|
||||
|
||||
tessDataDir_ = settings.value (settings_names::tessDataPlace,
|
||||
settings_values::tessDataPlace).toString ();
|
||||
if (tessDataDir_.right (1) != "/") {
|
||||
@ -57,8 +60,8 @@ void Recognizer::recognize (ProcessingItem item) {
|
||||
bool isCustomLanguage = (!item.ocrLanguage.isEmpty () &&
|
||||
item.ocrLanguage != ocrLanguage_);
|
||||
tesseract::TessBaseAPI *engine = (isCustomLanguage) ? NULL : engine_;
|
||||
QString language = (isCustomLanguage) ? item.ocrLanguage : ocrLanguage_;
|
||||
if (engine == NULL) {
|
||||
QString language = (isCustomLanguage) ? item.ocrLanguage : ocrLanguage_;
|
||||
if (!initEngine (engine, language)) {
|
||||
return;
|
||||
}
|
||||
@ -78,7 +81,7 @@ void Recognizer::recognize (ProcessingItem item) {
|
||||
}
|
||||
|
||||
if (!result.isEmpty ()) {
|
||||
item.recognized = result;
|
||||
item.recognized = recognizerHelper_->substitute (result, language);
|
||||
emit recognized (item);
|
||||
}
|
||||
else {
|
||||
|
@ -9,6 +9,7 @@
|
||||
namespace tesseract {
|
||||
class TessBaseAPI;
|
||||
}
|
||||
class RecognizerHelper;
|
||||
|
||||
class Recognizer : public QObject {
|
||||
Q_OBJECT
|
||||
@ -29,6 +30,7 @@ class Recognizer : public QObject {
|
||||
|
||||
private:
|
||||
tesseract::TessBaseAPI *engine_;
|
||||
RecognizerHelper *recognizerHelper_;
|
||||
|
||||
QString tessDataDir_;
|
||||
QString ocrLanguage_;
|
||||
|
78
RecognizerHelper.cpp
Normal file
78
RecognizerHelper.cpp
Normal file
@ -0,0 +1,78 @@
|
||||
#include <QFile>
|
||||
|
||||
#include "RecognizerHelper.h"
|
||||
|
||||
// Creates a helper bound to the relative file name "subs.csv".
// Nothing is read here; the substitution list stays empty until load () or setSubs () is called.
RecognizerHelper::RecognizerHelper ()
|
||||
: fileName_ ("subs.csv") {
|
||||
}
|
||||
|
||||
void RecognizerHelper::load () {
|
||||
subs_.clear ();
|
||||
QFile f (fileName_);
|
||||
if (!f.open (QFile::ReadOnly)) {
|
||||
return;
|
||||
}
|
||||
QByteArray data = f.readAll ();
|
||||
f.close ();
|
||||
QStringList lines = QString::fromUtf8 (data).split ('\n', QString::SkipEmptyParts);
|
||||
foreach (const QString &line, lines) {
|
||||
QStringList parts = line.mid (1, line.size () - 2).split ("\",\""); // remove "
|
||||
if (parts.size () < 3) {
|
||||
continue;
|
||||
}
|
||||
subs_.append (Sub (parts[0], parts[1], parts[2]));
|
||||
}
|
||||
}
|
||||
|
||||
void RecognizerHelper::save () {
|
||||
QFile f (fileName_);
|
||||
if (!f.open (QFile::WriteOnly)) {
|
||||
return;
|
||||
}
|
||||
foreach (const Sub &sub, subs_) {
|
||||
QStringList parts = QStringList () << sub.language << sub.source << sub.target;
|
||||
QString line = "\"" + parts.join ("\",\"") + "\"\n";
|
||||
f.write (line.toUtf8 ());
|
||||
}
|
||||
f.close ();
|
||||
}
|
||||
|
||||
// Applies the stored substitutions for `language` to `source` and returns the
// corrected text; `source` itself is not modified.
//
// Rules are applied longest-source-first (ties: earlier rule in the list wins),
// and applying a rule replaces every occurrence of its source in the current text.
// Each rule is applied at most once per call. This guarantees termination: the
// previous "repeat while anything matches" loop never finished when a rule's
// target contained its own source (including source == target) or when two
// rules re-introduced each other's source.
// Rules for other languages and rules with an empty source are ignored.
QString RecognizerHelper::substitute (const QString &source, const QString &language) const {
  // Rules that are still allowed to fire for this call.
  Subs pending;
  foreach (const Sub &sub, subs_) {
    if (sub.language == language && !sub.source.isEmpty ()) {
      pending.append (sub);
    }
  }

  QString result = source;
  while (!pending.isEmpty ()) {
    int bestMatchIndex = -1;
    int bestMatchLen = 0;
    for (int i = 0, end = pending.size (); i < end; ++i) {
      const QString &candidate = pending.at (i).source;
      // Cheap length test first; only then search the text.
      if (candidate.length () > bestMatchLen && result.contains (candidate)) {
        bestMatchLen = candidate.length ();
        bestMatchIndex = i;
      }
    }
    if (bestMatchIndex < 0) {
      break; // None of the remaining rules matches the current text.
    }
    // Remove the rule before applying it so it can never fire again.
    const Sub sub = pending.takeAt (bestMatchIndex);
    result.replace (sub.source, sub.target);
  }

  return result;
}
|
||||
|
||||
// Returns a reference to the substitution list currently held in memory.
const RecognizerHelper::Subs &RecognizerHelper::subs () const {
|
||||
return subs_;
|
||||
}
|
||||
|
||||
// Replaces the in-memory substitution list with `subs`.
// The file is not touched here; call save () to write the list out.
void RecognizerHelper::setSubs (const Subs &subs) {
|
||||
subs_ = subs;
|
||||
}
|
||||
|
||||
// Builds one substitution record by copying the three given strings into the
// members of the same name.
RecognizerHelper::Sub::Sub (const QString &language, const QString &source, const QString &target)
|
||||
: language (language), source (source), target (target) {
|
||||
}
|
33
RecognizerHelper.h
Normal file
33
RecognizerHelper.h
Normal file
@ -0,0 +1,33 @@
|
||||
#ifndef RECOGNIZERHELPER_H
|
||||
#define RECOGNIZERHELPER_H
|
||||
|
||||
#include <QString>
|
||||
|
||||
// Holds a list of text substitutions used to correct recognition results and
// can read/write that list from/to a file.
class RecognizerHelper {
|
||||
public:
|
||||
// One substitution record: for text recognized in `language`, occurrences of
// `source` are to be replaced with `target`.
struct Sub {
|
||||
// All three fields default to empty strings.
Sub (const QString &language = QString (), const QString &source = QString (),
|
||||
const QString &target = QString ());
|
||||
QString language;
|
||||
QString source;
|
||||
QString target;
|
||||
};
|
||||
// Ordered list of substitution records.
typedef QList<Sub> Subs;
|
||||
|
||||
public:
|
||||
RecognizerHelper ();
|
||||
|
||||
// Replaces the in-memory list with the records read from the file; the list
// is left empty if the file cannot be opened.
void load ();
|
||||
// Writes the in-memory list to the file.
void save ();
|
||||
|
||||
// Returns `source` with the substitutions stored for `language` applied;
// among matching records the one with the longest source is applied first.
QString substitute (const QString &source, const QString& language) const;
|
||||
|
||||
// Read access to the in-memory list.
const Subs &subs () const;
|
||||
// Replaces the in-memory list (does not write the file).
void setSubs (const Subs &subs);
|
||||
|
||||
private:
|
||||
// Name of the file used by load () and save ().
QString fileName_;
|
||||
// Substitution records currently held in memory.
Subs subs_;
|
||||
};
|
||||
|
||||
#endif // RECOGNIZERHELPER_H
|
@ -39,7 +39,8 @@ SOURCES += main.cpp\
|
||||
LanguageHelper.cpp \
|
||||
WebTranslator.cpp \
|
||||
WebTranslatorProxy.cpp \
|
||||
TranslatorHelper.cpp
|
||||
TranslatorHelper.cpp \
|
||||
RecognizerHelper.cpp
|
||||
|
||||
HEADERS += \
|
||||
Manager.h \
|
||||
@ -55,7 +56,8 @@ HEADERS += \
|
||||
WebTranslator.h \
|
||||
WebTranslatorProxy.h \
|
||||
StAssert.h \
|
||||
TranslatorHelper.h
|
||||
TranslatorHelper.h \
|
||||
RecognizerHelper.h
|
||||
|
||||
FORMS += \
|
||||
SettingsEditor.ui \
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include "ui_SettingsEditor.h"
|
||||
#include "LanguageHelper.h"
|
||||
#include "TranslatorHelper.h"
|
||||
#include "RecognizerHelper.h"
|
||||
#include "StAssert.h"
|
||||
|
||||
#include <QSettings>
|
||||
#include <QFileDialog>
|
||||
@ -11,7 +13,8 @@
|
||||
|
||||
SettingsEditor::SettingsEditor (const LanguageHelper &dictionary, QWidget *parent) :
|
||||
QDialog (parent),
|
||||
ui (new Ui::SettingsEditor), translatorHelper_ (new TranslatorHelper), dictionary_ (dictionary),
|
||||
ui (new Ui::SettingsEditor), translatorHelper_ (new TranslatorHelper),
|
||||
recognizerHelper_ (new RecognizerHelper), dictionary_ (dictionary),
|
||||
buttonGroup_ (new QButtonGroup (this)) {
|
||||
ui->setupUi (this);
|
||||
|
||||
@ -22,6 +25,9 @@ SettingsEditor::SettingsEditor (const LanguageHelper &dictionary, QWidget *paren
|
||||
connect (ui->tessdataEdit, SIGNAL (textChanged (const QString &)),
|
||||
SLOT (initOcrLangCombo (const QString &)));
|
||||
|
||||
connect (ui->recognizerFixTable, SIGNAL (itemChanged (QTableWidgetItem *)),
|
||||
SLOT (recognizerFixTableItemChanged (QTableWidgetItem *)));
|
||||
|
||||
ui->translateLangCombo->addItems (dictionary_.translateLanguagesUi ());
|
||||
loadSettings ();
|
||||
loadState ();
|
||||
@ -29,6 +35,7 @@ SettingsEditor::SettingsEditor (const LanguageHelper &dictionary, QWidget *paren
|
||||
|
||||
// Calls saveState () first, then deletes the two helper objects and the
// generated UI object that were allocated with `new` in the constructor.
SettingsEditor::~SettingsEditor () {
|
||||
saveState ();
|
||||
delete recognizerHelper_;
|
||||
delete translatorHelper_;
|
||||
delete ui;
|
||||
}
|
||||
@ -58,8 +65,29 @@ void SettingsEditor::saveSettings () const {
|
||||
QString ocrLanguageVal = dictionary_.ocrUiToCode (ui->ocrLangCombo->currentText ());
|
||||
settings.setValue (ocrLanguage, ocrLanguageVal);
|
||||
settings.setValue (imageScale, ui->imageScaleSpin->value ());
|
||||
settings.endGroup ();
|
||||
|
||||
{ //Recognizer substitutions
|
||||
RecognizerHelper::Subs subs;
|
||||
QTableWidget *t = ui->recognizerFixTable; // Shortcut
|
||||
for (int i = 0, end = t->rowCount () - 1; i < end; ++i) {
|
||||
QComboBox *combo = static_cast<QComboBox *>(t->cellWidget (i, SubsColLanguage));
|
||||
QString langUi = combo->currentText ();
|
||||
RecognizerHelper::Sub sub;
|
||||
sub.language = dictionary_.ocrUiToCode (langUi);
|
||||
#define GET(COL) (t->item (i, COL) ? t->item (i, COL)->text () : QString ())
|
||||
sub.source = GET (SubsColSource);
|
||||
sub.target = GET (SubsColTarget);
|
||||
#undef GET
|
||||
if (langUi.isEmpty () || sub.language == langUi || sub.source.isEmpty ()) {
|
||||
continue;
|
||||
}
|
||||
subs.append (sub);
|
||||
}
|
||||
recognizerHelper_->setSubs (subs);
|
||||
recognizerHelper_->save ();
|
||||
}
|
||||
|
||||
settings.endGroup ();
|
||||
|
||||
settings.beginGroup (translationGroup);
|
||||
settings.setValue (doTranslation, ui->doTranslationCheck->isChecked ());
|
||||
@ -115,8 +143,28 @@ void SettingsEditor::loadSettings () {
|
||||
QString ocrLanguage = dictionary_.ocrCodeToUi (GET (ocrLanguage).toString ());
|
||||
ui->ocrLangCombo->setCurrentText (ocrLanguage);
|
||||
ui->imageScaleSpin->setValue (GET (imageScale).toInt ());
|
||||
|
||||
{//Recognizer substitutions
|
||||
recognizerHelper_->load ();
|
||||
RecognizerHelper::Subs subs = recognizerHelper_->subs ();
|
||||
ui->recognizerFixTable->setRowCount (subs.size ());
|
||||
int row = 0;
|
||||
foreach (const RecognizerHelper::Sub & sub, subs) {
|
||||
if (!initSubsTableRow (row, sub.language)) {
|
||||
continue;
|
||||
}
|
||||
ui->recognizerFixTable->setItem (row, SubsColSource, new QTableWidgetItem (sub.source));
|
||||
ui->recognizerFixTable->setItem (row, SubsColTarget, new QTableWidgetItem (sub.target));
|
||||
++row;
|
||||
}
|
||||
ui->recognizerFixTable->setRowCount (row + 1);
|
||||
initSubsTableRow (row);
|
||||
ui->recognizerFixTable->resizeColumnsToContents ();
|
||||
}
|
||||
|
||||
settings.endGroup ();
|
||||
|
||||
|
||||
settings.beginGroup (settings_names::translationGroup);
|
||||
ui->doTranslationCheck->setChecked (GET (doTranslation).toBool ());
|
||||
ui->translatorDebugCheck->setChecked (GET (translationDebugMode).toBool ());
|
||||
@ -138,6 +186,23 @@ void SettingsEditor::loadSettings () {
|
||||
#undef GET
|
||||
}
|
||||
|
||||
bool SettingsEditor::initSubsTableRow (int row, const QString &languageCode) {
|
||||
QString lang = dictionary_.ocrCodeToUi (languageCode);
|
||||
if (!languageCode.isEmpty () && lang == languageCode) {
|
||||
return false;
|
||||
}
|
||||
QComboBox *langCombo = new QComboBox (ui->recognizerFixTable);
|
||||
langCombo->setModel (ui->ocrLangCombo->model ());
|
||||
if (!languageCode.isEmpty ()) {
|
||||
langCombo->setCurrentText (lang);
|
||||
}
|
||||
else {
|
||||
langCombo->setCurrentIndex (ui->ocrLangCombo->currentIndex ());
|
||||
}
|
||||
ui->recognizerFixTable->setCellWidget (row, SubsColLanguage, langCombo);
|
||||
return true;
|
||||
}
|
||||
|
||||
void SettingsEditor::saveState () const {
|
||||
QSettings settings;
|
||||
settings.beginGroup (settings_names::guiGroup);
|
||||
@ -154,3 +219,21 @@ void SettingsEditor::initOcrLangCombo (const QString &path) {
|
||||
ui->ocrLangCombo->clear ();
|
||||
ui->ocrLangCombo->addItems (dictionary_.availableOcrLanguagesUi (path));
|
||||
}
|
||||
|
||||
// Slot invoked when an item of the recognizer fix table changes.
// Keeps the table shaped as "filled rows followed by one blank row":
//  - a row whose source and target cells are both missing/empty is removed,
//    unless it is the last row;
//  - when the last row receives content, a new initialized row is appended after it.
void SettingsEditor::recognizerFixTableItemChanged (QTableWidgetItem *item) {
  ST_ASSERT (item->column () < 3);
  QTableWidget *table = ui->recognizerFixTable;
  const int changedRow = item->row ();
#define IS_BLANK(COL) (!table->item (changedRow, COL) || table->item (changedRow, COL)->text ().isEmpty ())
  const bool blank = IS_BLANK (SubsColSource) && IS_BLANK (SubsColTarget);
#undef IS_BLANK
  const int trailingRow = table->rowCount () - 1;
  const bool isTrailing = (changedRow == trailingRow);

  if (blank) {
    // The trailing blank row is the placeholder for new entries and must stay.
    if (!isTrailing) {
      table->removeRow (changedRow);
    }
    return;
  }

  if (isTrailing) {
    const int appendedRow = trailingRow + 1;
    table->insertRow (appendedRow);
    initSubsTableRow (appendedRow);
  }
}
|
||||
|
@ -5,15 +5,21 @@
|
||||
#include <QButtonGroup>
|
||||
#include <QMap>
|
||||
|
||||
class QTableWidgetItem;
|
||||
namespace Ui {
|
||||
class SettingsEditor;
|
||||
}
|
||||
class LanguageHelper;
|
||||
class TranslatorHelper;
|
||||
class RecognizerHelper;
|
||||
|
||||
class SettingsEditor : public QDialog {
|
||||
Q_OBJECT
|
||||
|
||||
enum SubsCol {
|
||||
SubsColLanguage = 0, SubsColSource, SubsColTarget
|
||||
};
|
||||
|
||||
public:
|
||||
explicit SettingsEditor (const LanguageHelper &dictionary, QWidget *parent = 0);
|
||||
~SettingsEditor ();
|
||||
@ -28,15 +34,18 @@ class SettingsEditor : public QDialog {
|
||||
void saveSettings () const;
|
||||
void openTessdataDialog ();
|
||||
void initOcrLangCombo (const QString &path);
|
||||
void recognizerFixTableItemChanged (QTableWidgetItem *item);
|
||||
|
||||
private:
|
||||
void loadSettings ();
|
||||
void saveState () const;
|
||||
void loadState ();
|
||||
bool initSubsTableRow (int row, const QString &languageCode = QString ());
|
||||
|
||||
private:
|
||||
Ui::SettingsEditor *ui;
|
||||
TranslatorHelper *translatorHelper_;
|
||||
RecognizerHelper *recognizerHelper_;
|
||||
const LanguageHelper &dictionary_;
|
||||
QButtonGroup *buttonGroup_;
|
||||
};
|
||||
|
@ -140,7 +140,7 @@
|
||||
<attribute name="title">
|
||||
<string>Распознавание</string>
|
||||
</attribute>
|
||||
<layout class="QGridLayout" name="gridLayout_3">
|
||||
<layout class="QGridLayout" name="gridLayout_2">
|
||||
<item row="0" column="0">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="toolTip">
|
||||
@ -196,18 +196,43 @@
|
||||
<item row="2" column="1" colspan="2">
|
||||
<widget class="QSpinBox" name="imageScaleSpin"/>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<spacer name="verticalSpacer_3">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
<item row="3" column="0" colspan="3">
|
||||
<widget class="QLabel" name="label_11">
|
||||
<property name="toolTip">
|
||||
<string><html><head/><body><p>Символы, регулярно распознаваемые с ошибками. При обнаружении будут заменены на указанные.</p></body></html></string>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>20</width>
|
||||
<height>132</height>
|
||||
</size>
|
||||
<property name="text">
|
||||
<string>Исправления:</string>
|
||||
</property>
|
||||
</spacer>
|
||||
<property name="alignment">
|
||||
<set>Qt::AlignCenter</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="0" colspan="3">
|
||||
<widget class="QTableWidget" name="recognizerFixTable">
|
||||
<property name="selectionBehavior">
|
||||
<enum>QAbstractItemView::SelectRows</enum>
|
||||
</property>
|
||||
<property name="sortingEnabled">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<column>
|
||||
<property name="text">
|
||||
<string>Язык</string>
|
||||
</property>
|
||||
</column>
|
||||
<column>
|
||||
<property name="text">
|
||||
<string>Исходный текст</string>
|
||||
</property>
|
||||
</column>
|
||||
<column>
|
||||
<property name="text">
|
||||
<string>Исправление</string>
|
||||
</property>
|
||||
</column>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
|
Loading…
Reference in New Issue
Block a user