From 37d8d3afcd40629119c636e50fc4ab59f57ca829 Mon Sep 17 00:00:00 2001
From: Gres
Date: Sun, 30 Mar 2014 18:36:32 +0400
Subject: [PATCH] Use Leptonica for image preprocessing. Convert image to
 grayscale to reduce size. Limit scaled image max size.

---
 ImageProcessing.cpp  | 170 +++++++++++++++++++++++++++++++++++++++++++
 ImageProcessing.h    |  19 ++++++
 Recognizer.cpp       |  21 ++-----
 ScreenTranslator.pro |   8 ++-
 4 files changed, 202 insertions(+), 16 deletions(-)
 create mode 100644 ImageProcessing.cpp
 create mode 100644 ImageProcessing.h

diff --git a/ImageProcessing.cpp b/ImageProcessing.cpp
new file mode 100644
index 0000000..7bcca35
--- /dev/null
+++ b/ImageProcessing.cpp
@@ -0,0 +1,170 @@
+#include <algorithm>
+#include <cstring>
+
+#include <QDebug>
+#include <QVector>
+
+#include <leptonica/allheaders.h>
+
+#include "ImageProcessing.h"
+
+namespace
+{
+// Dots per meter in one dot per inch.
+const qreal toDPM = 1.0 / 0.0254;
+// Lowest resolution (in DPI) assigned to a converted PIX.
+const int minResolution = 300;
+// Largest allowed side of a scaled image (the value of MAX_INT16).
+const int maxImageSide = 0x7fff;
+}
+
+// Builds a PIX with the size and depth of the given image and a resolution
+// of at least 300 DPI. The caller owns the result. Returns NULL if the image
+// is empty or Leptonica cannot create a PIX of that depth.
+Pix *convertImage(const QImage& image)
+{
+  const QImage swapped = image.rgbSwapped();
+  if (swapped.isNull())
+  {
+    return NULL;
+  }
+
+  Pix *pix = pixCreate(swapped.width(), swapped.height(), swapped.depth());
+  if (pix == NULL)
+  {
+    return NULL;
+  }
+
+  // Copy row by row, never more bytes than either side holds per row.
+  const int wpl = pixGetWpl(pix);
+  const int lineBytes = std::min(static_cast<int>(swapped.bytesPerLine()),
+                                 wpl * 4);
+  l_uint32 *data = pixGetData(pix);
+  for (int y = 0; y < swapped.height(); ++y)
+  {
+    std::memcpy(data + y * wpl, swapped.scanLine(y), lineBytes);
+  }
+
+  const int resolutionX = qRound(swapped.dotsPerMeterX() / toDPM);
+  const int resolutionY = qRound(swapped.dotsPerMeterY() / toDPM);
+  pixSetResolution(pix, std::max(resolutionX, minResolution),
+                   std::max(resolutionY, minResolution));
+
+  // The byte swap yields a separate PIX reference, so the intermediate one
+  // is released here instead of being leaked.
+  Pix *result = pixEndianByteSwapNew(pix);
+  pixDestroy(&pix);
+  return result;
+}
+
+// Wraps a byte-swapped copy of the PIX data in a QImage and returns a deep
+// copy of it. 1 bpp maps to Format_Mono, 8 bpp to Format_Indexed8 with a
+// grayscale palette, any other depth is treated as Format_RGB32.
+// Returns a null QImage on failure.
+QImage convertImage(Pix &image)
+{
+  const int width = pixGetWidth(&image);
+  const int height = pixGetHeight(&image);
+  const int depth = pixGetDepth(&image);
+  const int bytesPerLine = pixGetWpl(&image) * 4;
+
+  // Keep the swapped copy in a variable so it can be destroyed once its
+  // pixels have been copied into the result.
+  Pix *swapped = pixEndianByteSwapNew(&image);
+  if (swapped == NULL)
+  {
+    qDebug("Invalid format!!!\n");
+    return QImage();
+  }
+
+  QImage::Format format = QImage::Format_RGB32;
+  if (depth == 1)
+    format = QImage::Format_Mono;
+  else if (depth == 8)
+    format = QImage::Format_Indexed8;
+
+  // This QImage only wraps the PIX buffer; it does not own it.
+  QImage wrapper(reinterpret_cast<uchar *>(pixGetData(swapped)),
+                 width, height, bytesPerLine, format);
+
+  // Handle palette
+  if (depth == 1)
+  {
+    QVector<QRgb> bwCT;
+    bwCT.append(qRgb(255, 255, 255));
+    bwCT.append(qRgb(0, 0, 0));
+    wrapper.setColorTable(bwCT);
+  }
+  else if (depth == 8)
+  {
+    // Exactly 256 entries: index i maps to gray level i.
+    QVector<QRgb> grayscaleCT(256);
+    for (int i = 0; i < 256; i++)
+    {
+      grayscaleCT[i] = qRgb(i, i, i);
+    }
+    wrapper.setColorTable(grayscaleCT);
+  }
+
+  // rgbSwapped() returns an image with its own pixel buffer, so the PIX copy
+  // is no longer needed afterwards.
+  QImage result = wrapper.rgbSwapped();
+  pixDestroy(&swapped);
+  if (result.isNull())
+  {
+    qDebug("Invalid format!!!\n");
+    return QImage();
+  }
+
+  // Set resolution
+  l_int32 xres = 0;
+  l_int32 yres = 0;
+  pixGetResolution(&image, &xres, &yres);
+  result.setDotsPerMeterX(qRound(xres * toDPM));
+  result.setDotsPerMeterY(qRound(yres * toDPM));
+
+  return result;
+}
+
+// Converts the image to an 8 bpp grayscale PIX and, if preferredScale is
+// positive, scales it by that factor, reduced when necessary so that neither
+// side exceeds maxImageSide. The caller owns the result (see cleanupImage).
+// Returns NULL on failure.
+Pix *prepareImage(const QImage &image, int preferredScale)
+{
+  // pixConvertRGBToGray() only accepts 32 bpp input.
+  const QImage source = (image.depth () == 32)
+      ? image : image.convertToFormat (QImage::Format_RGB32);
+
+  Pix* pix = convertImage (source);
+  Q_ASSERT (pix != NULL);
+  if (pix == NULL)
+  {
+    return NULL;
+  }
+
+  // Zero weights select Leptonica's default channel weights.
+  Pix* gray = pixConvertRGBToGray (pix, 0.0, 0.0, 0.0);
+  pixDestroy (&pix);
+  Q_ASSERT (gray != NULL);
+  if (gray == NULL || preferredScale <= 0)
+  {
+    return gray;
+  }
+
+  const float maxScaleX = float (maxImageSide) / pixGetWidth (gray);
+  const float maxScaleY = float (maxImageSide) / pixGetHeight (gray);
+  const float scale = std::min (float (preferredScale),
+                                std::min (maxScaleX, maxScaleY));
+
+  Pix* scaled = pixScale (gray, scale, scale);
+  Q_ASSERT (scaled != NULL);
+  pixDestroy (&gray);
+  return scaled;
+}
+
+// Releases a PIX returned by prepareImage().
+void cleanupImage(Pix **image)
+{
+  pixDestroy (image);
+}
diff --git a/ImageProcessing.h b/ImageProcessing.h
new file mode 100644
index 0000000..e8c3dc3
--- /dev/null
+++ b/ImageProcessing.h
@@ -0,0 +1,19 @@
+#ifndef IMAGEPROCESSING_H
+#define IMAGEPROCESSING_H
+
+#include <QImage>
+
+// Leptonica declares Pix as a struct; keep the class-key consistent.
+struct Pix;
+
+//! Convert QImage to Leptonica's PIX.
+Pix* convertImage(const QImage& image);
+//! Convert Leptonica's PIX to QImage.
+QImage convertImage(Pix &image);
+
+//! Prepare image for OCR.
+Pix* prepareImage (const QImage& image, int preferredScale);
+//! Free allocated resources for image.
+void cleanupImage (Pix** image);
+
+#endif // IMAGEPROCESSING_H
diff --git a/Recognizer.cpp b/Recognizer.cpp
index 44ddbe9..a4689bd 100644
--- a/Recognizer.cpp
+++ b/Recognizer.cpp
@@ -6,6 +6,7 @@
 #include <QSettings>
 
 #include "Settings.h"
+#include "ImageProcessing.h"
 
 Recognizer::Recognizer(QObject *parent) :
   QObject(parent),
@@ -68,22 +69,16 @@ void Recognizer::recognize(ProcessingItem item)
     }
   }
 
-  QPixmap scaled = item.source;
-  if (imageScale_ > 0)
-  {
-    scaled = scaled.scaledToHeight (scaled.height () * imageScale_,
-                                    Qt::SmoothTransformation);
-  }
-  QImage image = scaled.toImage ();
-  const int bytesPerPixel = image.depth () / 8;
-  engine_->SetImage (image.bits (), image.width (), image.height (),
-                     bytesPerPixel, image.bytesPerLine ());
-
+  Pix* image = prepareImage (item.source.toImage (), imageScale_);
+  Q_ASSERT (image != NULL);
+  engine_->SetImage (image);
   char* outText = engine_->GetUTF8Text();
+  QString result = QString (outText).trimmed ();
+  // GetUTF8Text() returns a buffer the caller must free with delete [].
+  delete [] outText;
   engine_->Clear();
+  cleanupImage (&image);
 
-  QString result (outText);
-  result = result.trimmed();
   if (!result.isEmpty ())
   {
     item.recognized = result;
diff --git a/ScreenTranslator.pro b/ScreenTranslator.pro
index 338b5ba..40dbdae 100644
--- a/ScreenTranslator.pro
+++ b/ScreenTranslator.pro
@@ -13,7 +13,7 @@ TEMPLATE = app
 
 INCLUDEPATH += D:/Files/build/include
 
-LIBS += -LD:/Files/build/bin -ltesseract
+LIBS += -LD:/Files/build/bin -ltesseract -llept
 
 SOURCES += main.cpp\
     Manager.cpp \
@@ -23,7 +23,8 @@ SOURCES += main.cpp\
     Recognizer.cpp \
     Translator.cpp \
     ResultDialog.cpp \
-    ProcessingItem.cpp
+    ProcessingItem.cpp \
+    ImageProcessing.cpp
 
 HEADERS += \
     Manager.h \
@@ -34,7 +35,8 @@ HEADERS += \
     Translator.h \
     Settings.h \
     ProcessingItem.h \
-    ResultDialog.h
+    ResultDialog.h \
+    ImageProcessing.h
 
 FORMS += \
     SettingsEditor.ui \