Use Leptonica for image proprocessing.

Convert image to grayscale to reduce size. Limit scaled image max size.
2014-03-30 18:36:32 +04:00 · 2014-03-30 18:36:32 +04:00 · 37d8d3afcd
commit 37d8d3afcd
parent aec6416819
4 changed files with 156 additions and 16 deletions
--- a/ImageProcessing.cpp
+++ b/ImageProcessing.cpp
@ -0,0 +1,127 @@
 #include <leptonica/allheaders.h>
 #include <tesseract/host.h>
 #include "ImageProcessing.h"
 Pix *convertImage(const QImage& image)
 {
  PIX *pix;
  QImage swapped = image.rgbSwapped();
  int width = swapped.width();
  int height = swapped.height();
  int depth = swapped.depth();
  int wpl = swapped.bytesPerLine() / 4;
  pix = pixCreate(width, height, depth);
  pixSetWpl(pix, wpl);
  pixSetColormap(pix, NULL);
  l_uint32 *outData = pix->data;
  for (int y = 0; y < height; y++)
  {
    l_uint32 *lines = outData + y * wpl;
    QByteArray a((const char*)swapped.scanLine(y), swapped.bytesPerLine());
    for (int j = 0; j < a.size(); j++)
    {
      *((l_uint8 *)lines + j) = a[j];
    }
  }
  const qreal toDPM = 1.0 / 0.0254;
  int resolutionX = swapped.dotsPerMeterX() / toDPM;
  int resolutionY = swapped.dotsPerMeterY() / toDPM;
  if (resolutionX < 300) resolutionX = 300;
  if (resolutionY < 300) resolutionY = 300;
  pixSetResolution(pix, resolutionX, resolutionY);
  return pixEndianByteSwapNew(pix);
 }
 QImage convertImage(Pix &image)
 {
  int width = pixGetWidth(&image);
  int height = pixGetHeight(&image);
  int depth = pixGetDepth(&image);
  int bytesPerLine = pixGetWpl(&image) * 4;
  l_uint32 * datas = pixGetData(pixEndianByteSwapNew(&image));
  QImage::Format format;
  if (depth == 1)
    format = QImage::Format_Mono;
  else if (depth == 8)
    format = QImage::Format_Indexed8;
  else
    format = QImage::Format_RGB32;
  QImage result((uchar*)datas, width, height, bytesPerLine, format);
  // Set resolution
  l_int32 	xres, yres;
  pixGetResolution(&image, &xres, &yres);
  const qreal toDPM = 1.0 / 0.0254;
  result.setDotsPerMeterX(xres * toDPM);
  result.setDotsPerMeterY(yres * toDPM);
  // Handle pallete
  QVector<QRgb> _bwCT;
  _bwCT.append(qRgb(255,255,255));
  _bwCT.append(qRgb(0,0,0));
  QVector<QRgb> _grayscaleCT(256);
  for (int i = 0; i < 256; i++)  {
    _grayscaleCT.append(qRgb(i, i, i));
  }
  switch (depth) {
    case 1:
      result.setColorTable(_bwCT);
      break;
    case 8:
      result.setColorTable(_grayscaleCT);
      break;
    default:
      result.setColorTable(_grayscaleCT);
  }
  if (result.isNull()) {
    static QImage none(0,0,QImage::Format_Invalid);
    qDebug("Invalid format!!!\n");
    return none;
  }
  return result.rgbSwapped();
 }
 Pix *prepareImage(const QImage &image, int preferredScale)
 {
  Pix* pix = convertImage (image);
  Q_ASSERT (pix != NULL);
  Pix* gray = pixConvertRGBToGray (pix, 0.0, 0.0, 0.0);
  Q_ASSERT (gray != NULL);
  pixDestroy (&pix);
  Pix* scaled = gray;
  if (preferredScale > 0)
  {
    float maxScaleX = MAX_INT16 / double (gray->w);
    float scaleX = std::min (float (preferredScale), maxScaleX);
    float maxScaleY = MAX_INT16 / double (gray->h);
    float scaleY = std::min (float (preferredScale), maxScaleY);
    float scale = std::min (scaleX, scaleY);
    scaled = pixScale (gray, scale, scale);
  }
  Q_ASSERT (scaled != NULL);
  if (scaled != gray)
  {
    pixDestroy (&gray);
  }
  return scaled;
 }
 void cleanupImage(Pix **image)
 {
  pixDestroy (image);
 }
--- a/ImageProcessing.h
+++ b/ImageProcessing.h
@ -0,0 +1,18 @@
 #ifndef IMAGEPROCESSING_H
 #define IMAGEPROCESSING_H
 #include <QImage>
 class Pix;
 //! Convert QImage to Leptonica's PIX.
 Pix* convertImage(const QImage& image);
 //! Convert Leptonica's PIX to QImage.
 QImage convertImage(Pix &image);
 //! Propare image for OCR.
 Pix* prepareImage (const QImage& image, int preferredScale);
 //! Free allocated resources for image.
 void cleanupImage (Pix** image);
 #endif // IMAGEPROCESSING_H
--- a/Recognizer.cpp
+++ b/Recognizer.cpp
@ -6,6 +6,7 @@
 #include <QSettings>
 #include "Settings.h"
 #include "ImageProcessing.h"
 Recognizer::Recognizer(QObject *parent) :
  QObject(parent),
@ -68,22 +69,14 @@ void Recognizer::recognize(ProcessingItem item)
    }
  }
-  QPixmap scaled = item.source;
+  Pix* image = prepareImage (item.source.toImage (), imageScale_);
-  if (imageScale_ > 0)
+  Q_ASSERT (image != NULL);
-  {
+  engine_->SetImage (image);
    scaled = scaled.scaledToHeight (scaled.height () * imageScale_,
                                    Qt::SmoothTransformation);
  }
  QImage image = scaled.toImage ();
  const int bytesPerPixel = image.depth () / 8;
  engine_->SetImage (image.bits (), image.width (), image.height (),
                     bytesPerPixel, image.bytesPerLine ());
  char* outText = engine_->GetUTF8Text();
  QString result = QString (outText).trimmed ();
  engine_->Clear();
  cleanupImage (&image);
  QString result (outText);
  result = result.trimmed();
  if (!result.isEmpty ())
  {
    item.recognized = result;
--- a/ScreenTranslator.pro
+++ b/ScreenTranslator.pro
@ -13,7 +13,7 @@ TEMPLATE = app
 INCLUDEPATH += D:/Files/build/include
-LIBS += -LD:/Files/build/bin -ltesseract
+LIBS += -LD:/Files/build/bin -ltesseract -llept
 SOURCES += main.cpp\
    Manager.cpp \
@ -23,7 +23,8 @@ SOURCES += main.cpp\
    Recognizer.cpp \
    Translator.cpp \
    ResultDialog.cpp \
-    ProcessingItem.cpp
+    ProcessingItem.cpp \
    ImageProcessing.cpp
 HEADERS  += \
    Manager.h \
@ -34,7 +35,8 @@ HEADERS  += \
    Translator.h \
    Settings.h \
    ProcessingItem.h \
-    ResultDialog.h
+    ResultDialog.h \
    ImageProcessing.h
 FORMS    += \
    SettingsEditor.ui \