ScreenTranslator/distr/iss/make_tess_iss.sh

231 lines
7.5 KiB
Bash
Raw Normal View History

#!/bin/bash
# Generates tessdata.iss: [Files], [Components] and [CustomMessages] sections
# describing the tessdata language files found on disk.
#
# Optional arguments (any order):
#   tessdata    use the literal path "tessdata" (relative to the current
#               directory) as the data directory
#   tag=<TAG>   tessdata tag used in the generated download URLs

TESSDATA_DIR="../../../tessdata"
TESSDATA_TAG="3.04.00"

# "$@" keeps every argument intact even when it contains whitespace.
for arg in "$@"; do
  case "$arg" in
    "tessdata") TESSDATA_DIR="$arg" ;;
    "tag="*) TESSDATA_TAG=${arg:4} ;; # strip the 4-character "tag=" prefix
  esac
done

# readlink -e prints nothing when the path does not exist, which leaves
# TESSDATA_DIR empty; say so instead of failing silently.
TESSDATA_DIR=$(readlink -e "$TESSDATA_DIR")
if [[ -z "$TESSDATA_DIR" ]]; then
  echo "warning: tessdata directory could not be resolved" >&2
fi

# readlink -m canonicalizes the path without requiring the file to exist.
OUT_FILE="tessdata.iss"
OUT_FILE=$(readlink -m "$OUT_FILE")

# Space-separated table; the functions below read column 1 as the data-file
# prefix, column 2 as the English name and column 3 as the Russian name.
LANGS_FILE="code2langTr.txt"
#######################################
# Print the language codes (column 1 of LANGS_FILE) ordered by the values of
# another column.
# Globals:   LANGS_FILE (read)
# Arguments: $1 - 1-based index of the space-separated column to sort by
# Outputs:   the codes on a single line, separated by single spaces
#######################################
function getLangsOrder {
  local FIELD=$1
  local tab=$'\t'
  local sort_key code
  local -a codes=()

  # One awk pass pairs each sort key with its code, so a name that happens
  # to be a substring of another line (English / Middle_English) can no
  # longer pull in the wrong code. Lines whose code is empty or starts
  # with '#' are skipped.
  while IFS=$tab read -r sort_key code; do
    codes+=("$code")
  done < <(
    awk -F'[ ]' -v f="$FIELD" \
      '$1 != "" && $1 !~ /^#/ && $f != "" { print $f "\t" $1 }' "$LANGS_FILE" \
      | sort -s -t "$tab" -k1,1
  )

  echo "${codes[*]}"
}
# Text accumulators for the generated sections. The two-character "\n"
# sequences are kept literal here and only turned into line breaks when the
# buffers are printed with `echo -e`.
FILES='[Files]\n'
COMPONENTS='[Components]\nName: "Languages"; Description: "{cm:Languages}"; Types: custom full\n'
MESSAGES_EN='\n[CustomMessages]\nen.Languages=OCR Languages\n'
MESSAGES_RU='\n[CustomMessages]\nru.Languages=Языки распознавания\n'

# English name of the most recently emitted language.
PREV_LANG=''

# Language codes that additionally get the "compact" and "custom" types.
COMPACT_LANGS='eng rus deu spa chi_sim fra jpn'
#######################################
# Append entries to the section buffers for every language in LANGS_FILE
# that has at least one data file in TESSDATA_DIR.
# Globals:
#   LANGS_FILE, TESSDATA_DIR, TESSDATA_TAG, COMPACT_LANGS (read)
#   COMPONENTS (always appended)
#   MESSAGES_EN, MESSAGES_RU, FILES (appended unless $3 is "true")
# Arguments:
#   $1 - 1-based column of LANGS_FILE that defines the output order
#   $2 - value written into the "Languages:" parameter of each component
#   $3 - "true" to emit component entries only
# Outputs:
#   prints "no lang" for a language that has no data files
#######################################
function fillIss {
  local LANG_FIELD=$1
  local COMPONENT_LANG=$2
  local ONLY_COMPONENTS=$3
  local tab=$'\t'
  local sort_key code lang_en lang_ru path file_name types bytes component_size
  local -a lang_files

  COMPONENTS=$COMPONENTS"\n"

  # awk emits "<sort key> <code> <english> <russian>" (tab-separated) for
  # every active line, matching the requested column exactly. This also works
  # when that column is the last one on the line and cannot be confused by
  # the same text appearing in a different column. Lines whose code is empty
  # or starts with '#' are skipped.
  while IFS=$tab read -r sort_key code lang_en lang_ru; do
    # Collect the data files line by line so a directory containing spaces
    # is handled correctly.
    lang_files=()
    while IFS= read -r path; do
      lang_files+=("$path")
    done < <(find "$TESSDATA_DIR" -name "$code.*")

    if ((${#lang_files[@]} == 0)); then
      echo "no lang"
      continue
    fi

    component_size=0
    for path in "${lang_files[@]}"; do
      bytes=$(wc -c < "$path")
      component_size=$((component_size + ${bytes:-0}))
    done

    # Whole-word membership test: a regex/substring test would treat "sim"
    # as compact merely because COMPACT_LANGS contains "chi_sim".
    types="full"
    if [[ " $COMPACT_LANGS " == *" $code "* ]]; then
      types="compact custom $types"
    fi

    COMPONENTS=$COMPONENTS"Name: \"Languages\\\\$lang_en\"; Description: \"{cm:$lang_en}\"; Languages: $COMPONENT_LANG; Types: $types; ExtraDiskSpaceRequired: $component_size\n"

    # Compare the flag as a string instead of executing it as a command.
    if [[ "$ONLY_COMPONENTS" == true ]]; then
      continue
    fi

    # Display names use spaces where the table uses underscores.
    MESSAGES_EN=$MESSAGES_EN"en.$lang_en=${lang_en//_/ }\n"
    MESSAGES_RU=$MESSAGES_RU"ru.$lang_en=${lang_ru//_/ }\n"

    for path in "${lang_files[@]}"; do
      file_name=${path##*/}
      FILES=$FILES"Source: \"{tmp}\\\\$file_name\"; DestDir: \"{app}\\\\tessdata\"; Components: Languages\\\\$lang_en; Flags: external; Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$file_name'), 'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$file_name', 'ST_setup', 'Get', 0);\n"
    done
  done < <(
    awk -F'[ ]' -v f="$LANG_FIELD" \
      '$1 != "" && $1 !~ /^#/ && $2 != "" && $f != "" { print $f "\t" $1 "\t" $2 "\t" $3 }' \
      "$LANGS_FILE" \
      | sort -s -t "$tab" -k1,1
  )
}
# First pass: ordered by column 2, tagged "en", emits files, components and
# messages. Second pass: ordered by column 3, tagged "ru", components only.
fillIss 2 "en" false
fillIss 3 "ru" true

# The buffers are expanded unquoted on purpose: word splitting folds any raw
# line break inside a buffer into a single space, and `echo -e` then turns
# the literal "\n" markers into the real line breaks of the output.
# shellcheck disable=SC2086
echo -e $FILES > "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $COMPONENTS >> "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $MESSAGES_EN >> "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $MESSAGES_RU >> "$OUT_FILE"

# Re-encode the result from UTF-8 to cp1251. On failure, report it and stop
# instead of moving a missing or partial file over the output and exiting 0.
if ! iconv -f utf8 -t cp1251 "$OUT_FILE" -o "$OUT_FILE.1"; then
  echo "error: iconv failed; $OUT_FILE was left in UTF-8" >&2
  rm -f -- "$OUT_FILE.1"
  exit 1
fi
mv -- "$OUT_FILE.1" "$OUT_FILE" || exit 1
exit 0
# NOTE(review): this definition comes after a top-level `exit 0`, so it is
# never reached when the script runs from top to bottom. It re-declares
# fillIss with a different argument list.
#
# Arguments as assigned below:
#   $1 - ORDER: whitespace-separated list of file-name prefixes
#   $2 - FIELD (assigned but not read anywhere in this body)
#   $3 - ONLY_COMPONENTS (assigned but not read anywhere in this body)
function fillIss {
local ORDER=$1
local FIELD=$2
local ONLY_COMPONENTS=$3
# This local FILES hides the global FILES buffer for the rest of the function.
local FILES=""
# Gather the paths of all data files for the requested prefixes.
# NOTE(review): the backslash-escaped quotes around $i.* look like they become
# part of the -name pattern rather than quoting it - confirm before reuse.
for i in $ORDER; do
local CUR_LANG_FILES=`find $TESSDATA_DIR -name \"$i.*\"`
if [ -z "$CUR_LANG_FILES" ]; then
continue
fi
FILES=$FILES" `find $TESSDATA_DIR -name \"$i.*\"`"
done
for i in $FILES; do
local fName=$(basename "$i")
# The prefix before the first '.' is stored in LANG ...
local LANG=$(echo "$fName" | cut -d'.' -f1)
# NOTE(review): ... but the lookups below read $lang, which this function
# never assigns; it resolves to whatever a global `lang` holds at call time.
LANG_LINE=$(grep "$lang " $LANGS_FILE)
# Skip prefixes without a table line and lines starting with '#'.
if [[ -z "$LANG_LINE" || "${LANG_LINE:0:1}" == "#" ]]; then
continue;
fi
LANG_EN=$(echo "$LANG_LINE" | cut -d' ' -f2)
LANG_RU=$(echo "$LANG_LINE" | cut -d' ' -f3)
# NOTE(review): $ONLY_COMP is not the local ONLY_COMPONENTS declared above;
# it is not assigned in this function. The text below is also appended to
# the same local FILES that the enclosing loop took its path list from.
if ! $ONLY_COMP; then
FILES=$FILES"Source: \"{tmp}\\\\$fName\"; DestDir: \"{app}\\\\tessdata\"; Components: Languages\\\\$LANG_EN;
Flags: external; Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$fName'),
'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$fName', 'ST_setup', 'Get', 0);\n"
fi
# Emit one component entry each time the English name changes.
if [ "$PREV_LANG" != "$LANG_EN" ]; then
PREV_LANG="$LANG_EN"
# Sum the byte counts reported by wc for every matching file.
SIZE=0
for s in `find $TESSDATA_DIR -name "$lang.*" -exec wc -c {} \; | cut -d' ' -f1`; do
SIZE=$(expr $SIZE + $s)
done
TYPES="full"
# Unquoted right-hand side of =~ is a regex, so this is a substring-style
# match against the COMPACT_LANGS string.
if [[ $COMPACT_LANGS =~ $lang ]]; then
TYPES="compact custom $TYPES"
fi
# NOTE(review): $LLANG is not assigned in this function either.
COMPONENTS=$COMPONENTS"Name: \"Languages\\\\$LANG_EN\"; Description: \"{cm:$LANG_EN}\"; Languages: $LLANG; Types: $TYPES; ExtraDiskSpaceRequired: $SIZE\n"
if ! $ONLY_COMP; then
# Underscores in the names are shown as spaces.
MESSAGES_EN=$MESSAGES_EN"en.$LANG_EN=$(echo "$LANG_EN" | sed 's/_/ /g')\n"
MESSAGES_RU=$MESSAGES_RU"ru.$LANG_EN=$(echo "$LANG_RU" | sed 's/_/ /g')\n"
fi
fi
done
}
# NOTE(review): located after a top-level `exit 0`, so this line is never
# reached. It runs fillIss inside a command substitution and prints whatever
# the function writes to stdout, with whitespace collapsed by the unquoted
# expansion.
echo $(fillIss "$(getLangsOrder 2)" "en" false)
#######################################
# Append file, component and message entries for a list of language codes.
# NOTE(review): in this file the definition comes after a top-level `exit 0`.
# Globals:
#   LANGS_FILE, TESSDATA_DIR, TESSDATA_TAG, COMPACT_LANGS (read)
#   FILES, COMPONENTS, MESSAGES_EN, MESSAGES_RU (appended)
#   PREV_LANG (reset, then updated)
#   LLANG, ONLY_COMP, f, lang, LANG_LINE, LANG_EN, LANG_RU, SIZE, TYPES
#     (assigned; left global as before)
# Arguments:
#   $1 - whitespace-separated list of language codes, in output order
#   $2 - value written into the "Languages:" parameter of each component
#   $3 - "true" to emit component entries only
#######################################
function doJob {
  LLANG=$2
  ONLY_COMP=$3
  local code path size_path bytes

  # Start every call with a clean marker. Otherwise the last language of a
  # previous call suppresses the component of this call's first language
  # whenever the two are the same.
  PREV_LANG=""

  # $1 is split on whitespace on purpose: it carries the list of codes.
  for code in $1; do
    while IFS= read -r path; do
      f=${path##*/}
      lang=${f%%.*}

      # Exact match on column 1, so a '#'-prefixed line or a line that merely
      # contains "<code> " somewhere else is never picked up.
      LANG_LINE=$(awk -F'[ ]' -v c="$lang" '$1 == c { print; exit }' "$LANGS_FILE")
      if [[ -z "$LANG_LINE" ]]; then
        continue
      fi
      LANG_EN=$(cut -d' ' -f2 <<< "$LANG_LINE")
      LANG_RU=$(cut -d' ' -f3 <<< "$LANG_LINE")

      if [[ "$ONLY_COMP" != true ]]; then
        FILES=$FILES"Source: \"{tmp}\\\\$f\"; DestDir: \"{app}\\\\tessdata\"; Components: Languages\\\\$LANG_EN; Flags: external; Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$f'), 'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$f', 'ST_setup', 'Get', 0);\n"
      fi

      # One component per language: emitted when the English name changes.
      if [[ "$PREV_LANG" != "$LANG_EN" ]]; then
        PREV_LANG="$LANG_EN"

        SIZE=0
        while IFS= read -r size_path; do
          bytes=$(wc -c < "$size_path")
          SIZE=$((SIZE + ${bytes:-0}))
        done < <(find "$TESSDATA_DIR" -name "$lang.*")

        # Whole-word membership test ("sim" must not match "chi_sim").
        TYPES="full"
        if [[ " $COMPACT_LANGS " == *" $lang "* ]]; then
          TYPES="compact custom $TYPES"
        fi

        COMPONENTS=$COMPONENTS"Name: \"Languages\\\\$LANG_EN\"; Description: \"{cm:$LANG_EN}\"; Languages: $LLANG; Types: $TYPES; ExtraDiskSpaceRequired: $SIZE\n"
        if [[ "$ONLY_COMP" != true ]]; then
          # Display names use spaces where the table uses underscores.
          MESSAGES_EN=$MESSAGES_EN"en.$LANG_EN=${LANG_EN//_/ }\n"
          MESSAGES_RU=$MESSAGES_RU"ru.$LANG_EN=${LANG_RU//_/ }\n"
        fi
      fi
    done < <(find "$TESSDATA_DIR" -name "$code.*")
  done
}
# NOTE(review): this section follows a top-level `exit 0` and is not reached
# when the script runs from top to bottom.
#
# The code list comes from getLangsOrder; no function named getOrder is
# defined in this script.
doJob "$(getLangsOrder 2)" "en" false
doJob "$(getLangsOrder 3)" "ru" true

# Debug print of the raw component buffer.
echo $COMPONENTS

# The buffers are expanded unquoted on purpose: word splitting folds any raw
# line break inside a buffer into a single space, and `echo -e` then turns
# the literal "\n" markers into the real line breaks of the output.
# shellcheck disable=SC2086
echo -e $FILES > "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $COMPONENTS >> "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $MESSAGES_EN >> "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $MESSAGES_RU >> "$OUT_FILE"

# Re-encode from UTF-8 to cp1251; stop on failure instead of moving a missing
# or partial file over the output and reporting success.
if ! iconv -f utf8 -t cp1251 "$OUT_FILE" -o "$OUT_FILE.1"; then
  echo "error: iconv failed; $OUT_FILE was left in UTF-8" >&2
  rm -f -- "$OUT_FILE.1"
  exit 1
fi
mv -- "$OUT_FILE.1" "$OUT_FILE" || exit 1
exit 0
# NOTE(review): this section follows a top-level `exit 0` and is not reached
# when the script runs from top to bottom.
#
# Walks every entry of the data directory in sorted order and emits one
# [Files] line per entry plus one component and message pair per language.

# A glob replaces parsing `ls` output; it is expanded in sorted order and is
# safe for names containing whitespace. An empty TESSDATA_DIR falls back to
# the current directory, which is what a bare `ls` would have listed.
for path in "${TESSDATA_DIR:-.}"/*; do
  [[ -e "$path" ]] || continue # unmatched glob stays literal: nothing to do
  f=${path##*/}
  lang=${f%%.*}

  # Exact match on column 1, so a '#'-prefixed line or a line that merely
  # contains "<code> " somewhere else is never picked up.
  LANG_LINE=$(awk -F'[ ]' -v c="$lang" '$1 == c { print; exit }' "$LANGS_FILE")
  if [[ -z "$LANG_LINE" ]]; then
    continue
  fi
  LANG_EN=$(cut -d' ' -f2 <<< "$LANG_LINE")
  LANG_RU=$(cut -d' ' -f3 <<< "$LANG_LINE")

  FILES=$FILES"Source: \"{tmp}\\\\$f\"; DestDir: \"{app}\\\\tessdata\"; Components: Languages\\\\$LANG_EN; Flags: external; Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$f'), 'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$f', 'ST_setup', 'Get', 0);\n"

  # Entries of one language are adjacent in sorted order, so a change of the
  # English name marks the start of a new language.
  if [[ "$PREV_LANG" != "$LANG_EN" ]]; then
    PREV_LANG="$LANG_EN"

    SIZE=0
    while IFS= read -r size_path; do
      bytes=$(wc -c < "$size_path")
      SIZE=$((SIZE + ${bytes:-0}))
    done < <(find "${TESSDATA_DIR:-.}" -name "$lang.*")

    # Whole-word membership test ("sim" must not match "chi_sim").
    TYPES="full"
    if [[ " $COMPACT_LANGS " == *" $lang "* ]]; then
      TYPES="compact custom $TYPES"
    fi

    COMPONENTS=$COMPONENTS"Name: \"Languages\\\\$LANG_EN\"; Description: \"{cm:$LANG_EN}\"; Types: $TYPES; ExtraDiskSpaceRequired: $SIZE\n"
    # Display names use spaces where the table uses underscores.
    MESSAGES_EN=$MESSAGES_EN"en.$LANG_EN=${LANG_EN//_/ }\n"
    MESSAGES_RU=$MESSAGES_RU"ru.$LANG_EN=${LANG_RU//_/ }\n"
  fi
done

# The buffers are expanded unquoted on purpose: word splitting folds any raw
# line break inside a buffer into a single space, and `echo -e` then turns
# the literal "\n" markers into the real line breaks of the output.
# shellcheck disable=SC2086
echo -e $FILES > "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $COMPONENTS >> "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $MESSAGES_EN >> "$OUT_FILE"
# shellcheck disable=SC2086
echo -e $MESSAGES_RU >> "$OUT_FILE"

# Re-encode from UTF-8 to cp1251; stop on failure instead of moving a missing
# or partial file over the output.
if ! iconv -f utf8 -t cp1251 "$OUT_FILE" -o "$OUT_FILE.1"; then
  echo "error: iconv failed; $OUT_FILE was left in UTF-8" >&2
  rm -f -- "$OUT_FILE.1"
  exit 1
fi
mv -- "$OUT_FILE.1" "$OUT_FILE" || exit 1