231 lines
7.5 KiB
Bash
Executable File
231 lines
7.5 KiB
Bash
Executable File
#!/bin/bash
#
# Settings for the generator of the Inno Setup fragment "tessdata.iss".
#
# Recognised command-line arguments (anything else is ignored):
#   tessdata=DIR  directory that holds the tessdata files
#                 (default: ../../../tessdata)
#   tessdata      legacy spelling: use the directory literally named "tessdata"
#   tag=TAG       tessdata tag placed into the download URLs (default: 3.04.00)

TESSDATA_DIR="../../../tessdata"
TESSDATA_TAG="3.04.00"

# "$@" is quoted so that an argument containing spaces stays one word.
for arg in "$@"; do
  case "$arg" in
    "tessdata="*) TESSDATA_DIR=${arg#tessdata=} ;;
    "tessdata") TESSDATA_DIR="$arg" ;;
    "tag="*) TESSDATA_TAG=${arg#tag=} ;;
  esac
done

# readlink -e fails and prints nothing when the path does not exist. Stop
# here in that case: an empty TESSDATA_DIR would make later `find` calls
# search the current directory instead.
REQUESTED_TESSDATA_DIR=$TESSDATA_DIR
if ! TESSDATA_DIR=$(readlink -e -- "$REQUESTED_TESSDATA_DIR"); then
  echo "tessdata directory not found: $REQUESTED_TESSDATA_DIR" >&2
  exit 1
fi

# readlink -m canonicalises the path without requiring the file to exist.
OUT_FILE="tessdata.iss"
OUT_FILE=$(readlink -m -- "$OUT_FILE")

# Language table: space-separated columns, one language per line.
LANGS_FILE="code2langTr.txt"
|
|
|
|
# Prints the first-column codes of $LANGS_FILE on one line, separated by
# single spaces and ordered by the values of the requested column.
# Rows whose first column starts with "#" are skipped.
#
# Globals:   LANGS_FILE (read)
# Arguments: $1 - 1-based index of the space-separated column to sort by
# Outputs:   the ordered codes on stdout (an empty line if no row qualifies)
function getLangsOrder {
  local FIELD=$1
  local FNAMES=""
  local FNAME

  # Each row is emitted as "<sort key><TAB><code>" and sorted on the key, so
  # a code is always taken from its own row. Looking the key up again with an
  # unanchored grep would also hit rows that merely contain it (for example
  # "Malay" inside "Malayalam") and yield duplicated codes.
  while IFS=$'\t' read -r _ FNAME; do
    FNAMES+="${FNAMES:+ }$FNAME"
  done < <(
    awk -v f="$FIELD" '$1 !~ /^#/ && NF >= f { print $f "\t" $1 }' \
      "$LANGS_FILE" | sort -t $'\t' -k 1,1 -s
  )

  echo "$FNAMES"
}
|
|
|
|
# Accumulators for the sections of the generated file. The "\n" sequences are
# stored literally (backslash + n) and are expanded only when the sections are
# written out.
FILES='[Files]\n'
COMPONENTS='[Components]\nName: "Languages"; Description: "{cm:Languages}"; Types: custom full\n'
MESSAGES_EN='\n[CustomMessages]\nen.Languages=OCR Languages\n'
MESSAGES_RU='\n[CustomMessages]\nru.Languages=Языки распознавания\n'

# Last language handled; starts empty.
PREV_LANG=''

# Language codes that additionally get the "compact" and "custom" types.
COMPACT_LANGS='eng rus deu spa chi_sim fra jpn'
|
|
|
|
|
|
# Appends one component entry per language to COMPONENTS and, unless only
# components are requested, the matching [Files] and [CustomMessages] entries.
# Languages are processed in the sort order of the chosen LANGS_FILE column;
# rows whose first column starts with "#" are skipped.
#
# Globals:
#   LANGS_FILE, TESSDATA_DIR, TESSDATA_TAG, COMPACT_LANGS (read)
#   COMPONENTS, FILES, MESSAGES_EN, MESSAGES_RU (appended to)
# Arguments:
#   $1 - 1-based column of LANGS_FILE whose values define the order
#   $2 - value written to the "Languages:" parameter of each component
#   $3 - "true" to emit component entries only (default: false)
# Outputs:
#   "no lang" on stdout for every language without files in TESSDATA_DIR
function fillIss {
  local LANG_FIELD=$1
  local COMPONENT_LANG=$2
  local ONLY_COMPONENTS=${3:-false}
  local FNAME LANG_EN LANG_RU TYPES BYTES BASE f
  local COMPONENT_SIZE
  local -a CUR_LANG_FILES

  COMPONENTS+="\n"

  # The table is read once; every row travels together with its sort key, so
  # the code and both names always come from the same row. (Searching the key
  # again with grep " key " misses values in the last column and may return
  # several rows.)
  while IFS=$'\t' read -r _ FNAME LANG_EN LANG_RU; do
    # Collect the language's files once; the list is reused for the size sum
    # and for the [Files] entries. Sorting keeps the output reproducible.
    CUR_LANG_FILES=()
    while IFS= read -r f; do
      CUR_LANG_FILES+=("$f")
    done < <(find "$TESSDATA_DIR" -name "$FNAME.*" | LC_ALL=C sort)

    if (( ${#CUR_LANG_FILES[@]} == 0 )); then
      echo "no lang"
      continue
    fi

    COMPONENT_SIZE=0
    for f in "${CUR_LANG_FILES[@]}"; do
      BYTES=$(wc -c < "$f")
      COMPONENT_SIZE=$(( COMPONENT_SIZE + ${BYTES:-0} ))
    done

    # Whole-word membership test: a regex/substring match would treat a code
    # such as "ng" as part of "eng".
    TYPES="full"
    if [[ " $COMPACT_LANGS " == *" $FNAME "* ]]; then
      TYPES="compact custom $TYPES"
    fi

    COMPONENTS+="Name: \"Languages\\\\$LANG_EN\"; Description: \"{cm:$LANG_EN}\";"
    COMPONENTS+=" Languages: $COMPONENT_LANG; Types: $TYPES;"
    COMPONENTS+=" ExtraDiskSpaceRequired: $COMPONENT_SIZE\n"

    # Compared as a string instead of being executed as a command.
    if [[ "$ONLY_COMPONENTS" == "true" ]]; then
      continue
    fi

    # Message keys keep the underscores; the displayed text uses spaces.
    MESSAGES_EN+="en.$LANG_EN=${LANG_EN//_/ }\n"
    MESSAGES_RU+="ru.$LANG_EN=${LANG_RU//_/ }\n"

    for f in "${CUR_LANG_FILES[@]}"; do
      BASE=${f##*/}
      FILES+="Source: \"{tmp}\\\\$BASE\"; DestDir: \"{app}\\\\tessdata\";"
      FILES+=" Components: Languages\\\\$LANG_EN; Flags: external;"
      FILES+=" Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$BASE'),"
      FILES+=" 'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$BASE',"
      FILES+=" 'ST_setup', 'Get', 0);\n"
    done
  done < <(
    awk -v f="$LANG_FIELD" \
      '$1 !~ /^#/ && NF >= f { print $f "\t" $1 "\t" $2 "\t" $3 }' \
      "$LANGS_FILE" | sort -t $'\t' -k 1,1 -s
  )
}
|
|
# First pass: components ordered by column 2 plus the file and message
# entries. Second pass: components only, ordered by column 3.
fillIss 2 "en" false
fillIss 3 "ru" true

# The accumulators contain literal "\n" sequences, which %b expands. The
# expansions are quoted so the shell neither word-splits them nor treats text
# such as "[Files]" as a glob pattern.
{
  printf '%b\n' "$FILES"
  printf '%b\n' "$COMPONENTS"
  printf '%b\n' "$MESSAGES_EN"
  printf '%b\n' "$MESSAGES_RU"
} > "$OUT_FILE"

# Re-encode from UTF-8 to cp1251 through a temporary file. The original is
# replaced only when the conversion succeeded.
if ! iconv -f utf8 -t cp1251 "$OUT_FILE" -o "$OUT_FILE.1"; then
  echo "iconv failed; $OUT_FILE was left in UTF-8" >&2
  rm -f -- "$OUT_FILE.1"
  exit 1
fi
mv -- "$OUT_FILE.1" "$OUT_FILE"

exit 0
|
|
# NOTE: earlier draft of fillIss. It is placed after the top-level `exit 0`,
# so the script terminates before this definition is reached.
#
# Arguments as assigned below:
#   $1 - ORDER: list of name prefixes, iterated with `for`
#   $2 - FIELD: assigned but not used in the body
#   $3 - ONLY_COMPONENTS: assigned, while the body tests $ONLY_COMP instead
#
# The body also reads $lang and $LLANG, which this function never assigns (it
# sets LANG, not lang). The local FILES shadows the global accumulator: it
# first collects the paths printed by find and is then appended with
# "Source: ..." entries inside the loop that iterates over it.
# NOTE(review): the \" inside the backticks look like they reach find as
# literal quote characters, so the -name pattern would never match - confirm
# before reviving this code.
function fillIss {
|
|
local ORDER=$1
|
|
local FIELD=$2
|
|
local ONLY_COMPONENTS=$3
|
|
local FILES=""
|
|
for i in $ORDER; do
|
|
local CUR_LANG_FILES=`find $TESSDATA_DIR -name \"$i.*\"`
|
|
if [ -z "$CUR_LANG_FILES" ]; then
|
|
continue
|
|
fi
|
|
FILES=$FILES" `find $TESSDATA_DIR -name \"$i.*\"`"
|
|
done
|
|
for i in $FILES; do
|
|
local fName=$(basename "$i")
|
|
local LANG=$(echo "$fName" | cut -d'.' -f1)
|
|
LANG_LINE=$(grep "$lang " $LANGS_FILE)
|
|
if [[ -z "$LANG_LINE" || "${LANG_LINE:0:1}" == "#" ]]; then
|
|
continue;
|
|
fi
|
|
LANG_EN=$(echo "$LANG_LINE" | cut -d' ' -f2)
|
|
LANG_RU=$(echo "$LANG_LINE" | cut -d' ' -f3)
|
|
if ! $ONLY_COMP; then
|
|
FILES=$FILES"Source: \"{tmp}\\\\$fName\"; DestDir: \"{app}\\\\tessdata\"; Components: Languages\\\\$LANG_EN;
|
|
Flags: external; Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$fName'),
|
|
'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$fName', 'ST_setup', 'Get', 0);\n"
|
|
fi
|
|
if [ "$PREV_LANG" != "$LANG_EN" ]; then
|
|
PREV_LANG="$LANG_EN"
|
|
SIZE=0
|
|
for s in `find $TESSDATA_DIR -name "$lang.*" -exec wc -c {} \; | cut -d' ' -f1`; do
|
|
SIZE=$(expr $SIZE + $s)
|
|
done
|
|
TYPES="full"
|
|
if [[ $COMPACT_LANGS =~ $lang ]]; then
|
|
TYPES="compact custom $TYPES"
|
|
fi
|
|
COMPONENTS=$COMPONENTS"Name: \"Languages\\\\$LANG_EN\"; Description: \"{cm:$LANG_EN}\"; Languages: $LLANG; Types: $TYPES; ExtraDiskSpaceRequired: $SIZE\n"
|
|
|
|
|
|
if ! $ONLY_COMP; then
|
|
MESSAGES_EN=$MESSAGES_EN"en.$LANG_EN=$(echo "$LANG_EN" | sed 's/_/ /g')\n"
|
|
MESSAGES_RU=$MESSAGES_RU"ru.$LANG_EN=$(echo "$LANG_RU" | sed 's/_/ /g')\n"
|
|
fi
|
|
fi
|
|
done
|
|
}
|
|
# NOTE: unreachable - the script has already exited via the `exit 0` above.
# fillIss runs inside a command substitution here, so any globals it changes
# are lost; only what it prints would be echoed.
echo $(fillIss "$(getLangsOrder 2)" "en" false)
|
|
|
|
|
|
|
|
# NOTE: earlier variant of the generator. It is defined after the top-level
# `exit 0`, so the script terminates before this definition is reached.
#
# Arguments as used by the body:
#   $1 - whitespace-separated name prefixes; for each one, files named
#        "<prefix>.*" are searched under TESSDATA_DIR
#   $2 - stored in the global LLANG and written to the "Languages:" parameter
#   $3 - stored in the global ONLY_COMP and executed as a command
#        (`if ! $ONLY_COMP`); when it succeeds, no [Files] or message entries
#        are appended
#
# Appends to the globals FILES, COMPONENTS, MESSAGES_EN and MESSAGES_RU. A
# component entry is appended only when LANG_EN differs from PREV_LANG.
# Nothing is declared `local`, so every variable assigned here is global.
function doJob {
|
|
LLANG=$2
|
|
ONLY_COMP=$3
|
|
for fff in $1; do
|
|
for f in `find $TESSDATA_DIR -name "$fff.*"`; do
|
|
f=$(basename "$f")
|
|
lang=$(echo "$f" | cut -d'.' -f1)
|
|
LANG_LINE=$(grep "$lang " $LANGS_FILE)
|
|
if [[ -z "$LANG_LINE" || "${LANG_LINE:0:1}" == "#" ]]; then
|
|
continue;
|
|
fi
|
|
LANG_EN=$(echo "$LANG_LINE" | cut -d' ' -f2)
|
|
LANG_RU=$(echo "$LANG_LINE" | cut -d' ' -f3)
|
|
if ! $ONLY_COMP; then
|
|
FILES=$FILES"Source: \"{tmp}\\\\$f\"; DestDir: \"{app}\\\\tessdata\"; Components: Languages\\\\$LANG_EN;
|
|
Flags: external; Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$f'),
|
|
'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$f', 'ST_setup', 'Get', 0);\n"
|
|
fi
|
|
if [ "$PREV_LANG" != "$LANG_EN" ]; then
|
|
PREV_LANG="$LANG_EN"
|
|
SIZE=0
|
|
for s in `find $TESSDATA_DIR -name "$lang.*" -exec wc -c {} \; | cut -d' ' -f1`; do
|
|
SIZE=$(expr $SIZE + $s)
|
|
done
|
|
TYPES="full"
|
|
if [[ $COMPACT_LANGS =~ $lang ]]; then
|
|
TYPES="compact custom $TYPES"
|
|
fi
|
|
COMPONENTS=$COMPONENTS"Name: \"Languages\\\\$LANG_EN\"; Description: \"{cm:$LANG_EN}\"; Languages: $LLANG; Types: $TYPES; ExtraDiskSpaceRequired: $SIZE\n"
|
|
|
|
|
|
if ! $ONLY_COMP; then
|
|
MESSAGES_EN=$MESSAGES_EN"en.$LANG_EN=$(echo "$LANG_EN" | sed 's/_/ /g')\n"
|
|
MESSAGES_RU=$MESSAGES_RU"ru.$LANG_EN=$(echo "$LANG_RU" | sed 's/_/ /g')\n"
|
|
fi
|
|
fi
|
|
done
|
|
done
|
|
}
|
|
# NOTE: unreachable driver for doJob - the script has already exited via the
# first top-level `exit 0`.
# It calls getOrder; no function of that name is defined in the part of the
# file shown here (the helper defined earlier is named getLangsOrder).
# After both doJob passes it prints COMPONENTS, writes the four accumulators
# to OUT_FILE, re-encodes the file to cp1251 through OUT_FILE.1 and exits.
doJob "$(getOrder 2)" "en" false
|
|
doJob "$(getOrder 3)" "ru" true
|
|
echo $COMPONENTS
|
|
echo -e $FILES > $OUT_FILE
|
|
echo -e $COMPONENTS >> $OUT_FILE
|
|
echo -e $MESSAGES_EN >> $OUT_FILE
|
|
echo -e $MESSAGES_RU >> $OUT_FILE
|
|
iconv -f utf8 -t cp1251 $OUT_FILE -o $OUT_FILE.1
|
|
mv $OUT_FILE.1 $OUT_FILE
|
|
|
|
|
exit 0
|
|
|
|
|
|
# NOTE: unreachable inline variant of the generator - two top-level `exit 0`
# statements precede it.
# It walks the sorted `ls` listing of TESSDATA_DIR, takes the part of each
# file name before the first "." as the language code, looks the code up in
# LANGS_FILE and appends a [Files] entry per file. A component and its two
# messages are appended whenever LANG_EN differs from PREV_LANG. Unlike the
# function-based variants, the component entry has no "Languages:" parameter.
# The accumulators are then written to OUT_FILE and re-encoded to cp1251.
for f in `ls $TESSDATA_DIR | sort`; do
|
|
lang=$(echo "$f" | cut -d'.' -f1)
|
|
LANG_LINE=$(grep "$lang " $LANGS_FILE)
|
|
if [[ -z "$LANG_LINE" || "${LANG_LINE:0:1}" == "#" ]]; then
|
|
continue;
|
|
fi
|
|
LANG_EN=$(echo "$LANG_LINE" | cut -d' ' -f2)
|
|
LANG_RU=$(echo "$LANG_LINE" | cut -d' ' -f3)
|
|
FILES=$FILES"Source: \"{tmp}\\\\$f\"; DestDir: \"{app}\\\\tessdata\"; Components: Languages\\\\$LANG_EN;
|
|
Flags: external; Check: DwinsHs_Check(ExpandConstant('{tmp}\\\\$f'),
|
|
'https://cdn.rawgit.com/tesseract-ocr/tessdata/$TESSDATA_TAG/$f', 'ST_setup', 'Get', 0);\n"
|
|
if [ "$PREV_LANG" != "$LANG_EN" ]; then
|
|
PREV_LANG="$LANG_EN"
|
|
SIZE=0
|
|
for s in `find $TESSDATA_DIR -name "$lang.*" -exec wc -c {} \; | cut -d' ' -f1`; do
|
|
SIZE=$(expr $SIZE + $s)
|
|
done
|
|
TYPES="full"
|
|
if [[ $COMPACT_LANGS =~ $lang ]]; then
|
|
TYPES="compact custom $TYPES"
|
|
fi
|
|
COMPONENTS=$COMPONENTS"Name: \"Languages\\\\$LANG_EN\"; Description: \"{cm:$LANG_EN}\"; Types: $TYPES; ExtraDiskSpaceRequired: $SIZE\n"
|
|
MESSAGES_EN=$MESSAGES_EN"en.$LANG_EN=$(echo "$LANG_EN" | sed 's/_/ /g')\n"
|
|
MESSAGES_RU=$MESSAGES_RU"ru.$LANG_EN=$(echo "$LANG_RU" | sed 's/_/ /g')\n"
|
|
fi
|
|
done
|
|
|
|
echo -e $FILES > $OUT_FILE
|
|
echo -e $COMPONENTS >> $OUT_FILE
|
|
echo -e $MESSAGES_EN >> $OUT_FILE
|
|
echo -e $MESSAGES_RU >> $OUT_FILE
|
|
iconv -f utf8 -t cp1251 $OUT_FILE -o $OUT_FILE.1
|
|
mv $OUT_FILE.1 $OUT_FILE
|