# Bash script, executable file (63 lines, 2.7 KiB in the original listing).
# Build Tesseract 4.1.1 (including the training tools) from the sources in
# /tesseract-4.1.1, install it under $HOME/local, download the Russian model
# and the langdata files used for training, persist the environment in
# ~/.bashrc, and finally swap in a replacement tesstrain_utils.sh taken from
# /vagrant.
#
# Written to be runnable by plain sh as well as bash (no bash-only syntax).

# Print a message to stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# fetch_all BASE_URL NAME...
# Download BASE_URL/NAME for every NAME into the current directory.
# A failed download is reported on stderr but does not stop the script.
fetch_all() {
  _fetch_base=$1
  shift
  for _fetch_name in "$@"; do
    wget "$_fetch_base/$_fetch_name" ||
      printf 'warning: could not download %s\n' "$_fetch_base/$_fetch_name" >&2
  done
}

TESS_SRC=/tesseract-4.1.1
LANGDATA_URL=https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master

cd "$TESS_SRC" || exit 1

# Installation layout: everything goes under a per-user prefix.
VLOC=$HOME/local
VBIN=$VLOC/bin
VINC=$VLOC/include
VLIB=$VLOC/lib

# The pkgconfig directory is created under lib so that it matches the
# PKG_CONFIG_PATH exported below.
mkdir -p "$VINC" "$VLIB/pkgconfig" "$VBIN" || die "cannot create directories under $VLOC"

export PKG_CONFIG_PATH=$VLIB/pkgconfig
export LD_LIBRARY_PATH=$VLIB

# --- build and install ------------------------------------------------------
# Each step depends on the previous one, so stop at the first failure instead
# of installing a partial build.
./autogen.sh || die "autogen.sh failed"
LIBLEPT_HEADERSDIR=$VINC ./configure --prefix="$VLOC" --with-extra-libraries="$VLIB" ||
  die "configure failed"
make || die "make failed"
make install || die "make install failed"
make training || die "make training failed"
make training-install || die "make training-install failed"

# --- Russian model ----------------------------------------------------------
export TESSDATA_PREFIX=$TESS_SRC/tessdata
cd "$TESSDATA_PREFIX" || exit 1
fetch_all https://github.com/tesseract-ocr/tessdata_best/raw/master \
  rus.traineddata

# --- download langdata ------------------------------------------------------
cd "$TESS_SRC" || die "cannot cd to $TESS_SRC"
# -p keeps a re-run from failing when the directories already exist.
mkdir -p langdata/rus || die "cannot create $TESS_SRC/langdata/rus"

cd langdata || die "cannot cd to $TESS_SRC/langdata"
fetch_all "$LANGDATA_URL" \
  radical-stroke.txt \
  common.punc \
  font_properties \
  Latin.unicharset \
  Latin.xheights \
  Cyrillic.unicharset \
  Cyrillic.xheights \
  desired_bigrams.txt \
  common.unicharambigs \
  forbidden_characters_default

cd rus || die "cannot cd to $TESS_SRC/langdata/rus"
fetch_all "$LANGDATA_URL/rus" \
  rus.training_text \
  rus.punc \
  rus.numbers \
  rus.wordlist \
  desired_characters \
  okfonts.txt \
  rus.singles_text \
  rus.unicharambigs \
  rus.unicharset

# --- persist the environment for future shells ------------------------------
# $PATH is written literally (single-quoted format string) so that it is
# expanded when ~/.bashrc is read, not frozen to this script's PATH.
{
  # shellcheck disable=SC2016
  printf 'export PATH="$PATH:%s"\n' "$VBIN"
  printf 'export PKG_CONFIG_PATH="%s"\n' "$PKG_CONFIG_PATH"
  printf 'export LD_LIBRARY_PATH="%s"\n' "$LD_LIBRARY_PATH"
  printf 'export TESSDATA_PREFIX="%s"\n' "$TESSDATA_PREFIX"
} >> "$HOME/.bashrc"

# --- fix the "phase E" problem in tesstrain_utils.sh ------------------------
# Replace the installed helper with the copy from /vagrant, keeping the
# installed one as .bak. The replacement is checked first so the installed
# script is never moved away without something to put in its place.
PATCHED_UTILS=/vagrant/tesstrain_utils.sh
[ -f "$PATCHED_UTILS" ] || die "replacement script not found: $PATCHED_UTILS"
if [ -f "$VBIN/tesstrain_utils.sh" ]; then
  mv -v "$VBIN/tesstrain_utils.sh" "$VBIN/tesstrain_utils.sh.bak"
fi
cp -v "$PATCHED_UTILS" "$VBIN/tesstrain_utils.sh" || die "cannot install $PATCHED_UTILS"