diff --git a/Vagrantfile b/Vagrantfile index 35d1021..b9cd16f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -28,9 +28,16 @@ Vagrant.configure("2") do |config| # Error: # [default] GuestAdditions seems to be installed (5.1.38) correctly, but not running. # @see solution from: https://github.com/dotless-de/vagrant-vbguest/issues/333#issuecomment-487105544 - if Vagrant.has_plugin?("vagrant-vbguest") - config.vbguest.auto_update = false - end +# if Vagrant.has_plugin?("vagrant-vbguest") +# config.vbguest.auto_update = false +# end + + config.trigger.after :up do |trigger| + trigger.info = "Trigger: install dependencies on every up" + trigger.run_remote = {inline: <<-SHELL + + SHELL + } end # Enable X11 forwarding for graphical apps. # Make sure you have xquartz installed if using OSX host! diff --git a/fonts/GOST 2.304-81 type A (plotter).FON b/fonts/GOST 2.304-81 type A (plotter).FON deleted file mode 100644 index 9028c8d..0000000 Binary files a/fonts/GOST 2.304-81 type A (plotter).FON and /dev/null differ diff --git a/fonts/GOST 2.304-81 type B (plotter).FON b/fonts/GOST 2.304-81 type B (plotter).FON deleted file mode 100644 index 70266c8..0000000 Binary files a/fonts/GOST 2.304-81 type B (plotter).FON and /dev/null differ diff --git a/fonts/GOST type A.TTF b/fonts/GOST type A.ttf similarity index 100% rename from fonts/GOST type A.TTF rename to fonts/GOST type A.ttf diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..b253475 --- /dev/null +++ b/install.sh @@ -0,0 +1,59 @@ +cd /tesseract-4.1.1 || exit 1 + +VLOC=$HOME/local +VBIN=$VLOC/bin +VINC=$VLOC/include +VLIB=$VLOC/lib + +mkdir -p $VINC $VLIB $VBIN/pkgconfig + +export PKG_CONFIG_PATH=$VLIB/pkgconfig +export LD_LIBRARY_PATH=$VLIB + +./autogen.sh +LIBLEPT_HEADERSDIR=$VINC ./configure --prefix=$VLOC --with-extra-libraries=$VLIB + +make +make install +make training +make training-install + +export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata +cd $TESSDATA_PREFIX || exit 1 +wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata + +# download langdata +cd /tesseract-4.1.1 +mkdir langdata +cd langdata +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/radical-stroke.txt +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/common.punc +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/font_properties +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Latin.unicharset +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Latin.xheights + +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Cyrillic.unicharset +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Cyrillic.xheights +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/desired_bigrams.txt +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/common.unicharambigs +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/forbidden_characters_default + +mkdir rus +cd rus +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.training_text +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.punc +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.numbers +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.wordlist + +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/desired_characters +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/okfonts.txt +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.singles_text +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.unicharambigs +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.unicharset + +{ + echo "export PATH=$PATH:$VBIN"; \ + echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH"; \ + echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH"; \ + echo "export TESSDATA_PREFIX=$TESSDATA_PREFIX"; \ +} >> "$HOME"/.bashrc diff --git a/provision.sh b/provision.sh index 55b73ea..1954479 100644 --- a/provision.sh +++ b/provision.sh @@ -6,52 +6,33 @@ env|grep DISPLAY # echo "%vagrant ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/vagrant -export PATH=$PATH:$HOME/local/bin -echo "PATH=$PATH:$HOME/local/bin" >> $HOME/.bashrc -echo "PATH=$PATH:$HOME/local/bin" >> /etc/profile # зависимости export DEBIAN_FRONTEND=noninteractive +locale-gen en_US.UTF-8 apt-get update apt-get install -y htop nano libsm6 libxext6 libxrender-dev \ libicu-dev libpango1.0-dev libcairo2-dev g++ autoconf automake \ libtool pkg-config libpng-dev libjpeg62-turbo-dev libtiff5-dev \ zlib1g-dev libicu-dev libleptonica-dev -# Устанавливаем чертежные шрифты в систему -export FONTS_DIR=/usr/local/share/fonts -echo "export FONTS_DIR=$FONTS_DIR" >> /etc/profile -cp /vagrant/fonts/* $FONTS_DIR -chmod -R 644 $FONTS_DIR -fc-cache -v - # скачиваем и собираем tesseract 4.1.1 cd / wget https://github.com/tesseract-ocr/tesseract/archive/4.1.1.tar.gz tar -zxvf 4.1.1.tar.gz -cd /tesseract-4.1.1 || exit 1 -mkdir -p $HOME/local/include $HOME/local/lib/pkgconfig - -export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig -echo "export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig" >> /etc/profile - -./autogen.sh -LIBLEPT_HEADERSDIR=$HOME/local/include ./configure \ - --prefix=$HOME/local/ --with-extra-libraries=$HOME/local/lib -make -make install -export LD_LIBRARY_PATH=$HOME/local/lib -echo "LD_LIBRARY_PATH=$HOME/local/lib" >> /etc/profile - -make training -make training-install - -export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata -acho "export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata" >> /etc/profile -cd /tessdata -wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata - - - +chown -R vagrant:vagrant /tesseract-4.1.1 +# установка, сборка и сохранение переменных среды +cd /vagrant || exit 1 +chmod +x ./install.sh +runuser -l vagrant -c '/vagrant/install.sh' +# Устанавливаем чертежные шрифты в систему +export FONTS_DIR=/usr/local/share/fonts +cp /vagrant/fonts/* $FONTS_DIR +chown -R vagrant:vagrant $FONTS_DIR +chmod -R 775 $FONTS_DIR +chmod 644 $FONTS_DIR/* +fc-cache +fc-list | grep 'GOST' +echo "export FONTS_DIR=$FONTS_DIR" >> /home/vagrant/.bashrc diff --git a/train.sh b/train.sh new file mode 100644 index 0000000..32a1042 --- /dev/null +++ b/train.sh @@ -0,0 +1,5 @@ + + +tesstrain.sh --fonts_dir $FONTS_DIR --fontlist "GOST Type A" --lang rus \ + --linedata_only --noextract_font_properties --langdata_dir /tesseract-4.1.1/langdata \ + --tessdata_dir $TESSDATA_PREFIX --output_dir ./ \ No newline at end of file