From a34b9ffbab61d50b86385aadabc2f82d4f94681c Mon Sep 17 00:00:00 2001 From: Nikita Orlov Date: Thu, 13 Feb 2020 16:32:12 +0300 Subject: [PATCH] =?UTF-8?q?=D1=83=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BA=D0=B0=20=D0=B8=20=D1=81=D0=B1=D0=BE=D1=80=D0=BA=D0=B0=20?= =?UTF-8?q?=D0=BF=D0=BE=D0=B4=20vagrant=20=D1=84=D0=B8=D0=BA=D1=81=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=BF=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20=D1=88=D1=80=D0=B8=D1=84=D1=82=D0=BE=D0=B2=20=D1=83?= =?UTF-8?q?=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BA=D0=B0=20=D0=BF=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=BC=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20=D1=81=D1=80?= =?UTF-8?q?=D0=B5=D0=B4=D1=8B=20=D0=BF=D1=80=D0=B8=D0=BC=D0=B5=D1=80=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D0=BD=D0=B0=D1=87=D0=B0=D0=BB=D0=B0=20?= =?UTF-8?q?=D0=BE=D0=B1=D1=83=D1=87=D0=B5=D0=BD=D0=B8=D1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Vagrantfile | 13 +++-- fonts/GOST 2.304-81 type A (plotter).FON | Bin 6032 -> 0 bytes fonts/GOST 2.304-81 type B (plotter).FON | Bin 6160 -> 0 bytes fonts/{GOST type A.TTF => GOST type A.ttf} | Bin install.sh | 59 +++++++++++++++++++++ provision.sh | 49 ++++++----------- train.sh | 5 ++ 7 files changed, 89 insertions(+), 37 deletions(-) delete mode 100644 fonts/GOST 2.304-81 type A (plotter).FON delete mode 100644 fonts/GOST 2.304-81 type B (plotter).FON rename fonts/{GOST type A.TTF => GOST type A.ttf} (100%) create mode 100755 install.sh create mode 100644 train.sh diff --git a/Vagrantfile b/Vagrantfile index 35d1021..b9cd16f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -28,9 +28,16 @@ Vagrant.configure("2") do |config| # Error: # [default] GuestAdditions seems to be installed (5.1.38) correctly, but not running. # @see solution from: https://github.com/dotless-de/vagrant-vbguest/issues/333#issuecomment-487105544 - if Vagrant.has_plugin?("vagrant-vbguest") - config.vbguest.auto_update = false - end +# if Vagrant.has_plugin?("vagrant-vbguest") +# config.vbguest.auto_update = false +# end + + config.trigger.after :up do |trigger| + trigger.info = "Trigger: install dependencies on every up" + trigger.run_remote = {inline: <<-SHELL + + SHELL + } end # Enable X11 forwarding for graphical apps. # Make sure you have xquartz installed if using OSX host! diff --git a/fonts/GOST 2.304-81 type A (plotter).FON b/fonts/GOST 2.304-81 type A (plotter).FON deleted file mode 100644 index 9028c8d8017797333f4a09bf2ea501c32852de5f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6032 zcmeHLU2Ggz6+Sb2XLdXpZ>OupY`4pH;;w=lIqoLiR7ojOew=NcCbkv3MoKBgiQUvy z>nrPLo%(84(>)F{^ zzB@B(dxH%T5)VkMb>`fA?z#7#bME=Mz9&u{CYgvVK&zz?Noe4A5Fk-w^yz}=ZT=e- z6aWAFpSHjo5sKXPde__Ae<~e+d;9oPGxOH@LjG)Fa@Lx?Fu!1(p0)}X=Bx{IQ_}_O z#LSsOem?)?g7t-&xvBhD=Lfp=bf@~mM-~0D=TSu3H#^5f@AL4igY+_8#^2vO|6!oY zTEH1%bQ||w@II`PNlILdgag`M>Y`D@r)dH0041eNQup}GfObl{i!uQpWn#^nfM!tUUwwWwN{W?Y0&!o}ac3S$*fv z(r>~0d!(Ruf!_nq`=CowoHQv- zcS0V`CrLqTA&p-3i<)_tHW6Fg+{x(Tj4Leg@j_fd2yg zcR&m6cqmPu4H?uQO45F`9|j%?_0uV|&j6kWd_S~<-U9wJpa}@GG$mOd-OcvV0PsG* zAZRC;NfWG}9s{0YNt$IA6<7>l(Tjjr0IvbA0RF_{^f53~EQ%=xr2zeCe@=(+Jv+1E$b#!#vvapgk8h=-Yrx z;UrxK?Q*!E-a!AC7`qTcCXc zZ~;aa{HqJz+0{??qJ01`33wXtY?nosfL{T;(Uqm^;45|QARB!{@27-5K)dvvl+zKD z$buIE-$naP;A=X34e+UL799b68Sr($rEPum>b4ZUhxQFX9S}AwG7ZQ9yc2K~a1t;L zc*f|b7l3~N{2Smu7`=2IxPm_G9-uqB`zX^5|LwNuF~C>Qz6AVg_m(Mvs}GaIsYcg0 zOUQMuYWV0mW0A{Qd~TR2;!8aiadl9WRSGvlw!6Br(rl2;8j7ouCi7~I26;4RE%Ku7 z+6~9126I(eRhUYOBir1xWLt62VY2F~4F(3swfT$Bk6D z8g&h>A9G4nVkORwF*KYK*Apgjr(QQXl{J&IY>arVMlr7DTn*j0kd+u$hpjic`XGHE zHJGEc7%38Cl7e)pIIJN#)Mzq0wH=3S*})1qoiNdn8dPsJ z9H-H8QG--hw#oMUb>claDss&zl6GyA2{oc3+S_ZL+tZ3%Q|p?|)i~L4s%E*F zgQy+5LR1MxQjq@?#uSOm%K-!A7h(m&(9Ro94O9ZKVkFCpWnrfrEgEf0L1S@H zRpJKB9-Llaz6$X~uGZEvIAYyjV9nIQNqf8{qD5>_iW@$Y*T$!3C$1XFHG2gsMSLbs zad;CI;T}RVeBi_W(OmT@aVlC|tC=bIK^a?kR5M}T2&Tldf_4i|h~?MhUf1y!W*W9> zdb@5iU8hK#M{;$Sug7kw38M`9rv3sE-Wb*bn&j_VyAcwKl~5h3Z~U=rZDnUvc|zZ#@hho`*=tLjZlK#^LsMm&00 zuhc3?4tk^(5uOFF5fM@Kmt4=;sc4Zz)OOY^II`IHR+G7ZMqE8cPEr`S=&(iTpM`Lp znLDyvDROVbq{?!n$XDVFTODAkiqP$pL1dmbDX5fgP}z|~;)^1(+KuuR&6k+3r*t)E zLV+NYZ%FDJ%fhXtc6b5yf;~1#(wo+?^~9{NUqY_(4V7vIrV+J8tCH^@@N3UQ3<|qS zLtaIKS#2~{MKrc^cJDCNmqnHG0FUHgOuN{}IHi<BQKk?y*Mvz>i}1j26e`P8bR{#)07f#h@a0UfzPMTMCR2zvFTQ|E*Fr z{x7S{*7?}HUiR)GTXWzxQ1-$I*_c9Z$i`)AhumZ>KfAlEY`VxWURi6&O`ho8SZw@S zN7ptjZW{`hnlo0aD;()NjZbyOSmD*EgQ(2eLCRvHXcDer$XZBEe$foNj^v_Jbe4b( z^}sIIVdP#cO%)``iYaPYPFsSnqaYyCji|8_G&wRS_*PI%)vb*pCassde_zwQT{elId3jq!a)8RrWom8XP{>TQ6w-WS(dav`lvQen3|6d>U_CNAk1hC+98> zPtH!0_jCpN1GIz0tikE|GliM+3p4pS%H`)4Hh(XBGGDMp{U59b?+oM+I?f5j z*JK&MJ0H_hesa1wQD diff --git a/fonts/GOST 2.304-81 type B (plotter).FON b/fonts/GOST 2.304-81 type B (plotter).FON deleted file mode 100644 index 70266c853846b4f9cb547691b5a042ba61251651..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6160 zcmeHLTWlQF8UE(FG6do#C`hqG|2-K<%l&TeNX(fdELJ(@BszO$axD*!|x|p>y_Rgi>nb}=m zVhS&jhn{(sNmJNH8c;E=-502FrkxsjxxI(T16KCS;5loS8|`JcAH z3n`>FJ>PU;^RJZKE^Iz}@93m6Q5rv88b0fsJvTY!93OE?=f<3KV<$&S&f(D$rSZw} zQ&Y|zqhlw>zdqUBoXoFQzyF|-d@714z zL>z)Uxk{l!5%Nu<#@mG^4B9czs`2(BuM|3x(YSPtcU;+oLTwyp-#jpM^q&2LawY6O zG|+$a$iX4!0NcL)+XrqR8alA=w!Z$G_c(H#Q|#VZ=-GbV4rl88#E7%s>6$n*J~cH` zx;mQPf&{p~T;!&{BN1!WkAfuCSsVwtPF(FAKkw`t;zrJn-Mf3Y7Yl{Ls=*)opdb&8 z-y&DzdcfmKC~dbRk0ChhJ`&mJ7sIs`BEi>{7wQeI|6y5T85Xs;O-p|wN3tn;D>782 z3l92mJs!dq{1_YXEH>hK((llYSJ8pLaO~gMzRmU@=)?!?Z&WhKDjm3nG^jYZo9#K$ z4@f`a^Pku*C^n7FeN>JoeU{Wo+N&1upqj@cs)Og$tMCW5ec;~uag!@n(iR0=dnrOhOKOSNH>!D^fZp>ow$STSM?6u zuV*o#XGl3b#^qV_x9%D{No4x-iGKv&q3Vq}`-jNMCNs<8HS1lfFy( zKIvyoSK%eL7fA0mb)khwcV)5@Ur2IZvI{pSx8hE=ljJ`}-qWPtv;RKZrVTlCZRo&J z(sxMDkS>sBH{_w)?S!Tde2FwfI!?OZcJKh(Z@KWTRG5_x|nmD#@{)q(9P;A0>`m_6_WJiHZD#=J0k z4R*r$>bz)bZ>oq(XtG6RsRhE^l7?_yq=mjmU4$PqA|1^=ZLbIh{fy$P0i_4GQxy%_ zPzzL_3ix3VguX}>G3{EStyo6OyNVZhen8czurP}jD5V@Lm5nSxEhA3+>_TLUC=1h? zh4uD26I`H73C&!xzP+`2dsNg#ebMxUT})1w0bQhv292mx!>Z@;hc*li#n5$wX=m05 zBI|n9psGd|*9}!%(Rrz+4BR!!HU|mMT+(Y^uh8|hlPfUCUR!tBO}oO}N&&G@rj*@j zx9o~k6d_AoD^LR6Q|ZgV_X0|yiCi8P7rqi|uoR8{h88Nm?5XUjK9+(dzq;ftg^S9P z?wMgiN$94jCloU@=$)cHZRf@80=I0eYOiq^$H> zsy?>n(m6SnHk{__UYaxIAgNrM%C+T~m@;!~R!YT#L#Ih(JCA; zn-S(xs!fGfo3%zX+ANN?RdP4<{xmb~z_y~T8fq_3XO)l^ z=CwRSiXrXBa1Hzq=K~+U>YITHQ)66F!Ug7IsX-fQvepGuwKEnWTBQEy*WBL8vRbCU6~xwRK~b z{!;>#F=NNu%0oooGHRqOd7h0}ic07Qfgdg~IEqVkPgUZOHn~*$+J#sfO|OiP+StZf zH|cn*h&LI!0VS>2HlOz{Ykm28k%a~9#(Aa2Ma=Mk@We39>Vj2GE{@G0jslUk%eF8* z>4FrTSS`+Jt;ayCsDMkzbyhCB4Qs`^bh%x5!BYZaji%P}jipxOe4`qH5$BtrDxA!^>@3!0%Y&E(T+w_5Ax@)V^bO65 z#4sAo5B*RiE^&7s)xewoC?uEfXkEtaYLLO-8Cgse5becv;Kh|dltI8ofx;*Sx;ig!t*_u@4{pB04u+8kEp z2s6hRT>+VEV@w_i^*tJy3o$R$q06Fd(seR?Kw$9|>9TbeBKEpA^Pbslxvo^nvY0)a zxyBx40SpL)rAa<27d*NlJ(6gS-JVD@H?Feo@5 zq&-$Lxf;B9F}%ofCeVa#6XRFqADQL@CMiNvW?JHUy)$>wqQdgntVjdQ*YrS){wopz zhmaYkl1B0IZ_`0EL9TweomSd^Irl9id9v8R&Ns!1>^3LTTP?(O1Q8&rB-~n@K&@31 zx8Bz%~lT zsH7r^)KuV_gXfY3QOUX~Bl5@2#vnp+cihTSOxlc~6A5bRY z^;6-xb%NaYvgErW)d|ehQ#hs1?DS9#Juj>h7pl6)Qv*ZMb;V$nVtD+oBLW*2sm*dO ztlNNbuM5wk`ZSYSPo*^-8$0p>W=56(GKM9dikNmYmmw{lph_dV$wq|H~d0rCC&Wu78y22<+$$uFEA&-C6+GsI?H(?R{m!~Td zlAxot3`xyMFIlXBk9?)VO_0~Y$mc&#@!d?wnA zW9!$1yRaF9rO^k5r$+Xl8$EN9ufV-WPmhh9lvjE~-M5aP9WISO;1mnj7Is}*zt=1B z+UvHF)5B*Dj8B{|jh?=D3e>08{g}JJ&+$5JfMZ7~htzRQD&CN#oA->s)wq)KjGsJr zVk#2#)`7!=`-U8;Xzd=>={kBizMo_m*2mUW#;)$`3UbXn>DX|o;eZT_mZPQNvB@*T Lkxj4E-_rjD-#T=` diff --git a/fonts/GOST type A.TTF b/fonts/GOST type A.ttf similarity index 100% rename from fonts/GOST type A.TTF rename to fonts/GOST type A.ttf diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..b253475 --- /dev/null +++ b/install.sh @@ -0,0 +1,59 @@ +cd /tesseract-4.1.1 || exit 1 + +VLOC=$HOME/local +VBIN=$VLOC/bin +VINC=$VLOC/include +VLIB=$VLOC/lib + +mkdir -p $VINC $VLIB $VBIN/pkgconfig + +export PKG_CONFIG_PATH=$VLIB/pkgconfig +export LD_LIBRARY_PATH=$VLIB + +./autogen.sh +LIBLEPT_HEADERSDIR=$VINC ./configure --prefix=$VLOC --with-extra-libraries=$VLIB + +make +make install +make training +make training-install + +export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata +cd $TESSDATA_PREFIX || exit 1 +wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata + +# download langdata +cd /tesseract-4.1.1 +mkdir langdata +cd langdata +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/radical-stroke.txt +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/common.punc +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/font_properties +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Latin.unicharset +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Latin.xheights + +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Cyrillic.unicharset +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/Cyrillic.xheights +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/desired_bigrams.txt +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/common.unicharambigs +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/forbidden_characters_default + +mkdir rus +cd rus +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.training_text +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.punc +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.numbers +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.wordlist + +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/desired_characters +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/okfonts.txt +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.singles_text +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.unicharambigs +wget https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master/rus/rus.unicharset + +{ + echo "export PATH=$PATH:$VBIN"; \ + echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH"; \ + echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH"; \ + echo "export TESSDATA_PREFIX=$TESSDATA_PREFIX"; \ +} >> "$HOME"/.bashrc diff --git a/provision.sh b/provision.sh index 55b73ea..1954479 100644 --- a/provision.sh +++ b/provision.sh @@ -6,52 +6,33 @@ env|grep DISPLAY # echo "%vagrant ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/vagrant -export PATH=$PATH:$HOME/local/bin -echo "PATH=$PATH:$HOME/local/bin" >> $HOME/.bashrc -echo "PATH=$PATH:$HOME/local/bin" >> /etc/profile # зависимости export DEBIAN_FRONTEND=noninteractive +locale-gen en_US.UTF-8 apt-get update apt-get install -y htop nano libsm6 libxext6 libxrender-dev \ libicu-dev libpango1.0-dev libcairo2-dev g++ autoconf automake \ libtool pkg-config libpng-dev libjpeg62-turbo-dev libtiff5-dev \ zlib1g-dev libicu-dev libleptonica-dev -# Устанавливаем чертежные шрифты в систему -export FONTS_DIR=/usr/local/share/fonts -echo "export FONTS_DIR=$FONTS_DIR" >> /etc/profile -cp /vagrant/fonts/* $FONTS_DIR -chmod -R 644 $FONTS_DIR -fc-cache -v - # скачиваем и собираем tesseract 4.1.1 cd / wget https://github.com/tesseract-ocr/tesseract/archive/4.1.1.tar.gz tar -zxvf 4.1.1.tar.gz -cd /tesseract-4.1.1 || exit 1 -mkdir -p $HOME/local/include $HOME/local/lib/pkgconfig - -export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig -echo "export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig" >> /etc/profile - -./autogen.sh -LIBLEPT_HEADERSDIR=$HOME/local/include ./configure \ - --prefix=$HOME/local/ --with-extra-libraries=$HOME/local/lib -make -make install -export LD_LIBRARY_PATH=$HOME/local/lib -echo "LD_LIBRARY_PATH=$HOME/local/lib" >> /etc/profile - -make training -make training-install - -export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata -acho "export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata" >> /etc/profile -cd /tessdata -wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata - - - +chown -R vagrant:vagrant /tesseract-4.1.1 +# установка, сборка и сохранение переменных среды +cd /vagrant || exit 1 +chmod +x ./install.sh +runuser -l vagrant -c '/vagrant/install.sh' +# Устанавливаем чертежные шрифты в систему +export FONTS_DIR=/usr/local/share/fonts +cp /vagrant/fonts/* $FONTS_DIR +chown -R vagrant:vagrant $FONTS_DIR +chmod -R 775 $FONTS_DIR +chmod 644 $FONTS_DIR/* +fc-cache +fc-list | grep 'GOST' +echo "export FONTS_DIR=$FONTS_DIR" >> /home/vagrant/.bashrc diff --git a/train.sh b/train.sh new file mode 100644 index 0000000..32a1042 --- /dev/null +++ b/train.sh @@ -0,0 +1,5 @@ + + +tesstrain.sh --fonts_dir $FONTS_DIR --fontlist "GOST Type A" --lang rus \ + --linedata_only --noextract_font_properties --langdata_dir /tesseract-4.1.1/langdata \ + --tessdata_dir $TESSDATA_PREFIX --output_dir ./ \ No newline at end of file