# HaikuPorts recipe: packages the Tesseract OCR trained-data files, producing
# one sub-package per language (suffix = Tesseract language code).
SUMMARY="Language data files for Tesseract OCR engine"
DESCRIPTION="Tesseract OCR can be fully trained to recognize new languages and scripts. \
A set of files for, community made, trained languages are available as \
separate packages per language."
HOMEPAGE="https://github.com/tesseract-ocr/"
LICENSE="Apache v2"
COPYRIGHT="1985-1995 HP labs
	2012 Google Inc."
REVISION="4"
SOURCE_URI="https://github.com/tesseract-ocr/tessdata/archive/$portVersion.tar.gz"
CHECKSUM_SHA256="5dcb37198336b6953843b461ee535df1401b41008d550fc9e43d0edabca7adb1"
SOURCE_DIR="tessdata-$portVersion"
DISABLE_SOURCE_PACKAGE=yes

ARCHITECTURES="any"

PROVIDES="
	$portName = $portVersion
	"
BUILD_REQUIRES="
	"

# Maps each Tesseract data-file prefix (also used as the sub-package suffix)
# to the human-readable name shown in that sub-package's summary.
declare -A languages
# Special-purpose data files (not tied to a single language)
languages[osd]="orientation and script detection"
languages[equ]="math / equation detection"

# Per-language data files
languages[afr]="Afrikaans"
languages[amh]="Amharic"
languages[ara]="Arabic"
languages[asm]="Assamese"
languages[aze]="Azerbaijani"
languages[aze_cyrl]="Azerbaijani - Cyrillic"
languages[bel]="Belarusian"
languages[ben]="Bengali"
languages[bod]="Tibetan"
languages[bos]="Bosnian"
languages[bul]="Bulgarian"
languages[cat]="Catalan; Valencian"
languages[ceb]="Cebuano"
languages[ces]="Czech"
languages[chi_sim]="Chinese - Simplified"
languages[chi_tra]="Chinese - Traditional"
languages[chr]="Cherokee"
languages[cym]="Welsh"
languages[dan]="Danish"
languages[dan_frak]="Danish - Fraktur script"
languages[deu]="German"
languages[deu_frak]="German - Fraktur script"
languages[dzo]="Dzongkha"
languages[ell]="Greek, Modern (1453-)"
languages[eng]="English"
languages[enm]="English, Middle (1100-1500)"
languages[epo]="Esperanto"
languages[est]="Estonian"
languages[eus]="Basque"
languages[fas]="Persian"
languages[fin]="Finnish"
languages[fra]="French"
languages[frk]="Frankish"
languages[frm]="French, Middle (ca. 1400-1600)"
languages[gle]="Irish"
languages[glg]="Galician"
languages[grc]="Greek, Ancient (-1453)"
languages[guj]="Gujarati"
languages[hat]="Haitian; Haitian Creole"
languages[heb]="Hebrew"
languages[hin]="Hindi"
languages[hrv]="Croatian"
languages[hun]="Hungarian"
languages[iku]="Inuktitut"
languages[ind]="Indonesian"
languages[isl]="Icelandic"
languages[ita]="Italian"
languages[ita_old]="Italian - Old"
languages[jav]="Javanese"
languages[jpn]="Japanese"
languages[kan]="Kannada"
languages[kat]="Georgian"
languages[kat_old]="Georgian - Old"
languages[kaz]="Kazakh"
languages[khm]="Central Khmer"
languages[kir]="Kirghiz; Kyrgyz"
languages[kor]="Korean"
languages[kur]="Kurdish"
languages[lao]="Lao"
languages[lat]="Latin"
languages[lav]="Latvian"
languages[lit]="Lithuanian"
languages[mal]="Malayalam"
languages[mar]="Marathi"
languages[mkd]="Macedonian"
languages[mlt]="Maltese"
languages[msa]="Malay"
languages[mya]="Burmese"
languages[nep]="Nepali"
languages[nld]="Dutch; Flemish"
languages[nor]="Norwegian"
languages[ori]="Oriya"
languages[pan]="Panjabi; Punjabi"
languages[pol]="Polish"
languages[por]="Portuguese"
languages[pus]="Pushto; Pashto"
languages[ron]="Romanian; Moldavian; Moldovan"
languages[rus]="Russian"
languages[san]="Sanskrit"
languages[sin]="Sinhala; Sinhalese"
languages[slk]="Slovak"
languages[slk_frak]="Slovak - Fraktur script"
languages[slv]="Slovenian"
languages[spa]="Spanish; Castilian"
languages[spa_old]="Spanish; Castilian - Old"
languages[sqi]="Albanian"
languages[srp]="Serbian"
languages[srp_latn]="Serbian - Latin"
languages[swa]="Swahili"
languages[swe]="Swedish"
languages[syr]="Syriac"
languages[tam]="Tamil"
languages[tel]="Telugu"
languages[tgk]="Tajik"
languages[tgl]="Tagalog"
languages[tha]="Thai"
languages[tir]="Tigrinya"
languages[tur]="Turkish"
languages[uig]="Uighur; Uyghur"
languages[ukr]="Ukrainian"
languages[urd]="Urdu"
languages[uzb]="Uzbek"
languages[uzb_cyrl]="Uzbek - Cyrillic"
languages[vie]="Vietnamese"
languages[yid]="Yiddish"

# Define SUMMARY_<lang>, PROVIDES_<lang> and REQUIRES_<lang> for every entry
# in the table. The variable names are computed at run time, hence the eval;
# every value interpolated here comes from the fixed table above.
# NOTE(review): the "\n" sequences in REQUIRES_<lang> reach the variable as a
# literal backslash-n (the shell does not expand them inside double quotes);
# presumably HaikuPorter treats them as entry separators - confirm before
# changing.
for lang in "${!languages[@]}"; do
	desc=${languages[${lang}]}

	eval "\
		SUMMARY_${lang}=\"Data files for ${desc}\";\
		PROVIDES_${lang}=\"\
			${portName}_${lang} = $portVersion\
			\"; \
		REQUIRES_${lang}=\"\
			haiku\n\
			vendor_tesseract >= 3\n\
			\""
done

# Copies each language's data files (every file named "<lang>.*" in the
# current directory) into $dataDir/tessdata and assigns the installed copies
# to the sub-package named after the language code.
INSTALL()
{
	mkdir -p "$dataDir/tessdata"
	for lang in "${!languages[@]}"; do
		# The glob stays outside the quotes so it still expands; the "."
		# after the code keeps e.g. "aze.*" from matching "aze_cyrl.*".
		cp "$lang".* "$dataDir/tessdata"
		packageEntries "$lang" \
			"$dataDir/tessdata/$lang".*
	done
}