# Package-level metadata for the Tesseract language data port.
# $portName and $portVersion are not set in this file; they are expected to be
# supplied by whatever sources this recipe.
SUMMARY="Language data files for Tesseract OCR engine"
DESCRIPTION="Tesseract OCR can be fully trained to recognize new languages and scripts. \
Data files for community-trained languages are available as separate \
packages, one per language."
HOMEPAGE="https://github.com/tesseract-ocr/"
LICENSE="Apache v2"
COPYRIGHT="1985-1995 HP labs
	2012 Google Inc."
REVISION="4"
SOURCE_URI="https://github.com/tesseract-ocr/tessdata/archive/$portVersion.tar.gz"
CHECKSUM_SHA256="5dcb37198336b6953843b461ee535df1401b41008d550fc9e43d0edabca7adb1"
SOURCE_DIR="tessdata-$portVersion"
DISABLE_SOURCE_PACKAGE=yes

ARCHITECTURES="any"

PROVIDES="
	$portName = $portVersion
	"
# Intentionally empty: this recipe declares no build-time requirements.
BUILD_REQUIRES="
	"
22
# Maps each Tesseract data-file prefix (the key) to the human-readable name
# used when building the per-language package summaries.
declare -A languages
# Special data files
languages[osd]="orientation and script detection"
languages[equ]="math / equation detection"

# Language data files
languages[afr]="Afrikaans"
languages[amh]="Amharic"
languages[ara]="Arabic"
languages[asm]="Assamese"
languages[aze]="Azerbaijani"
languages[aze_cyrl]="Azerbaijani - Cyrillic"
languages[bel]="Belarusian"
languages[ben]="Bengali"
languages[bod]="Tibetan"
languages[bos]="Bosnian"
languages[bul]="Bulgarian"
languages[cat]="Catalan; Valencian"
languages[ceb]="Cebuano"
languages[ces]="Czech"
languages[chi_sim]="Chinese - Simplified"
languages[chi_tra]="Chinese - Traditional"
languages[chr]="Cherokee"
languages[cym]="Welsh"
languages[dan]="Danish"
languages[dan_frak]="Danish - Fraktur script"
languages[deu]="German"
languages[deu_frak]="German - Fraktur script"
languages[dzo]="Dzongkha"
languages[ell]="Greek, Modern (1453-)"
languages[eng]="English"
languages[enm]="English, Middle (1100-1500)"
languages[epo]="Esperanto"
languages[est]="Estonian"
languages[eus]="Basque"
languages[fas]="Persian"
languages[fin]="Finnish"
languages[fra]="French"
languages[frk]="Frankish"
languages[frm]="French, Middle (ca. 1400-1600)"
languages[gle]="Irish"
languages[glg]="Galician"
languages[grc]="Greek, Ancient (-1453)"
languages[guj]="Gujarati"
languages[hat]="Haitian; Haitian Creole"
languages[heb]="Hebrew"
languages[hin]="Hindi"
languages[hrv]="Croatian"
languages[hun]="Hungarian"
languages[iku]="Inuktitut"
languages[ind]="Indonesian"
languages[isl]="Icelandic"
languages[ita]="Italian"
languages[ita_old]="Italian - Old"
languages[jav]="Javanese"
languages[jpn]="Japanese"
languages[kan]="Kannada"
languages[kat]="Georgian"
languages[kat_old]="Georgian - Old"
languages[kaz]="Kazakh"
languages[khm]="Central Khmer"
languages[kir]="Kirghiz; Kyrgyz"
languages[kor]="Korean"
languages[kur]="Kurdish"
languages[lao]="Lao"
languages[lat]="Latin"
languages[lav]="Latvian"
languages[lit]="Lithuanian"
languages[mal]="Malayalam"
languages[mar]="Marathi"
languages[mkd]="Macedonian"
languages[mlt]="Maltese"
languages[msa]="Malay"
languages[mya]="Burmese"
languages[nep]="Nepali"
languages[nld]="Dutch; Flemish"
languages[nor]="Norwegian"
languages[ori]="Oriya"
languages[pan]="Panjabi; Punjabi"
languages[pol]="Polish"
languages[por]="Portuguese"
languages[pus]="Pushto; Pashto"
languages[ron]="Romanian; Moldavian; Moldovan"
languages[rus]="Russian"
languages[san]="Sanskrit"
languages[sin]="Sinhala; Sinhalese"
languages[slk]="Slovak"
languages[slk_frak]="Slovak - Fraktur script"
languages[slv]="Slovenian"
languages[spa]="Spanish; Castilian"
languages[spa_old]="Spanish; Castilian - Old"
languages[sqi]="Albanian"
languages[srp]="Serbian"
languages[srp_latn]="Serbian - Latin"
languages[swa]="Swahili"
languages[swe]="Swedish"
languages[syr]="Syriac"
languages[tam]="Tamil"
languages[tel]="Telugu"
languages[tgk]="Tajik"
languages[tgl]="Tagalog"
languages[tha]="Thai"
languages[tir]="Tigrinya"
languages[tur]="Turkish"
languages[uig]="Uighur; Uyghur"
languages[ukr]="Ukrainian"
languages[urd]="Urdu"
languages[uzb]="Uzbek"
languages[uzb_cyrl]="Uzbek - Cyrillic"
languages[vie]="Vietnamese"
languages[yid]="Yiddish"
134
# Derive SUMMARY_<lang>, PROVIDES_<lang> and REQUIRES_<lang> for every key of
# the languages table.
# printf -v assigns straight to the computed variable name, so the description
# text is never re-parsed as shell code (with eval, a quote, `$` or backtick
# inside a description would corrupt the assignment or be executed).
for lang in "${!languages[@]}"; do
	desc=${languages[${lang}]}

	printf -v "SUMMARY_${lang}" '%s' "Data files for ${desc}"

	# Value is padded with two tabs on each side, exactly as before.
	printf -v "PROVIDES_${lang}" '\t\t%s_%s = %s\t\t' \
		"$portName" "$lang" "$portVersion"

	# "\\n" stores a literal backslash followed by "n", not a newline; this
	# reproduces the value the recipe has always generated.
	# NOTE(review): presumably the recipe consumer treats "\n" as the entry
	# separator - confirm before switching to real newlines.
	printf -v "REQUIRES_${lang}" \
		'\t\thaiku\\n\t\tvendor_tesseract >= 3\\n\t\t'
done
148
# Copy every language's data files into $dataDir/tessdata and pass the copied
# files to packageEntries under that language's code.
# Globals:
#   languages (read) - associative array whose keys are the file prefixes
#   dataDir   (read) - base directory that receives the tessdata folder
# Source files are matched with the "<lang>.*" glob relative to the current
# working directory.
INSTALL()
{
	local lang
	local tessdataDir="$dataDir/tessdata"

	mkdir -p "$tessdataDir"
	for lang in "${!languages[@]}"; do
		# Variables are quoted so paths with spaces survive; the ".*" glob
		# stays outside the quotes so it still expands.
		cp "$lang".* "$tessdataDir"
		packageEntries "$lang" \
			"$tessdataDir/$lang".*
	done
}
158