1/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved    by Bram Moolenaar
4 *
5 * Do ":help uganda"  in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * arabic.c: functions for Arabic language
12 *
13 * Included by main.c, when FEAT_ARABIC & FEAT_GUI is defined.
14 *
15 * --
16 *
17 * Author: Nadim Shaikli & Isam Bayazidi
18 *
19 */
20
21static int  A_is_a __ARGS((int cur_c));
22static int  A_is_s __ARGS((int cur_c));
23static int  A_is_f __ARGS((int cur_c));
24static int  chg_c_a2s __ARGS((int cur_c));
25static int  chg_c_a2i __ARGS((int cur_c));
26static int  chg_c_a2m __ARGS((int cur_c));
27static int  chg_c_a2f __ARGS((int cur_c));
28static int  chg_c_i2m __ARGS((int cur_c));
29static int  chg_c_f2m __ARGS((int cur_c));
30static int  chg_c_laa2i __ARGS((int hid_c));
31static int  chg_c_laa2f __ARGS((int hid_c));
32static int  half_shape __ARGS((int c));
33static int  A_firstc_laa __ARGS((int c1, int c));
34static int  A_is_harakat __ARGS((int c));
35static int  A_is_iso __ARGS((int c));
36static int  A_is_formb __ARGS((int c));
37static int  A_is_ok __ARGS((int c));
38static int  A_is_valid __ARGS((int c));
39static int  A_is_special __ARGS((int c));
40
41
42/*
43 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
44 */
45    static int
46A_is_a(cur_c)
47    int cur_c;
48{
49    switch (cur_c)
50    {
51	case a_HAMZA:
52	case a_ALEF_MADDA:
53	case a_ALEF_HAMZA_ABOVE:
54	case a_WAW_HAMZA:
55	case a_ALEF_HAMZA_BELOW:
56	case a_YEH_HAMZA:
57	case a_ALEF:
58	case a_BEH:
59	case a_TEH_MARBUTA:
60	case a_TEH:
61	case a_THEH:
62	case a_JEEM:
63	case a_HAH:
64	case a_KHAH:
65	case a_DAL:
66	case a_THAL:
67	case a_REH:
68	case a_ZAIN:
69	case a_SEEN:
70	case a_SHEEN:
71	case a_SAD:
72	case a_DAD:
73	case a_TAH:
74	case a_ZAH:
75	case a_AIN:
76	case a_GHAIN:
77	case a_TATWEEL:
78	case a_FEH:
79	case a_QAF:
80	case a_KAF:
81	case a_LAM:
82	case a_MEEM:
83	case a_NOON:
84	case a_HEH:
85	case a_WAW:
86	case a_ALEF_MAKSURA:
87	case a_YEH:
88	    return TRUE;
89    }
90
91    return FALSE;
92}
93
94
95/*
96 * Returns True if c is an Isolated Form-B ARABIC letter
97 */
98    static int
99A_is_s(cur_c)
100    int cur_c;
101{
102    switch (cur_c)
103    {
104	case a_s_HAMZA:
105	case a_s_ALEF_MADDA:
106	case a_s_ALEF_HAMZA_ABOVE:
107	case a_s_WAW_HAMZA:
108	case a_s_ALEF_HAMZA_BELOW:
109	case a_s_YEH_HAMZA:
110	case a_s_ALEF:
111	case a_s_BEH:
112	case a_s_TEH_MARBUTA:
113	case a_s_TEH:
114	case a_s_THEH:
115	case a_s_JEEM:
116	case a_s_HAH:
117	case a_s_KHAH:
118	case a_s_DAL:
119	case a_s_THAL:
120	case a_s_REH:
121	case a_s_ZAIN:
122	case a_s_SEEN:
123	case a_s_SHEEN:
124	case a_s_SAD:
125	case a_s_DAD:
126	case a_s_TAH:
127	case a_s_ZAH:
128	case a_s_AIN:
129	case a_s_GHAIN:
130	case a_s_FEH:
131	case a_s_QAF:
132	case a_s_KAF:
133	case a_s_LAM:
134	case a_s_MEEM:
135	case a_s_NOON:
136	case a_s_HEH:
137	case a_s_WAW:
138	case a_s_ALEF_MAKSURA:
139	case a_s_YEH:
140	    return TRUE;
141    }
142
143    return FALSE;
144}
145
146
147/*
148 * Returns True if c is a Final shape of an ARABIC letter
149 */
150    static int
151A_is_f(cur_c)
152    int cur_c;
153{
154    switch (cur_c)
155    {
156	case a_f_ALEF_MADDA:
157	case a_f_ALEF_HAMZA_ABOVE:
158	case a_f_WAW_HAMZA:
159	case a_f_ALEF_HAMZA_BELOW:
160	case a_f_YEH_HAMZA:
161	case a_f_ALEF:
162	case a_f_BEH:
163	case a_f_TEH_MARBUTA:
164	case a_f_TEH:
165	case a_f_THEH:
166	case a_f_JEEM:
167	case a_f_HAH:
168	case a_f_KHAH:
169	case a_f_DAL:
170	case a_f_THAL:
171	case a_f_REH:
172	case a_f_ZAIN:
173	case a_f_SEEN:
174	case a_f_SHEEN:
175	case a_f_SAD:
176	case a_f_DAD:
177	case a_f_TAH:
178	case a_f_ZAH:
179	case a_f_AIN:
180	case a_f_GHAIN:
181	case a_f_FEH:
182	case a_f_QAF:
183	case a_f_KAF:
184	case a_f_LAM:
185	case a_f_MEEM:
186	case a_f_NOON:
187	case a_f_HEH:
188	case a_f_WAW:
189	case a_f_ALEF_MAKSURA:
190	case a_f_YEH:
191	case a_f_LAM_ALEF_MADDA_ABOVE:
192	case a_f_LAM_ALEF_HAMZA_ABOVE:
193	case a_f_LAM_ALEF_HAMZA_BELOW:
194	case a_f_LAM_ALEF:
195	    return TRUE;
196    }
197    return FALSE;
198}
199
200
201/*
202 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
203 */
204    static int
205chg_c_a2s(cur_c)
206    int cur_c;
207{
208    int tempc;
209
210    switch (cur_c)
211    {
212	case a_HAMZA:
213	    tempc = a_s_HAMZA;
214	    break;
215	case a_ALEF_MADDA:
216	    tempc = a_s_ALEF_MADDA;
217	    break;
218	case a_ALEF_HAMZA_ABOVE:
219	    tempc = a_s_ALEF_HAMZA_ABOVE;
220	    break;
221	case a_WAW_HAMZA:
222	    tempc = a_s_WAW_HAMZA;
223	    break;
224	case a_ALEF_HAMZA_BELOW:
225	    tempc = a_s_ALEF_HAMZA_BELOW;
226	    break;
227	case a_YEH_HAMZA:
228	    tempc = a_s_YEH_HAMZA;
229	    break;
230	case a_ALEF:
231	    tempc = a_s_ALEF;
232	    break;
233	case a_TEH_MARBUTA:
234	    tempc = a_s_TEH_MARBUTA;
235	    break;
236	case a_DAL:
237	    tempc = a_s_DAL;
238	    break;
239	case a_THAL:
240	    tempc = a_s_THAL;
241	    break;
242	case a_REH:
243	    tempc = a_s_REH;
244	    break;
245	case a_ZAIN:
246	    tempc = a_s_ZAIN;
247	    break;
248	case a_TATWEEL:			/* exceptions */
249	    tempc = cur_c;
250	    break;
251	case a_WAW:
252	    tempc = a_s_WAW;
253	    break;
254	case a_ALEF_MAKSURA:
255	    tempc = a_s_ALEF_MAKSURA;
256	    break;
257	case a_BEH:
258	    tempc = a_s_BEH;
259	    break;
260	case a_TEH:
261	    tempc = a_s_TEH;
262	    break;
263	case a_THEH:
264	    tempc = a_s_THEH;
265	    break;
266	case a_JEEM:
267	    tempc = a_s_JEEM;
268	    break;
269	case a_HAH:
270	    tempc = a_s_HAH;
271	    break;
272	case a_KHAH:
273	    tempc = a_s_KHAH;
274	    break;
275	case a_SEEN:
276	    tempc = a_s_SEEN;
277	    break;
278	case a_SHEEN:
279	    tempc = a_s_SHEEN;
280	    break;
281	case a_SAD:
282	    tempc = a_s_SAD;
283	    break;
284	case a_DAD:
285	    tempc = a_s_DAD;
286	    break;
287	case a_TAH:
288	    tempc = a_s_TAH;
289	    break;
290	case a_ZAH:
291	    tempc = a_s_ZAH;
292	    break;
293	case a_AIN:
294	    tempc = a_s_AIN;
295	    break;
296	case a_GHAIN:
297	    tempc = a_s_GHAIN;
298	    break;
299	case a_FEH:
300	    tempc = a_s_FEH;
301	    break;
302	case a_QAF:
303	    tempc = a_s_QAF;
304	    break;
305	case a_KAF:
306	    tempc = a_s_KAF;
307	    break;
308	case a_LAM:
309	    tempc = a_s_LAM;
310	    break;
311	case a_MEEM:
312	    tempc = a_s_MEEM;
313	    break;
314	case a_NOON:
315	    tempc = a_s_NOON;
316	    break;
317	case a_HEH:
318	    tempc = a_s_HEH;
319	    break;
320	case a_YEH:
321	    tempc = a_s_YEH;
322	    break;
323	default:
324	    tempc = 0;
325    }
326
327    return tempc;
328}
329
330
331/*
332 * Change shape - from ISO-8859-6/Isolated to Initial
333 */
334    static int
335chg_c_a2i(cur_c)
336    int cur_c;
337{
338    int tempc;
339
340    switch (cur_c)
341    {
342	case a_YEH_HAMZA:
343	    tempc = a_i_YEH_HAMZA;
344	    break;
345	case a_HAMZA:			/* exceptions */
346	    tempc = a_s_HAMZA;
347	    break;
348	case a_ALEF_MADDA:		/* exceptions */
349	    tempc = a_s_ALEF_MADDA;
350	    break;
351	case a_ALEF_HAMZA_ABOVE:	/* exceptions */
352	    tempc = a_s_ALEF_HAMZA_ABOVE;
353	    break;
354	case a_WAW_HAMZA:		/* exceptions */
355	    tempc = a_s_WAW_HAMZA;
356	    break;
357	case a_ALEF_HAMZA_BELOW:	/* exceptions */
358	    tempc = a_s_ALEF_HAMZA_BELOW;
359	    break;
360	case a_ALEF:			/* exceptions */
361	    tempc = a_s_ALEF;
362	    break;
363	case a_TEH_MARBUTA:		/* exceptions */
364	    tempc = a_s_TEH_MARBUTA;
365	    break;
366	case a_DAL:			/* exceptions */
367	    tempc = a_s_DAL;
368	    break;
369	case a_THAL:			/* exceptions */
370	    tempc = a_s_THAL;
371	    break;
372	case a_REH:			/* exceptions */
373	    tempc = a_s_REH;
374	    break;
375	case a_ZAIN:			/* exceptions */
376	    tempc = a_s_ZAIN;
377	    break;
378	case a_TATWEEL:			/* exceptions */
379	    tempc = cur_c;
380	    break;
381	case a_WAW:			/* exceptions */
382	    tempc = a_s_WAW;
383	    break;
384	case a_ALEF_MAKSURA:		/* exceptions */
385	    tempc = a_s_ALEF_MAKSURA;
386	    break;
387	case a_BEH:
388	    tempc = a_i_BEH;
389	    break;
390	case a_TEH:
391	    tempc = a_i_TEH;
392	    break;
393	case a_THEH:
394	    tempc = a_i_THEH;
395	    break;
396	case a_JEEM:
397	    tempc = a_i_JEEM;
398	    break;
399	case a_HAH:
400	    tempc = a_i_HAH;
401	    break;
402	case a_KHAH:
403	    tempc = a_i_KHAH;
404	    break;
405	case a_SEEN:
406	    tempc = a_i_SEEN;
407	    break;
408	case a_SHEEN:
409	    tempc = a_i_SHEEN;
410	    break;
411	case a_SAD:
412	    tempc = a_i_SAD;
413	    break;
414	case a_DAD:
415	    tempc = a_i_DAD;
416	    break;
417	case a_TAH:
418	    tempc = a_i_TAH;
419	    break;
420	case a_ZAH:
421	    tempc = a_i_ZAH;
422	    break;
423	case a_AIN:
424	    tempc = a_i_AIN;
425	    break;
426	case a_GHAIN:
427	    tempc = a_i_GHAIN;
428	    break;
429	case a_FEH:
430	    tempc = a_i_FEH;
431	    break;
432	case a_QAF:
433	    tempc = a_i_QAF;
434	    break;
435	case a_KAF:
436	    tempc = a_i_KAF;
437	    break;
438	case a_LAM:
439	    tempc = a_i_LAM;
440	    break;
441	case a_MEEM:
442	    tempc = a_i_MEEM;
443	    break;
444	case a_NOON:
445	    tempc = a_i_NOON;
446	    break;
447	case a_HEH:
448	    tempc = a_i_HEH;
449	    break;
450	case a_YEH:
451	    tempc = a_i_YEH;
452	    break;
453	default:
454	    tempc = 0;
455    }
456
457    return tempc;
458}
459
460
461/*
462 * Change shape - from ISO-8859-6/Isolated to Medial
463 */
464    static int
465chg_c_a2m(cur_c)
466    int cur_c;
467{
468    int tempc;
469
470    switch (cur_c)
471    {
472	case a_HAMZA:			/* exception */
473	    tempc = a_s_HAMZA;
474	    break;
475	case a_ALEF_MADDA:		/* exception */
476	    tempc = a_f_ALEF_MADDA;
477	    break;
478	case a_ALEF_HAMZA_ABOVE:	/* exception */
479	    tempc = a_f_ALEF_HAMZA_ABOVE;
480	    break;
481	case a_WAW_HAMZA:		/* exception */
482	    tempc = a_f_WAW_HAMZA;
483	    break;
484	case a_ALEF_HAMZA_BELOW:	/* exception */
485	    tempc = a_f_ALEF_HAMZA_BELOW;
486	    break;
487	case a_YEH_HAMZA:
488	    tempc = a_m_YEH_HAMZA;
489	    break;
490	case a_ALEF:			/* exception */
491	    tempc = a_f_ALEF;
492	    break;
493	case a_BEH:
494	    tempc = a_m_BEH;
495	    break;
496	case a_TEH_MARBUTA:		/* exception */
497	    tempc = a_f_TEH_MARBUTA;
498	    break;
499	case a_TEH:
500	    tempc = a_m_TEH;
501	    break;
502	case a_THEH:
503	    tempc = a_m_THEH;
504	    break;
505	case a_JEEM:
506	    tempc = a_m_JEEM;
507	    break;
508	case a_HAH:
509	    tempc = a_m_HAH;
510	    break;
511	case a_KHAH:
512	    tempc = a_m_KHAH;
513	    break;
514	case a_DAL:			/* exception */
515	    tempc = a_f_DAL;
516	    break;
517	case a_THAL:			/* exception */
518	    tempc = a_f_THAL;
519	    break;
520	case a_REH:			/* exception */
521	    tempc = a_f_REH;
522	    break;
523	case a_ZAIN:			/* exception */
524	    tempc = a_f_ZAIN;
525	    break;
526	case a_SEEN:
527	    tempc = a_m_SEEN;
528	    break;
529	case a_SHEEN:
530	    tempc = a_m_SHEEN;
531	    break;
532	case a_SAD:
533	    tempc = a_m_SAD;
534	    break;
535	case a_DAD:
536	    tempc = a_m_DAD;
537	    break;
538	case a_TAH:
539	    tempc = a_m_TAH;
540	    break;
541	case a_ZAH:
542	    tempc = a_m_ZAH;
543	    break;
544	case a_AIN:
545	    tempc = a_m_AIN;
546	    break;
547	case a_GHAIN:
548	    tempc = a_m_GHAIN;
549	    break;
550	case a_TATWEEL:			/* exception */
551	    tempc = cur_c;
552	    break;
553	case a_FEH:
554	    tempc = a_m_FEH;
555	    break;
556	case a_QAF:
557	    tempc = a_m_QAF;
558	    break;
559	case a_KAF:
560	    tempc = a_m_KAF;
561	    break;
562	case a_LAM:
563	    tempc = a_m_LAM;
564	    break;
565	case a_MEEM:
566	    tempc = a_m_MEEM;
567	    break;
568	case a_NOON:
569	    tempc = a_m_NOON;
570	    break;
571	case a_HEH:
572	    tempc = a_m_HEH;
573	    break;
574	case a_WAW:			/* exception */
575	    tempc = a_f_WAW;
576	    break;
577	case a_ALEF_MAKSURA:		/* exception */
578	    tempc = a_f_ALEF_MAKSURA;
579	    break;
580	case a_YEH:
581	    tempc = a_m_YEH;
582	    break;
583	default:
584	    tempc = 0;
585    }
586
587    return tempc;
588}
589
590
591/*
592 * Change shape - from ISO-8859-6/Isolated to final
593 */
594    static int
595chg_c_a2f(cur_c)
596    int cur_c;
597{
598    int tempc;
599
600    /* NOTE: these encodings need to be accounted for
601
602	a_f_ALEF_MADDA;
603	a_f_ALEF_HAMZA_ABOVE;
604	a_f_ALEF_HAMZA_BELOW;
605	a_f_LAM_ALEF_MADDA_ABOVE;
606	a_f_LAM_ALEF_HAMZA_ABOVE;
607	a_f_LAM_ALEF_HAMZA_BELOW;
608	*/
609
610    switch (cur_c)
611    {
612	case a_HAMZA:			/* exception */
613	    tempc = a_s_HAMZA;
614	    break;
615	case a_ALEF_MADDA:
616	    tempc = a_f_ALEF_MADDA;
617	    break;
618	case a_ALEF_HAMZA_ABOVE:
619	    tempc = a_f_ALEF_HAMZA_ABOVE;
620	    break;
621	case a_WAW_HAMZA:
622	    tempc = a_f_WAW_HAMZA;
623	    break;
624	case a_ALEF_HAMZA_BELOW:
625	    tempc = a_f_ALEF_HAMZA_BELOW;
626	    break;
627	case a_YEH_HAMZA:
628	    tempc = a_f_YEH_HAMZA;
629	    break;
630	case a_ALEF:
631	    tempc = a_f_ALEF;
632	    break;
633	case a_BEH:
634	    tempc = a_f_BEH;
635	    break;
636	case a_TEH_MARBUTA:
637	    tempc = a_f_TEH_MARBUTA;
638	    break;
639	case a_TEH:
640	    tempc = a_f_TEH;
641	    break;
642	case a_THEH:
643	    tempc = a_f_THEH;
644	    break;
645	case a_JEEM:
646	    tempc = a_f_JEEM;
647	    break;
648	case a_HAH:
649	    tempc = a_f_HAH;
650	    break;
651	case a_KHAH:
652	    tempc = a_f_KHAH;
653	    break;
654	case a_DAL:
655	    tempc = a_f_DAL;
656	    break;
657	case a_THAL:
658	    tempc = a_f_THAL;
659	    break;
660	case a_REH:
661	    tempc = a_f_REH;
662	    break;
663	case a_ZAIN:
664	    tempc = a_f_ZAIN;
665	    break;
666	case a_SEEN:
667	    tempc = a_f_SEEN;
668	    break;
669	case a_SHEEN:
670	    tempc = a_f_SHEEN;
671	    break;
672	case a_SAD:
673	    tempc = a_f_SAD;
674	    break;
675	case a_DAD:
676	    tempc = a_f_DAD;
677	    break;
678	case a_TAH:
679	    tempc = a_f_TAH;
680	    break;
681	case a_ZAH:
682	    tempc = a_f_ZAH;
683	    break;
684	case a_AIN:
685	    tempc = a_f_AIN;
686	    break;
687	case a_GHAIN:
688	    tempc = a_f_GHAIN;
689	    break;
690	case a_TATWEEL:			/* exception */
691	    tempc = cur_c;
692	    break;
693	case a_FEH:
694	    tempc = a_f_FEH;
695	    break;
696	case a_QAF:
697	    tempc = a_f_QAF;
698	    break;
699	case a_KAF:
700	    tempc = a_f_KAF;
701	    break;
702	case a_LAM:
703	    tempc = a_f_LAM;
704	    break;
705	case a_MEEM:
706	    tempc = a_f_MEEM;
707	    break;
708	case a_NOON:
709	    tempc = a_f_NOON;
710	    break;
711	case a_HEH:
712	    tempc = a_f_HEH;
713	    break;
714	case a_WAW:
715	    tempc = a_f_WAW;
716	    break;
717	case a_ALEF_MAKSURA:
718	    tempc = a_f_ALEF_MAKSURA;
719	    break;
720	case a_YEH:
721	    tempc = a_f_YEH;
722	    break;
723	default:
724	    tempc = 0;
725    }
726
727    return tempc;
728}
729
730
731/*
732 * Change shape - from Initial to Medial
733 */
734    static int
735chg_c_i2m(cur_c)
736    int cur_c;
737{
738    int tempc;
739
740    switch (cur_c)
741    {
742	case a_i_YEH_HAMZA:
743	    tempc = a_m_YEH_HAMZA;
744	    break;
745	case a_i_BEH:
746	    tempc = a_m_BEH;
747	    break;
748	case a_i_TEH:
749	    tempc = a_m_TEH;
750	    break;
751	case a_i_THEH:
752	    tempc = a_m_THEH;
753	    break;
754	case a_i_JEEM:
755	    tempc = a_m_JEEM;
756	    break;
757	case a_i_HAH:
758	    tempc = a_m_HAH;
759	    break;
760	case a_i_KHAH:
761	    tempc = a_m_KHAH;
762	    break;
763	case a_i_SEEN:
764	    tempc = a_m_SEEN;
765	    break;
766	case a_i_SHEEN:
767	    tempc = a_m_SHEEN;
768	    break;
769	case a_i_SAD:
770	    tempc = a_m_SAD;
771	    break;
772	case a_i_DAD:
773	    tempc = a_m_DAD;
774	    break;
775	case a_i_TAH:
776	    tempc = a_m_TAH;
777	    break;
778	case a_i_ZAH:
779	    tempc = a_m_ZAH;
780	    break;
781	case a_i_AIN:
782	    tempc = a_m_AIN;
783	    break;
784	case a_i_GHAIN:
785	    tempc = a_m_GHAIN;
786	    break;
787	case a_i_FEH:
788	    tempc = a_m_FEH;
789	    break;
790	case a_i_QAF:
791	    tempc = a_m_QAF;
792	    break;
793	case a_i_KAF:
794	    tempc = a_m_KAF;
795	    break;
796	case a_i_LAM:
797	    tempc = a_m_LAM;
798	    break;
799	case a_i_MEEM:
800	    tempc = a_m_MEEM;
801	    break;
802	case a_i_NOON:
803	    tempc = a_m_NOON;
804	    break;
805	case a_i_HEH:
806	    tempc = a_m_HEH;
807	    break;
808	case a_i_YEH:
809	    tempc = a_m_YEH;
810	    break;
811	default:
812	    tempc = 0;
813    }
814
815    return tempc;
816}
817
818
819/*
820 * Change shape - from Final to Medial
821 */
822    static int
823chg_c_f2m(cur_c)
824    int cur_c;
825{
826    int tempc;
827
828    switch (cur_c)
829    {
830	/* NOTE: these encodings are multi-positional, no ?
831	   case a_f_ALEF_MADDA:
832	   case a_f_ALEF_HAMZA_ABOVE:
833	   case a_f_ALEF_HAMZA_BELOW:
834	   */
835	case a_f_YEH_HAMZA:
836	    tempc = a_m_YEH_HAMZA;
837	    break;
838	case a_f_WAW_HAMZA:		/* exceptions */
839	case a_f_ALEF:
840	case a_f_TEH_MARBUTA:
841	case a_f_DAL:
842	case a_f_THAL:
843	case a_f_REH:
844	case a_f_ZAIN:
845	case a_f_WAW:
846	case a_f_ALEF_MAKSURA:
847	    tempc = cur_c;
848	    break;
849	case a_f_BEH:
850	    tempc = a_m_BEH;
851	    break;
852	case a_f_TEH:
853	    tempc = a_m_TEH;
854	    break;
855	case a_f_THEH:
856	    tempc = a_m_THEH;
857	    break;
858	case a_f_JEEM:
859	    tempc = a_m_JEEM;
860	    break;
861	case a_f_HAH:
862	    tempc = a_m_HAH;
863	    break;
864	case a_f_KHAH:
865	    tempc = a_m_KHAH;
866	    break;
867	case a_f_SEEN:
868	    tempc = a_m_SEEN;
869	    break;
870	case a_f_SHEEN:
871	    tempc = a_m_SHEEN;
872	    break;
873	case a_f_SAD:
874	    tempc = a_m_SAD;
875	    break;
876	case a_f_DAD:
877	    tempc = a_m_DAD;
878	    break;
879	case a_f_TAH:
880	    tempc = a_m_TAH;
881	    break;
882	case a_f_ZAH:
883	    tempc = a_m_ZAH;
884	    break;
885	case a_f_AIN:
886	    tempc = a_m_AIN;
887	    break;
888	case a_f_GHAIN:
889	    tempc = a_m_GHAIN;
890	    break;
891	case a_f_FEH:
892	    tempc = a_m_FEH;
893	    break;
894	case a_f_QAF:
895	    tempc = a_m_QAF;
896	    break;
897	case a_f_KAF:
898	    tempc = a_m_KAF;
899	    break;
900	case a_f_LAM:
901	    tempc = a_m_LAM;
902	    break;
903	case a_f_MEEM:
904	    tempc = a_m_MEEM;
905	    break;
906	case a_f_NOON:
907	    tempc = a_m_NOON;
908	    break;
909	case a_f_HEH:
910	    tempc = a_m_HEH;
911	    break;
912	case a_f_YEH:
913	    tempc = a_m_YEH;
914	    break;
915	    /* NOTE: these encodings are multi-positional, no ?
916		case a_f_LAM_ALEF_MADDA_ABOVE:
917		case a_f_LAM_ALEF_HAMZA_ABOVE:
918		case a_f_LAM_ALEF_HAMZA_BELOW:
919		case a_f_LAM_ALEF:
920		*/
921	default:
922	    tempc = 0;
923    }
924
925    return tempc;
926}
927
928
929/*
930 * Change shape - from Combination (2 char) to an Isolated
931 */
932    static int
933chg_c_laa2i(hid_c)
934    int hid_c;
935{
936    int tempc;
937
938    switch (hid_c)
939    {
940	case a_ALEF_MADDA:
941	    tempc = a_s_LAM_ALEF_MADDA_ABOVE;
942	    break;
943	case a_ALEF_HAMZA_ABOVE:
944	    tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
945	    break;
946	case a_ALEF_HAMZA_BELOW:
947	    tempc = a_s_LAM_ALEF_HAMZA_BELOW;
948	    break;
949	case a_ALEF:
950	    tempc = a_s_LAM_ALEF;
951	    break;
952	default:
953	    tempc = 0;
954    }
955
956    return tempc;
957}
958
959
960/*
961 * Change shape - from Combination-Isolated to Final
962 */
963    static int
964chg_c_laa2f(hid_c)
965    int hid_c;
966{
967    int tempc;
968
969    switch (hid_c)
970    {
971	case a_ALEF_MADDA:
972	    tempc = a_f_LAM_ALEF_MADDA_ABOVE;
973	    break;
974	case a_ALEF_HAMZA_ABOVE:
975	    tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
976	    break;
977	case a_ALEF_HAMZA_BELOW:
978	    tempc = a_f_LAM_ALEF_HAMZA_BELOW;
979	    break;
980	case a_ALEF:
981	    tempc = a_f_LAM_ALEF;
982	    break;
983	default:
984	    tempc = 0;
985    }
986
987    return tempc;
988}
989
/*
 * half_shape: do "half-shaping" on character "c".
 * An ISO-8859-6 letter is converted with chg_c_a2i(); a valid character
 * that is a Final shape is converted with chg_c_f2m().
 * Returns zero if no shaping was done.
 */
    static int
half_shape(c)
    int		c;
{
    int		half = 0;

    if (A_is_a(c))
	half = chg_c_a2i(c);
    else if (A_is_valid(c) && A_is_f(c))
	half = chg_c_f2m(c);

    return half;
}
1003
1004/*
1005 * Do Arabic shaping on character "c".  Returns the shaped character.
1006 * out:    "ccp" points to the first byte of the character to be shaped.
1007 * in/out: "c1p" points to the first composing char for "c".
1008 * in:     "prev_c"  is the previous character (not shaped)
1009 * in:     "prev_c1" is the first composing char for the previous char
1010 *		     (not shaped)
1011 * in:     "next_c"  is the next character (not shaped).
1012 */
1013    int
1014arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
1015    int		c;
1016    int		*ccp;
1017    int		*c1p;
1018    int		prev_c;
1019    int		prev_c1;
1020    int		next_c;
1021{
1022    int		curr_c;
1023    int		shape_c;
1024    int		curr_laa;
1025    int		prev_laa;
1026
1027    /* Deal only with Arabic character, pass back all others */
1028    if (!A_is_ok(c))
1029	return c;
1030
1031    /* half-shape current and previous character */
1032    shape_c = half_shape(prev_c);
1033
1034    /* Save away current character */
1035    curr_c = c;
1036
1037    curr_laa = A_firstc_laa(c, *c1p);
1038    prev_laa = A_firstc_laa(prev_c, prev_c1);
1039
1040    if (curr_laa)
1041    {
1042	if (A_is_valid(prev_c) && !A_is_f(shape_c)
1043					 && !A_is_s(shape_c) && !prev_laa)
1044	    curr_c = chg_c_laa2f(curr_laa);
1045	else
1046	    curr_c = chg_c_laa2i(curr_laa);
1047
1048	/* Remove the composing character */
1049	*c1p = 0;
1050    }
1051    else if (!A_is_valid(prev_c) && A_is_valid(next_c))
1052	curr_c = chg_c_a2i(c);
1053    else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
1054	curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
1055    else if (A_is_valid(next_c))
1056	curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
1057    else if (A_is_valid(prev_c))
1058	curr_c = chg_c_a2f(c);
1059    else
1060	curr_c = chg_c_a2s(c);
1061
1062    /* Sanity check -- curr_c should, in the future, never be 0.
1063     * We should, in the future, insert a fatal error here. */
1064    if (curr_c == NUL)
1065	curr_c = c;
1066
1067    if (curr_c != c && ccp != NULL)
1068    {
1069	char_u buf[MB_MAXBYTES];
1070
1071	/* Update the first byte of the character. */
1072	(*mb_char2bytes)(curr_c, buf);
1073	*ccp = buf[0];
1074    }
1075
1076    /* Return the shaped character */
1077    return curr_c;
1078}
1079
1080
1081/*
1082 * A_firstc_laa returns first character of LAA combination if it exists
1083 */
1084    static int
1085A_firstc_laa(c, c1)
1086    int c;	/* base character */
1087    int c1;	/* first composing character */
1088{
1089    if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
1090	return c1;
1091    return 0;
1092}
1093
1094
1095/*
1096 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
1097 *		(harakat/tanween)
1098 */
1099    static int
1100A_is_harakat(c)
1101    int c;
1102{
1103    return (c >= a_FATHATAN && c <= a_SUKUN);
1104}
1105
1106
1107/*
1108 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
1109 *		(alphabet/number/punctuation)
1110 */
1111    static int
1112A_is_iso(c)
1113    int c;
1114{
1115    return ((c >= a_HAMZA && c <= a_GHAIN)
1116	    || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
1117	    || c == a_MINI_ALEF);
1118}
1119
1120
1121/*
1122 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
1123 *		(alphabet/number/punctuation)
1124 */
1125    static int
1126A_is_formb(c)
1127    int c;
1128{
1129    return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
1130	    || c == a_s_KASRATAN
1131	    || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
1132	    || c == a_BYTE_ORDER_MARK);
1133}
1134
1135
/*
 * A_is_ok: test for an Arabic 10646 character, i.e. one accepted by
 * either A_is_iso() (8859-6) or A_is_formb() (Form-B).
 * Returns non-zero when 'c' is such a character, zero otherwise.
 */
    static int
A_is_ok(c)
    int c;
{
    int	    arabic = A_is_iso(c);

    if (!arabic)
	arabic = A_is_formb(c);

    return (arabic != 0);
}
1145
1146
1147/*
1148 * A_is_valid returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B)
1149 *		with some exceptions/exclusions
1150 */
1151    static int
1152A_is_valid(c)
1153    int c;
1154{
1155    return (A_is_ok(c) && !A_is_special(c));
1156}
1157
1158
1159/*
1160 * A_is_special returns TRUE if 'c' is not a special Arabic character.
1161 *		Specials don't adhere to most of the rules.
1162 */
1163    static int
1164A_is_special(c)
1165    int c;
1166{
1167    return (c == a_HAMZA || c == a_s_HAMZA);
1168}
1169