; File: texmf-dist/fonts/misc/xetex/fontmapping/arabxetex/arabtex-voc.map
; Mapping header: the left-hand side is vocalized ArabTeX transliteration,
; the right-hand side is Unicode.
LHSName  "ArabTeX-vocalized"
RHSName  "UNICODE"

; Symbolic names for the ASCII characters used by the transliteration.
; Lower-case letters and the upper-case letters that carry a meaning in the
; rules below are defined under their own name.
Define a U+0061  ;a
Define b U+0062  ;b
Define c U+0063  ;c
Define d U+0064  ;d
Define e U+0065  ;e
Define f U+0066  ;f
Define g U+0067  ;g
Define h U+0068  ;h
Define i U+0069  ;i
Define j U+006A  ;j
Define k U+006B  ;k
Define l U+006C  ;l
Define m U+006D  ;m
Define n U+006E  ;n
Define o U+006F  ;o
Define p U+0070  ;p
Define q U+0071  ;q
Define r U+0072  ;r
Define s U+0073  ;s
Define t U+0074  ;t
Define u U+0075  ;u
Define v U+0076  ;v
Define w U+0077  ;w
Define x U+0078  ;x
Define y U+0079  ;y
Define z U+007A  ;z
Define A U+0041  ;A
Define B U+0042  ;B
Define E U+0045  ;E
Define G U+0047  ;G
Define I U+0049  ;I
Define N U+004E  ;N
Define O U+004F  ;O
Define T U+0054  ;T
Define U U+0055  ;U
Define W U+0057  ;W
Define Y U+0059  ;Y
; Punctuation used as modifier prefixes. Note that the names C, P, V, H, Q
; and Z stand for punctuation characters, NOT for the Latin letters.
Define C U+005E  ;^  << CARET
Define UL U+005F ;_  << UNDERLINE 
Define P U+002E  ;.  << DOT
Define V U+002C  ;,  << COMMA (_V_irgule)
Define CO U+003A ;:  << COLON
Define SC U+003B ;; << SEMICOLON
Define Ain U+0060 ;` 
Define H U+0027  ;'  << HAMZA 
Define Q U+0022  ;"  << QUOTE
Define Z U+002D  ;-  << HYPHEN
Define BAR U+007C ;|
; Multi-character input sequences.
Define AN  U+0061 U+004E ; aN
Define LLAH U+006C U+006C U+005F U+0061 U+0068 ; ll_ah
;Define NUL U+00A6;
; Output code points (or sequences) for the hamza forms produced by the
; contextual hamza rules.
Define Ha  U+0623
Define Hi  U+0625
Define Hy  U+0626
Define Hw  U+0624
Define Hh  U+06C0 ;; = U+0647 U+0654  
Define HA  U+0622
Define HB  U+0640 U+0654
Define HBAR U+0621 ; '| 

; Pass 1: delete every U+E000 from the input (empty right-hand side).
pass(Unicode)

U+E000 > ; eliminate special code for capitalization in transliterations

; Pass 2: special ligatures, vowel normalisation, word-initial alif and
; contextual hamza analysis. Input and output of this pass are still mostly
; Latin transliteration; the letters themselves are converted in the next pass.
pass(Unicode)

; Character classes used as rule context in this pass.
Class [VWL]         = (U+0061  U+0065  U+0069  U+006F  U+0075 ) ;(a e i o u) 
Class [AVWL]    = (U+064B..U+0650);
Class [LTR]	    = (U+0061..U+007A U+0041 U+0045 U+0047 U+0049 U+004F U+0055 U+0042 U+004E U+0054 U+0059 U+005E U+002E U+005F U+0022 U+0027 U+0060) ;a..z A E G I O U B N T Y ^ . _ " ' `
Class [BRACKETS]    = (U+003C U+003E U+00AB U+00BB U+0028 U+0029 U+005B U+005D U+2018 U+2019 U+201C U+201D); < > () « » [ ] ‘ ’ “ ”
Class [PUNCT] = (V SC P CO)
Class [Iy]	    = (U+0049 U+0079) ;(I y)
Class [Uw]          = (U+0055 U+0077) ;(U w)
Class [Uwu]          = (U+0055 U+0077 U+0075) ;(U w u)
;Class [ae]	    = (U+0061 U+0065) ;(a e)
;Class [ou]          = (U+0075 U+006F) ;(u o)
Class [Iyi]	    = (U+0069 U+0079 U+0049) 
Class [UI]          = (U+0049 U+0055) ; U I
Class [Digits] = (U+0030..U+0039)

; Word-boundary contexts. '#' is the text boundary; BEG additionally accepts a
; preceding hyphen or bracket, END a bracket or punctuation followed by a
; space/boundary, ENDZ the same plus a hyphen.
Define BEG  (#|Z|[BRACKETS])
Define END  (#|[BRACKETS]|[PUNCT] U+0020|[PUNCT] #)
Define ENDZ (#|[BRACKETS]|[PUNCT] U+0020|[PUNCT] #|Z)

; special ligatures
;; 1. Standard ArabTeX input
;; FDF2 is just used as temporary holder, and is converted back to ligature lam lam hah later (to be taken care of by the font)
;; The BAR is used in the DMG transliteration to suppress the initial A
(a|A) BAR? l Z LLAH > U+0627 U+FDF2 ;; 'al-ll_ah' or 'Al-ll_ah'
LLAH > U+FDF2  ;; 'll_ah' -> 'l l h'
;; UPPERCASE mnemonics for various ligatures or special glyphs
U+004C U+004C U+0048 > U+FDF2  ;; 'LLH' -> 'l l h'
U+0046 U+0041 U+004C U+0049 U+004C U+004C U+0041 U+0048 > U+0641 U+064E U+0644 U+0650 U+0644 U+0647 ;; 'FALILLAH'
U+0041 U+004C U+004C U+0041 U+0048 > U+0627 U+0644 U+0644 U+0647 ; 'ALLAH'
U+004C U+004C U+0041 U+0048 > U+0644 U+0644 U+0647 ; 'LLAH'

U+0053 U+004C U+004D > U+FDFA ; 'SLM' : .sallY al-ll_ah `alayhi wa-sallam
U+0028 U+0028 > U+FD3F ;; '((' : ornate right par
U+0029 U+0029 > U+FD3E ;; '))' : ornate left par
U+0052 U+0049 U+0059 U+0041 U+004C > U+FDFC ;; 'RIYAL' : Saudi currency sign
;; most fonts won't have the following ones:
U+0053 U+0041 U+004C U+004C U+0041 U+0053 U+0054 U+004F U+0050 > U+FDF0 ;; 'SALLASTOP' (.sallY with "ye-barree", Quranic stop sign)
U+0051 U+0041 U+004C U+0041 > U+FDF1 ;; 'QALA' : qalY with ye-barree, Quranic stop sign
U+0041 U+004B U+0042 U+0041 U+0052 > U+FDF3 ;; 'AKBAR'
U+004D U+0055 U+0048 U+0041 U+004D U+004D U+0041 U+0044 > U+FDF4 ;; 'MUHAMMAD'
U+0053 U+0041 U+004C U+0041 U+004D > U+FDF5 ;; 'SALAM'
U+0052 U+0041 U+0053 U+004F ? U+0055 U+004C > U+FDF6 ;; 'RASUL' (also unicode 'RASOUL')
U+0041 U+004C U+0041 U+0059 U+0048 (U+0049|U+0045) > U+FDF7 ;; 'ALAYHI' (also unicode 'ALAYHE')
U+0057 U+0041 U+0053 U+0041 U+004C U+004C U+0041 U+004D > U+FDF8 ;; 'WASALLAM' 
U+0053 U+0041 U+004C U+004C U+0041 > U+FDF9 ;; 'SALLA'
U+004A U+0041 U+004C U+004C U+0041 > U+FDFB ;; 'JALLA' : "jalla jalAluhu"
U+0042 U+0041 U+0053 U+004D U+0041 U+004C U+0041 > U+FDFD ;; 'BASMALA' : very few fonts have this ligature afaik

;; Quranic annotation signs (U+06D6..U+06ED), typed as upper-case mnemonics
;; with a '^' (above) or '_' (below) prefix.
U+005E U+0053 U+004C U+0059 > U+06D6 ;; ^SLY
U+005E U+0051 U+004C U+0059 > U+06D7 ;; ^QLY
;; '^MIM-' needs its trailing hyphen (U+002D) in the match string: without it
;; this rule had exactly the same left-hand side as the '^MIM' rule for U+06E2
;; below, so the two outputs could not both be reached.
U+005E U+004D U+0049 U+004D U+002D > U+06D8 ;; ^MIM- (small high meem, initial form)
U+005E U+004C U+0041 > U+06D9 ;; ^LA
U+005E U+004A U+0049 U+004D > U+06DA ;; ^JIM
U+005E U+0044 U+004F U+0054 U+0053 > U+06DB ;; ^DOTS
U+005E U+0053 U+0049 U+004E > U+06DC ;; ^SIN
U+0048 U+0049 U+005A U+0042 > U+06DE ;; HIZB
U+0043 U+0049 U+0052 U+0043 U+005A U+0045 U+0052 U+004F > U+06DF ;; CIRCZERO
U+0052 U+0045 U+0043 U+0054 U+005A U+0045 U+0052 U+004F > U+06E0 ;; RECTZERO
U+005E U+004A U+0041 U+005A U+004D > U+06E1 ;; ^JAZM
U+005E U+004D U+0049 U+004D > U+06E2 ;; ^MIM (small high meem, isolated form)
U+005F U+0053 U+0049 U+004E > U+06E3 ;; _SIN
U+005E U+004D U+0041 U+0044 U+0044 U+0041 > U+06E4 ;; ^MADDA
U+0057 U+0041 U+0057 > U+06E5 ;; WAW
U+0059 U+0045 U+0048 > U+06E6 ;; YEH
U+005E U+0059 U+0045 U+0048 > U+06E7 ;; ^YEH
U+005E U+004E U+0055 U+004E > U+06E8 ;; ^NUN
U+0053 U+0041 U+004A U+0044 U+0041 > U+06E9 ;; SAJDA
U+005F U+0053 U+0054 U+004F U+0050 > U+06EA ;; _STOP
U+005E U+0053 U+0054 U+004F U+0050 > U+06EB ;; ^STOP
U+005E U+0052 U+0053 U+0054 U+004F U+0050 > U+06EC ;; ^RSTOP
U+005F U+004D U+0049 U+004D > U+06ED  ;; _MIM

U+0044 U+004F U+0054 U+0053 > U+061E ;; DOTS  (also possible by typing "::")

;; This is to enclose digits within glyph U+06DD
;; (the digit run is captured as 'dig' and re-emitted after U+06DD)
U+005B U+005B ([Digits]+)=dig U+005D U+005D > U+06DD @dig ; [[digits]]


; vowels
; Normalise vowel spellings: doubled short vowels become the long-vowel
; letters (U, I, A); e/o/E/O are folded onto i/u/I/U; 'ae'/'ao' become the
; diphthongs 'ay'/'aw'.
u u > U 
i i > I 
a a > A
; NOTE(review): U+06FC is ARABIC LETTER GHAIN WITH DOT BELOW in Unicode, which
; looks out of place next to ':U' > U+06C7 (ARABIC LETTER U) -- confirm the
; intended target for ':O'.
CO O > U+06FC ;
a e > a y
a o > a w
CO U > U+06C7 ;
e   > i ;; but not pashto > U+0659 and kashmiri > y + U+0658     (U+06CE is kurdish)
o   > u 
E > I 
O  > U 



;; initial characters ...
;; A word-initial vowel (after BEG) gets an alif (U+0627) as carrier.
a l Z / BEG _ > U+0627 l Z
H l Z / BEG _ > U+0627 l Z
;; wasla on initial alif : NEW! NOT TESTED!
H / BEG _ l l > U+0671 ; e.g. alla_dI -> 'lla_dI 
[VWL]=v1 / BEG _ > U+0627 @v1
A / BEG _ > U+0627 a
; Initial long U / I: alif, then the optional quote, then short vowel + glide.
Q?=q1 U / BEG _ > U+0627 @q1 u w ; -U
; Q?=q1 I / BEG _ > U+0627 @q1 i y ; -I
Q?=q1 I / BEG _ > U+0627 @q1 i y


;; Words with anomalous orthography:

; mi'aT -> ماية  
; Four spellings of the input (with/without quoted sukun after m and after
; the hamza), each only when followed by T or t.
;;; i H / m Q ? _ Q ? a ? (T|t) > i A Hy 
m Q i H Q a / _ (T|t)   > m U+0627 Hy 
m Q i H a   / _ (T|t)   > m U+0627 Hy a
m i H Q a   / _ (T|t)   > m i U+0627 Hy 
m i H a     / _ (T|t)   > m i U+0627 Hy a


;; contextual analysis of hamzas
;; THESE DO NOT APPLY FOR SINDHI
;; Each rule rewrites the hamza character H (') to one of the carrier forms
;; defined at the top of the file (Ha, Hi, Hy, Hw, HA, HB, HBAR) depending on
;; the neighbouring vowels. '(#|^[LTR])' means "text boundary or any character
;; that is not a transliteration letter", i.e. a word edge.

; initial hamzas 
H  / (#|^[LTR]) _ Q? [Iyi] >   Hi 
H Q A / (#|^[LTR]) _       >   HA ; alif madda
H  / (#|^[LTR]) _ Q? [Uwu] >   Ha 
H  / (#|^[LTR]) _ Q? a     >   Ha  
Q H / (#|^[LTR]) _         >   HB ; quoted isolated hamza at beginning of word

; quoted hamzas in contextual mode, equivalent to verbatim mode
; (the letter before the quote names the carrier explicitly)
a Q H >   Ha  
i Q H >   Hi 
y Q H >   Hy 
w Q H >   Hw 
h Q H >   Hh 
A Q H >   HA 
B Q H >   HB 
BAR Q H > HBAR 

; hamza + fathatan
H / a _ Q? AN  > Ha
;;H / [Iyi] _ Q? AN  > Hy 
H / _ Q? AN > Hy

;final hamzas
H / A _ Q? [VWL]? N? (#|^[LTR]) > HBAR
H / [Uw] _ Q? [VWL]? N? (#|^[LTR]) > HBAR
H / [Iy] _ Q? (i N?|u N?)? (#|^[LTR]) > HBAR ; not AN ...
H / a _ Q? (#|^[LTR]) > Ha ; not AN... 
H / a _ Q? (u|i) N? (#|^[LTR]) > Ha ; not AN... 
H / i _ Q? [VWL]? N? (#|^[LTR]) > Hy
H / u _ Q? [VWL]? N? (#|^[LTR]) > Hw

;;and more hamzas ...

; n°1: kasra/ya
H / [LTR] _ Q? [Iyi] ^N > Hy
H / [Iyi] _ > Hy

; n°2: damma/waw 
H / [LTR] _ Q? (U|u) ^N > Hw
H / (U|u) _ Q? ^[Iyi] > Hw
H / U _ > HBAR

; n°3: fatha/alif
H Q? A > HA
H / A _ Q? a > HBAR
H / ^[UI] _ Q? a > Ha
H / a _ Q? ^[UI] > Ha


; Pass 3: conversion of the transliteration letters, digits and punctuation
; to Arabic-script code points.
pass(Unicode)

; Classes are declared again for this pass.
Class [Digits] = (U+0030..U+0039)
Class [ArDigits] = (U+0660..U+0669)
Class [ArEasternDigits] = (U+06F0..U+06F9)
Class [VWL]         = (U+0061  U+0065  U+0069  U+006F  U+0075 ) ;(a e i o u) 
; NOTE(review): unlike the [LTR] class of the previous pass, this one does not
; contain U+0047 (G) -- confirm whether that difference is intended.
Class [LTR]	    = (U+0061..U+007A U+0041 U+0045 U+0049 U+004F U+0055 U+0042 U+004E U+0054 U+0059 U+005E U+002E U+005F U+0022 U+0027 U+0060) ;(a..z A E I O U B N T Y ^ . _ " ' `)
Class [Iy]	    = (U+0049 U+0079) ;(I y)
Class [Uw]          = (U+0055 U+0077) ;(U w)




;normal code
; Letter table. Conventions visible in the rules below:
;  * a letter may carry a modifier prefix: P (.), C (^), V (,), CO (:), UL (_);
;  * a doubled letter (including its prefix) yields the letter followed by
;    shadda U+0651;
;  * a long vowel yields the short-vowel sign followed by the carrier letter;
;    a preceding Q (") drops the vowel sign.
; The doubled rule is always listed before the single one; this relies on the
; longer match being preferred (see the TECkit language documentation).


(P z|U+1E93) (P z|U+1E93)  > U+0638 U+0651  ;;SHADDA
(P z|U+1E93)  > U+0638 
C z C z	  > U+0698 U+0651  ;;SHADDA
C z	  > U+0698 
V z V z   > U+0696 U+0651 ;;SHADDA
V z       > U+0696 ;pashto
z z	  > U+0632 U+0651  ;;SHADDA
z	  > U+0632 
Q Y Y  > U+0649 U+0651  ;;SHADDA
Q Y	  > U+0649 
Y Y	  > U+064E U+0649 U+0651  ;;SHADDA
Y	  > U+064E U+0649 
P I P I / _ # > U+0650 U+06CC U+0651 ;; SHADDA
P I / _ # > U+0650 U+06CC ;; workaround to have ya without dots in Arabic
y y  > U+064A U+0651  ;;SHADDA
y	  > U+064A 
v v  > U+06A4 U+0651  ;;SHADDA
v	  > U+06A4 
W 	  > U+0648 U+0627 ; waw + alif 
C U       > U+064F U+0648 U+0653; historical spelling : madda on waw
C w C w > U+06C9 U+0651 ;;SHADDA
C w > U+06C9
CO w CO w > U+06CA U+0651 ;;SHADDA
CO w > U+06CA
U A / _ # > U+064F U+0648 U+0627 
Q U [Uw]  > U+0648 U+0651  ;;SHADDA
Q U       > U+0648 
U [Uw]    > U+064F U+0648 U+0651  ;;SHADDA
U         > U+064F U+0648 
w w	  > U+0648 U+0651  ;;SHADDA
w	  > U+0648 
Q u N?    > 
u N	  > U+064C 
u	  > U+064F 
V t V t   > U+0679 U+0651 ;;SHADDA
V t 	  > U+0679 ;urdu
UL (t|s) UL (t|s) > U+062B U+0651  ;;SHADDA
UL (t|s)  > U+062B 
(P t|U+1E6D) (P t|U+1E6D)  > U+0637 U+0651  ;;SHADDA
(P t|U+1E6D)	  > U+0637 
t t	  > U+062A U+0651  ;;SHADDA
t	  > U+062A 
T T Q AN  > U+0629 U+0651   ;;SHADDA
T Q AN    > U+0629   
T AN      > U+0629 U+064B
T T	  > U+0629 U+0651  ;;SHADDA
T	  > U+0629 
C s C s   > U+0634 U+0651  ;;SHADDA
C s	  > U+0634 
(P s|U+1E63) (P s|U+1E63)  > U+0635 U+0651  ;;SHADDA
(P s|U+1E63)  > U+0635 
V s V s   > U+069A U+0651 ;;SHADDA
V s       > U+069A ;Pashto
CO s CO s > U+069B U+0651 ;;SHADDA
CO s      > U+069B ; sin with 3 dots below
s s	  > U+0633 U+0651  ;;SHADDA
s	  > U+0633 
C r C r	  > U+06EF U+0651 ;;SHADDA ; missing in ArabTeX  <<<
C r	  > U+06EF 
CO r CO r > U+0697 U+0651 ;;SHADDA ; missing in ArabTeX <<<
CO r      > U+0697 ; Dargwa language, Dagestan
P r P r	  > U+0694 U+0651 ;;SHADDA
P r	  > U+0694 ; r with dot below; in Kurdish .r -> 0694 
V r V r  > U+0691 U+0651 ;;SHADDA
V r 	  > U+0691 ;Urdu 
r r	  > U+0631 U+0651  ;;SHADDA
r	  > U+0631 
P q P q	  > U+066F U+0651  ;;SHADDA
P q	  > U+066F 
q q	  > U+0642 U+0651  ;;SHADDA
q	  > U+0642 
p p	  > U+067E U+0651  ;;SHADDA
p	  > U+067E 
CO n CO n > U+06B1 U+0651 ;;SHADDA
CO n > U+06B1
C n C n  > U+06AD U+0651  ;;SHADDA
C n      > U+06AD 
P n P n   > U+06BA U+0651  ;;SHADDA
P n 	  > U+06BA 	   ; urdu
V n V n   > U+06BC U+0651 ;;SHADDA 
V n       > U+06BC ; Pashto
n n	  > U+0646 U+0651  ;;SHADDA
n	  > U+0646 
m m	  > U+0645 U+0651  ;;SHADDA
m	  > U+0645 
P l P l	  > U+06B6 U+0651 ;;SHADDA
P l 	  > U+06B6 ; ADDED <<< 
C l C l	  > U+06B5 U+0651 ;;SHADDA
C l	  > U+06B5 ;kurdish 
l l	  > U+0644 U+0651  ;;SHADDA
l	  > U+0644 
P k P k > U+06A9 U+0651 ;;SHADDA 
P k > U+06A9 ; pashto urdu 
; NOTE(review): '_k' produces U+063A, the same output as '.g' further down --
; confirm this is intended.
UL k UL k > U+063A U+0651 ;;SHADDA
UL k  > U+063A
k k	  > U+0643 U+0651  ;;SHADDA
k	  > U+0643 
C g C g   > U+062C U+0651  ;;SHADDA
C g	  > U+062C 
CO j CO j > U+0684 U+0651 ;;SHADDA
CO j > U+0684
j j	  > U+062C U+0651  ;;SHADDA
j	  > U+062C 
Q I       > U+064A
I         > U+0650 U+064A
C I       > U+0650 U+064A U+0653 ; historical spelling : madda on ya
UL I 	  >  ; suggestion of O. Smrz (arabtex-plus project) URL = ###
Q i N?    >  ;
i N       > U+064D 
i         > U+0650 
x x	  > U+062E U+0651  ;;SHADDA
x	  > U+062E 
UL h UL h > U+062E U+0651  ;;SHADDA
UL h	  > U+062E 
(P h|U+1E25) (P h|U+1E25)  > U+062D U+0651  ;;SHADDA
(P h|U+1E25) > U+062D 
V h V h   > U+06C1 U+0651 ;;SHADDA
V h       > U+06C1
h h  > U+0647 U+0651  ;;SHADDA
h	  > U+0647 
CO g CO g > U+06B3 U+0651 ;;SHADDA
CO g > U+06B3
V g V g > U+06AC U+0651 ;;SHADDA
V g > U+06AC ; kaf with dot above
P g P g	  > U+063A U+0651  ;;SHADDA
P g	  > U+063A 
g g	  > U+06AF U+0651  ;;SHADDA
g	  > U+06AF 
G G       > U+06AB U+0651  ;;SHADDA
G         > U+06AB ;pashto
P f P f	  > U+06A1 U+0651  ;;SHADDA
P f	  > U+06A1 
f f	  > U+0641 U+0651  ;;SHADDA
f	  > U+0641 
V d V d > U+068A U+0651 ;;SHADDA
V d > U+068A
P V d P V d > U+068B U+0651 ;;SHADDA
P V d > U+068B ;; (like U+0688 with dot below, for Lahnda = Western Punjabi) 
C d C d > U+06EE U+0651 ;;SHADDA
C d > U+06EE ; not defined in ArabTeX
CO d CO d > U+068F U+0651 ;;SHADDA
CO d > U+068F
UL (d|z) UL (d|z) > U+0630 U+0651  ;;SHADDA
UL (d|z)  > U+0630 
(P d|U+1E0D) (P d|U+1E0D)  > U+0636 U+0651  ;;SHADDA
(P d|U+1E0D)  > U+0636 
d d	  > U+062F U+0651  ;;SHADDA
d	  > U+062F 
V c V c   > U+0685 U+0651 ;;SHADDA
V c	  > U+0685 ;pashto 
C c C c	  > U+0686 U+0651  ;;SHADDA
C c	  > U+0686
P C c P C c > U+06BF U+0651 ;;SHADDA
P C c > U+06BF ;; cheh with dot above <<<
c c  > U+0681 U+0651 ;;SHADDA
c	  > U+0681 ; ح with hamza above
CO c CO c > U+0682 U+0651 ;;SHADDA
CO c > U+0682 ; old pashto 
CO b CO b > U+067B U+0651 ;;SHADDA
CO b > U+067B
P b P b   > U+066E U+0651  ;;SHADDA
P b       > U+066E 
b b	  > U+0628 U+0651  ;;SHADDA
b	  > U+0628 
H A       > U+0622 
C A       > U+064E U+0622 ; historical spelling
UL A      > U+064E U+0649 
UL a	  > U+0670
UL u   > U+0657 ; inverted damma
UL i   > U+0656 ; subscript alif
; Fathatan: no extra alif after 'A H' or after the hamza-on-alif form Ha.
Q AN / (A H|Ha) _ > ; 
AN / (A H|Ha) _ > U+064B
Q AN   > U+0627 
AN 	  > U+064B U+0627
Q a	  > 
a 	  > U+064E 
Q A	  > U+0627 
A	  > U+064E U+0627 
Ain Ain	  > U+0639 U+0651  ;;SHADDA
Ain	  > U+0639 
H H	  > U+0621 U+0651 ;;SHADDA
H	  > U+0621 
; Digits, hyphens, brackets and punctuation.
U+002C / [Digits] _ [Digits] > U+002C  ; comma in numerical context not an Arabic comma
Z Z / [Digits] _ [Digits] > U+2013  ; two hyphens in numerical context -> endash
(Z|U+005D|U+005B|U+0028|U+0029)=xx / [Digits] _ [Digits] > U+202D @xx U+202C ; hyphen or brackets in numerical context: surrounded by LRO & PDF marks
[Digits]  > [ArDigits]
; Free-standing hyphen runs become dashes; other double hyphens and 'B'
; become a tatweel (U+0640).
Z Z Z / # _ #    > U+2014 
Z Z / # _ #    > U+2013 
Z Z       > U+0640
B         > U+0640
Z / # _   > U+200D  ; -x  > force initial form with "zero-width joiner"
Z / _ #   > U+200D  ;  x- > force final form    
; Guillemets are swapped (both the ASCII digraphs and the real characters).
U+003C U+003C  > U+00BB ;<<
U+003E U+003E  > U+00AB ;>>
U+00AB    > U+00BB 
U+00BB    > U+00AB
UL        > U+0640  ;_
U+003F    > U+061F  ;?
U+003B    > U+061B  ;;
U+003A U+003A > U+061E ;; '::' ligature 'DOTS' also defined above...
U+002C    > U+060C  ;,
U+005D / [LTR] _ [LTR]   > U+200D U+005D U+200D ; keep shaping when inserting ...[...]... ... 
U+005B / [LTR] _ [LTR]   > U+200D U+005B U+200D ; (but of course this screws up contextual 
                                                ; analysis, so quoting of hamzas etc may be necessary)
;U+005D    > U+005B  ;]  these are automatically mirrored, following Unicode rules
;U+005B    > U+005D  ;[
; Leftover markers: a quote before a vowel is dropped together with the vowel;
; any other quote becomes sukun (U+0652); remaining N, ^ and | are deleted.
Q [VWL]   >   ;stripoff quoted vowels
Q 	  > U+0652
U+00B0	  > U+0652 ; °
N         >   ;
C         >   ;^
BAR       >   ;| 

; Pass 4: clean-up on the Arabic-script output of the previous pass.
pass(Unicode)
 
Class [CONS] = (U+0621 U+0623 U+0624 U+0626 U+0628 U+062A..U+063A U+0641..U+0647 U+066E U+066F U+0672 U+0675 U+0676 U+0678..U+06BF U+06EE U+06EF U+06FA..U+06FF U+0750..U+076D) ; to be improved: not sure that all glyphs listed (esp for Urdu,Sindhi,etc are really "consonants" in the sense that they can carry a sukun, not to mention that some of these languages may not have a sukun at all ;-)
Class [VWLX] = (U+0627 U+0648..U+065E U+0670) ; long vowels, vowel signs, shadda, sukun etc.
Class [VWL]  = (U+0627 U+0648..U+0650 U+0670);
; After "vowel + hyphen", an alif loses the vowel sign that follows it.
U+0627 [VWL]?  / [VWL] Z _ > U+0627 ; e.g. fa-isti_hraj -> initial alif has no vowel 
; Expand the temporary placeholder U+FDF2 (set by the special-ligature rules)
; to lam lam heh.
U+FDF2 > U+0644 U+0644 U+0647 ;
; Delete every hyphen that is still present.
Z > ;