; Source file: /usr/share/texlive/texmf-dist/fonts/misc/xetex/fontmapping/arabxetex/arabtex-malay-fullvoc.map
; Names of the two sides of this mapping (input side / output side).
LHSName   "ArabTeX-malay-fullvocalized"
RHSName   "UNICODE"

; ---------------------------------------------------------------------
; Symbolic names for the input characters used by the rules below.
; ---------------------------------------------------------------------

; -- lowercase Latin letters
Define   a      U+0061   ; a
Define   b      U+0062   ; b
Define   c      U+0063   ; c
Define   d      U+0064   ; d
Define   e      U+0065   ; e
Define   f      U+0066   ; f
Define   g      U+0067   ; g
Define   h      U+0068   ; h
Define   i      U+0069   ; i
Define   j      U+006A   ; j
Define   k      U+006B   ; k
Define   l      U+006C   ; l
Define   m      U+006D   ; m
Define   n      U+006E   ; n
Define   o      U+006F   ; o
Define   p      U+0070   ; p
Define   q      U+0071   ; q
Define   r      U+0072   ; r
Define   s      U+0073   ; s
Define   t      U+0074   ; t
Define   u      U+0075   ; u
Define   v      U+0076   ; v
Define   w      U+0077   ; w
Define   x      U+0078   ; x
Define   y      U+0079   ; y
Define   z      U+007A   ; z

; -- uppercase Latin letters that have a name in this file
Define   A      U+0041   ; A
Define   B      U+0042   ; B
Define   E      U+0045   ; E
Define   G      U+0047   ; G
Define   I      U+0049   ; I
Define   N      U+004E   ; N
Define   O      U+004F   ; O
Define   T      U+0054   ; T
Define   U      U+0055   ; U
Define   W      U+0057   ; W
Define   Y      U+0059   ; Y

; -- ASCII signs; note that several of these names are single capital
;    letters (C, P, V, H, Q, Z) that stand for a sign, not for the letter
Define   C      U+005E   ; ^   CARET
Define   UL     U+005F   ; _   UNDERLINE
Define   P      U+002E   ; .   DOT
Define   V      U+002C   ; ,   COMMA (_V_irgule)
Define   CO     U+003A   ; :   COLON
Define   SC     U+003B   ; ;   SEMICOLON
Define   Ain    U+0060   ; `
Define   H      U+0027   ; '   HAMZA
Define   Q      U+0022   ; "   QUOTE
Define   Z      U+002D   ; -   HYPHEN
Define   BAR    U+007C   ; |

; -- multi-character input sequences, spelled with the names defined above
Define   AN     a N             ; aN     (U+0061 U+004E)
Define   LLAH   l l UL a h      ; ll_ah  (U+006C U+006C U+005F U+0061 U+0068)

; (disabled) Define NUL U+00A6;

; -- output-side code points produced by the hamza rules
Define   Ha     U+0623
Define   Hi     U+0625
Define   Hy     U+0626
Define   Hw     U+0624
Define   Hh     U+06C0            ;; = U+0647 U+0654
Define   HA     U+0622
Define   HB     U+0640 U+0654
Define   HBAR   U+0621            ; '|
; =====================================================================
; Pass: remove the U+E000 marker.
; =====================================================================
pass(Unicode)
U+E000   >      ; eliminate special code for capitalization in transliterations

; =====================================================================
; Pass: contextual pre-processing on the still-ASCII transliteration.
; Character classes and boundary macros used by the rules of this pass.
; =====================================================================
pass(Unicode)

; short vowel letters: a e i o u
Class [VWL]      = (a e i o u)
Class [AVWL]     = (U+064B..U+0650);
; every input character that counts as part of a word:
; a..z A E G I O U B N T Y ^ . _ " ' `
Class [LTR]      = (U+0061..U+007A A E G I O U B N T Y C P UL Q H Ain)
; < > « » ( ) [ ] ‘ ’ “ ”
Class [BRACKETS] = (U+003C U+003E U+00AB U+00BB U+0028 U+0029 U+005B U+005D U+2018 U+2019 U+201C U+201D)
; , ; . :
Class [PUNCT]    = (U+002C U+003B U+002E U+003A)
Class [Iy]       = (I y)
Class [Uw]       = (U w)
Class [Uwu]      = (U w u)
; (disabled) Class [ae] = (U+0061 U+0065) ;(a e)
; (disabled) Class [ou] = (U+0075 U+006F) ;(u o)
Class [Iyi]      = (i y I)
Class [UI]       = (I U)
Class [Digits]   = (U+0030..U+0039)

; Boundary macros for use in rule contexts:
;   BEG  - start of text, a hyphen, or a bracket
;   END  - end of text, a bracket, or punctuation followed by a space / end of text
;   ENDZ - like END, but a hyphen is accepted as well
Define BEG   (#|U+002D|[BRACKETS])
Define END   (#|[BRACKETS]|[PUNCT] U+0020|[PUNCT] #)
Define ENDZ  (#|[BRACKETS]|[PUNCT] U+0020|[PUNCT] #|U+002D)
; special ligatures
; Fixed keywords (mostly UPPERCASE mnemonics) that are replaced as a whole by
; a single ligature or annotation-sign code point. The match strings are
; written as literal code points because several capital letters (C, P, V,
; H, Q, Z) are Define'd as names of ASCII signs in this file.
;; 1. Standard ArabTeX input
;; FDF2 is just used as temporary holder, and is converted back to ligature lam lam hah later (to be taken care of by the font)
;; The BAR is used in the DMG transliteration to suppress the initial A
(a|A) BAR? l Z LLAH > U+0627 U+FDF2 ;; 'al-ll_ah' or 'Al-ll_ah'
LLAH > U+FDF2 ;; 'll_ah' -> 'l l h'
;; UPPERCASE mnemonics for various ligatures or special glyphs
U+004C U+004C U+0048 > U+FDF2 ;; 'LLH' -> 'l l h'
U+0046 U+0041 U+004C U+0049 U+004C U+004C U+0041 U+0048 > U+0641 U+064E U+0644 U+0650 U+0644 U+0647 ;; 'FALILLAH'
U+0041 U+004C U+004C U+0041 U+0048 > U+0627 U+0644 U+0644 U+0647 ; 'ALLAH'
U+004C U+004C U+0041 U+0048 > U+0644 U+0644 U+0647 ; 'LLAH'
U+0053 U+004C U+004D > U+FDFA ; 'SLM' : .sallY al-ll_ah `alayhi wa-sallam
U+0028 U+0028 > U+FD3F ;; '((' : ornate right par
U+0029 U+0029 > U+FD3E ;; '))' : ornate left par
U+0052 U+0049 U+0059 U+0041 U+004C > U+FDFC ;; 'RIYAL' : Saudi currency sign
;; most fonts won't have the following ones:
U+0053 U+0041 U+004C U+004C U+0041 U+0053 U+0054 U+004F U+0050 > U+FDF0 ;; 'SALLASTOP' (.sallY with "ye-barree", Quranic stop sign)
U+0051 U+0041 U+004C U+0041 > U+FDF1 ;; 'QALA' : qalY with ye-barree, Quranic stop sign
U+0041 U+004B U+0042 U+0041 U+0052 > U+FDF3 ;; 'AKBAR'
U+004D U+0055 U+0048 U+0041 U+004D U+004D U+0041 U+0044 > U+FDF4 ;; 'MUHAMMAD'
U+0053 U+0041 U+004C U+0041 U+004D > U+FDF5 ;; 'SALAM'
U+0052 U+0041 U+0053 U+004F ? U+0055 U+004C > U+FDF6 ;; 'RASUL' (also unicode 'RASOUL')
U+0041 U+004C U+0041 U+0059 U+0048 (U+0049|U+0045) > U+FDF7 ;; 'ALAYHI' (also unicode 'ALAYHE')
U+0057 U+0041 U+0053 U+0041 U+004C U+004C U+0041 U+004D > U+FDF8 ;; 'WASALLAM'
U+0053 U+0041 U+004C U+004C U+0041 > U+FDF9 ;; 'SALLA'
U+004A U+0041 U+004C U+004C U+0041 > U+FDFB ;; 'JALLA' : "jalla jalAluhu"
U+0042 U+0041 U+0053 U+004D U+0041 U+004C U+0041 > U+FDFD ;; 'BASMALA' : very few fonts have this ligature afaik
;; 2. Mnemonics for the annotation signs U+06D6..U+06ED; a leading '^' or '_'
;;    is part of the keyword.
U+005E U+0053 U+004C U+0059 > U+06D6 ;; ^SLY
U+005E U+0051 U+004C U+0059 > U+06D7 ;; ^QLY
;; FIX: this rule used to match plain '^MIM', exactly like the U+06E2 rule
;; further down, so the two rules had identical match strings and one of
;; them was shadowed. Its own comment names the keyword '^MIM-', so the
;; trailing hyphen (U+002D) is now part of the match string.
U+005E U+004D U+0049 U+004D U+002D > U+06D8 ;; ^MIM- (with trailing hyphen)
U+005E U+004C U+0041 > U+06D9 ;; ^LA
U+005E U+004A U+0049 U+004D > U+06DA ;; ^JIM
U+005E U+0044 U+004F U+0054 U+0053 > U+06DB ;; ^DOTS
U+005E U+0053 U+0049 U+004E > U+06DC ;; ^SIN
U+0048 U+0049 U+005A U+0042 > U+06DE ;; HIZB
U+0043 U+0049 U+0052 U+0043 U+005A U+0045 U+0052 U+004F > U+06DF ;; CIRCZERO
U+0052 U+0045 U+0043 U+0054 U+005A U+0045 U+0052 U+004F > U+06E0 ;; RECTZERO
U+005E U+004A U+0041 U+005A U+004D > U+06E1 ;; ^JAZM
U+005E U+004D U+0049 U+004D > U+06E2 ;; ^MIM (no hyphen)
U+005F U+0053 U+0049 U+004E > U+06E3 ;; _SIN
U+005E U+004D U+0041 U+0044 U+0044 U+0041 > U+06E4 ;; ^MADDA
U+0057 U+0041 U+0057 > U+06E5 ;; WAW
U+0059 U+0045 U+0048 > U+06E6 ;; YEH
U+005E U+0059 U+0045 U+0048 > U+06E7 ;; ^YEH
U+005E U+004E U+0055 U+004E > U+06E8 ;; ^NUN
U+0053 U+0041 U+004A U+0044 U+0041 > U+06E9 ;; SAJDA
U+005F U+0053 U+0054 U+004F U+0050 > U+06EA ;; _STOP
U+005E U+0053 U+0054 U+004F U+0050 > U+06EB ;; ^STOP
U+005E U+0052 U+0053 U+0054 U+004F U+0050 > U+06EC ;; ^RSTOP
U+005F U+004D U+0049 U+004D > U+06ED ;; _MIM
U+0044 U+004F U+0054 U+0053 > U+061E ;; DOTS (also possible by typing "::")
;; This is to enclose digits within glyph U+06DD:
;; the run of digits between '[[' and ']]' is captured as 'dig' and re-emitted after U+06DD.
U+005B U+005B ([Digits]+)=dig U+005D U+005D > U+06DD @dig ; [[digits]]
; vowels
; Normalisation of vowel spellings: a doubled short vowel is rewritten to the
; corresponding capital (long-vowel) letter, 'ae'/'ao' become 'ay'/'aw', and
; any remaining 'e'/'o' is folded onto 'i'/'u'.
; NOTE(review): the rules of this section are left in their original order;
; how the compiler prioritises overlapping rules should be confirmed against
; the TECkit documentation before reordering anything.
u u > U
i i > I
a a > A
CO O > U+06FC ; ':O'
a e > a y
a o > a w
CO U > U+06C7 ; ':U'
e > i ;; but not pashto > U+0659 and kashmiri > y + U+0658 (U+06CE is kurdish)
o > u
;; Persian ezafe (ALL BUT ARABIC AND UIGHUR)
;; In these rules U+0048 is the literal capital letter 'H'; the name H is
;; Define'd as the apostrophe (hamza) in this file and cannot be used for it.
U+0048 Z UL? (Q? (i|e))=vv / _ END > U+06C0 @vv U+200C ;H-i -> heh+hamza final; what about 06C2 in Urdu ?<<<
U+0048 Z Q?=qq (I|E)=vv / _ END > h U+0020 U+0627 @qq i @vv ; H-I
Q?=q1 (I|E) Z UL? Q?=q2 (i|e) / _ END > @q1 i U+0626 @q2 i U+200C ; ...I-e
Z Q?=qq I / (A|U) _ ENDZ > U+0626 @qq i y ; ...A-I or ...U-I
Z UL? (Q? (i|e))=vv / (A|U) _ END > U+0649 @vv ; ...A-i or ...U-i
Z Q?=q1 (e|i) / _ ENDZ > @q1 i ; ...-i
Z / U+0048 _ > U+0020 ; a hyphen directly after a literal 'H' is replaced by a space
b Q?=q1 (e|i) Q?=q2 U / # _ > b @q1 i U+0627 @q2 u w ;; special case of prep be without hyphen
;; initial characters ...
;; (BEG = start of text, hyphen, or bracket)
a l Z / BEG _ > U+0627 l Z
;; wasla on initial alif : NEW! NOT TESTED!
H / BEG _ l l > U+0671 ; e.g. alla_dI -> 'lla_dI
; a short vowel at BEG gets U+0627 placed in front of it
[VWL]=v1 / BEG _ > U+0627 @v1
Q?=q1 U / BEG _ > U+0627 @q1 u w ; -U
; Q?=q1 I / BEG _ > U+0627 @q1 i y ; -I
Q?=q1 I / BEG _ > U+0627 @q1 i y
;; Words with anomalous orthography:
; mi'aT -> ماية
; The four rules below cover the spellings with and without the '"' quote
; after 'm' and after the hamza; each applies only when followed by T or t.
;;; i H / m Q ? _ Q ? a ? (T|t) > i A Hy
m Q i H Q a / _ (T|t) > m U+0627 Hy
m Q i H a / _ (T|t) > m U+0627 Hy a
m i H Q a / _ (T|t) > m i U+0627 Hy
m i H a / _ (T|t) > m i U+0627 Hy a
;; contextual analysis of hamzas
;; THESE DO NOT APPLY FOR SINDHI
;; Each rule replaces the input hamza H (the apostrophe) by one of the
;; output code points Ha, Hi, Hy, Hw, Hh, HA, HB or HBAR, chosen from the
;; neighbouring vowel letters. An optional Q (the '"' quote) is tolerated
;; between the hamza and the following vowel.
;; NOTE(review): several rules here overlap; keep the original order unless
;; the rule-priority behaviour of the compiler has been checked.
; initial hamzas
; "(#|^[LTR])" = preceded by start of text or by a character outside [LTR]
H / (#|^[LTR]) _ Q? [Iyi] > Hi
H Q A / (#|^[LTR]) _ > HA ; alif madda
H / (#|^[LTR]) _ Q? [Uwu] > Ha
H / (#|^[LTR]) _ Q? a > Ha
Q H / (#|^[LTR]) _ > HB ; quoted isolated hamza at beginning of word
; quoted hamzas in contextual mode, equivalent to verbatim mode
; <carrier letter> " ' selects the output form explicitly
a Q H > Ha
i Q H > Hi
y Q H > Hy
w Q H > Hw
h Q H > Hh
A Q H > HA
B Q H > HB
BAR Q H > HBAR
; hamza + fathatan
H / a _ Q? AN > Ha
;;H / [Iyi] _ Q? AN > Hy
H / _ Q? AN > Hy
;final hamzas
; "(#|^[LTR])" after the hamza = followed by end of text or a character outside [LTR]
H / A _ Q? [VWL]? N? (#|^[LTR]) > HBAR
H / [Uw] _ Q? [VWL]? N? (#|^[LTR]) > HBAR
H / [Iy] _ Q? (i N?|u N?)? (#|^[LTR]) > HBAR ; not AN ...
H / a _ Q? (#|^[LTR]) > Ha ; not AN...
H / a _ Q? (u|i) N? (#|^[LTR]) > Ha ; not AN...
H / i _ Q? [VWL]? N? (#|^[LTR]) > Hy
H / u _ Q? [VWL]? N? (#|^[LTR]) > Hw
;;and more hamzas ...
; n°1: kasra/ya
H / [LTR] _ Q? [Iyi] ^N > Hy
H / [Iyi] _ > Hy
; n°2: damma/waw
H / [LTR] _ Q? (U|u) ^N > Hw
H / (U|u) _ Q? ^[Iyi] > Hw
H / U _ > HBAR
; n°3: fatha/alif
H Q? A > HA
H / A _ Q? a > HBAR
H / ^[UI] _ Q? a > Ha
H / a _ Q? ^[UI] > Ha
; =====================================================================
; Pass: letter-by-letter conversion of the transliteration to Arabic
; script. Classes needed by the rules of this pass are declared first.
; =====================================================================
pass(Unicode)

Class [Digits]          = (U+0030..U+0039)
Class [ArDigits]        = (U+0660..U+0669)
Class [ArEasternDigits] = (U+06F0..U+06F9)
; a e i o u
Class [VWL]             = (a e i o u)
; a..z A E I O U B N T Y ^ . _ " ' `
; NOTE(review): unlike the [LTR] class declared in the preceding pass, this
; one does not contain U+0047 (G) - confirm whether that is intentional.
Class [LTR]             = (U+0061..U+007A A E I O U B N T Y C P UL Q H Ain)
Class [Iy]              = (I y)
Class [Uw]              = (U w)
;normal code
; Letter table. Most letters come as a pair of rules: the doubled input
; produces the letter followed by U+0651 (marked ";;SHADDA"), the single
; input produces the letter alone. Prefixes select variants of a base letter:
;   P  = '.'   C = '^'   V = ','   CO = ':'   UL = '_'   Q = '"'
; A leading Q on a vowel rule yields the form without the vowel sign.
; NOTE(review): rules are kept in their original order; check how the
; compiler prioritises overlapping rules before moving any of them.
(P z|U+1E93) (P z|U+1E93) > U+0638 U+0651 ;;SHADDA
(P z|U+1E93) > U+0638
C z C z > U+0698 U+0651 ;;SHADDA
C z > U+0698
V z V z > U+0696 U+0651 ;;SHADDA
V z > U+0696 ;pashto
z z > U+0632 U+0651 ;;SHADDA
z > U+0632
Q Y Y > U+0649 U+0651 ;;SHADDA
Q Y > U+0649
Y Y > U+064E U+0649 U+0651 ;;SHADDA
Y > U+064E U+0649
P I P I / _ # > U+0650 U+06CC U+0651 ;; SHADDA
P I / _ # > U+0650 U+06CC ;; workaround to have ya without dots in Arabic
y y > U+064A U+0651 ;;SHADDA
y > U+064A
v v > U+06CF U+0651 ;;SHADDA
v > U+06CF
W > U+0648 U+0627 ; waw + alif
C U > U+064F U+0648 U+0653; historical spelling : madda on waw
C w C w > U+06C9 U+0651 ;;SHADDA
C w > U+06C9
CO w CO w > U+06CA U+0651 ;;SHADDA
CO w > U+06CA
U A / _ # > U+064F U+0648 U+0627
Q U [Uw] > U+0648 U+0651 ;;SHADDA
Q U > U+0648
U [Uw] > U+064F U+0648 U+0651 ;;SHADDA
U > U+064F U+0648
w w > U+0648 U+0651 ;;SHADDA
w > U+0648
; quoted short u (with optional N) produces no output
Q u N? >
u N > U+064C
u > U+064F
V t V t > U+0679 U+0651 ;;SHADDA
V t > U+0679 ;urdu
UL (t|s) UL (t|s) > U+062B U+0651 ;;SHADDA
UL (t|s) > U+062B
(P t|U+1E6D) (P t|U+1E6D) > U+0637 U+0651 ;;SHADDA
(P t|U+1E6D) > U+0637
t t > U+062A U+0651 ;;SHADDA
t > U+062A
T T Q AN > U+0629 U+0651 ;;SHADDA
T Q AN > U+0629
T AN > U+0629 U+064B
T T > U+0629 U+0651 ;;SHADDA
T > U+0629
C s C s > U+0634 U+0651 ;;SHADDA
C s > U+0634
(P s|U+1E63) (P s|U+1E63) > U+0635 U+0651 ;;SHADDA
(P s|U+1E63) > U+0635
V s V s > U+069A U+0651 ;;SHADDA
V s > U+069A ;Pashto
CO s CO s > U+069B U+0651 ;;SHADDA
CO s > U+069B ; sin with 3 dots below
s s > U+0633 U+0651 ;;SHADDA
s > U+0633
C r C r > U+06EF U+0651 ;;SHADDA ; missing in ArabTeX <<<
C r > U+06EF
CO r CO r > U+0697 U+0651 ;;SHADDA ; missing in ArabTeX <<<
CO r > U+0697 ; Dargwa language, Dagestan
P r P r > U+0694 U+0651 ;;SHADDA
P r > U+0694 ; r with dot below; in Kurdish .r -> 0694
V r V r > U+0691 U+0651 ;;SHADDA
V r > U+0691 ;Urdu
r r > U+0631 U+0651 ;;SHADDA
r > U+0631
P q P q > U+066F U+0651 ;;SHADDA
P q > U+066F
q q > U+0642 U+0651 ;;SHADDA
q > U+0642
p p > U+06A8 U+0651 ;;SHADDA
p > U+06A8 ;Old malay
CO n CO n > U+06B1 U+0651 ;;SHADDA
CO n > U+06B1
C n C n > U+06BD U+0651 ;;SHADDA
C n > U+06BD ; <<< ArabTeX yields nūn with three dots below, which is not in Unicode!
P n P n > U+06BA U+0651 ;;SHADDA
P n > U+06BA ; urdu
V n V n > U+06BC U+0651 ;;SHADDA
V n > U+06BC ; Pashto
n n > U+0646 U+0651 ;;SHADDA
n > U+0646
m m > U+0645 U+0651 ;;SHADDA
m > U+0645
P l P l > U+06B6 U+0651 ;;SHADDA
P l > U+06B6 ; ADDED <<<
C l C l > U+06B5 U+0651 ;;SHADDA
C l > U+06B5 ;kurdish
l l > U+0644 U+0651 ;;SHADDA
l > U+0644
P k P k > U+06A9 U+0651 ;;SHADDA
P k > U+06A9 ; pashto urdu
; NOTE(review): '_k' yields U+063A, the same code point as '.g' further down - confirm this is intended
UL k UL k > U+063A U+0651 ;;SHADDA
UL k > U+063A
k k > U+0643 U+0651 ;;SHADDA
k > U+0643
C g C g > U+06A0 U+0651 ;;SHADDA
C g > U+06A0
CO j CO j > U+0684 U+0651 ;;SHADDA
CO j > U+0684
j j > U+062C U+0651 ;;SHADDA
j > U+062C
Q I > U+064A
I > U+0650 U+064A
C I > U+0650 U+064A U+0653 ; historical spelling : madda on ya
UL I > ; suggestion of O. Smrz (arabtex-plus project) URL = ###
Q i N? > ; quoted short i (with optional N) produces no output
i N > U+064D
i > U+0650
x x > U+062E U+0651 ;;SHADDA
x > U+062E
; '_h' yields the same code point as 'x' above
UL h UL h > U+062E U+0651 ;;SHADDA
UL h > U+062E
(P h|U+1E25) (P h|U+1E25) > U+062D U+0651 ;;SHADDA
(P h|U+1E25) > U+062D
V h V h > U+06C1 U+0651 ;;SHADDA
V h > U+06C1
h h > U+0647 U+0651 ;;SHADDA
h > U+0647
CO g CO g > U+06B3 U+0651 ;;SHADDA
CO g > U+06B3
V g V g > U+06AC U+0651 ;;SHADDA
V g > U+06AC ; kaf with dot above
P g P g > U+063A U+0651 ;;SHADDA
P g > U+063A
g g > U+0762 U+0651 ;;SHADDA
g > U+0762
G G > U+06AB U+0651 ;;SHADDA
G > U+06AB ;pashto
P f P f > U+06A1 U+0651 ;;SHADDA
P f > U+06A1
f f > U+0641 U+0651 ;;SHADDA
f > U+0641
V d V d > U+068A U+0651 ;;SHADDA
V d > U+068A
P V d P V d > U+068B U+0651 ;;SHADDA
P V d > U+068B ;; (like U+0688 with dot below, for Lahnda = Western Punjabi)
C d C d > U+06EE U+0651 ;;SHADDA
C d > U+06EE ; not defined in ArabTeX
CO d CO d > U+068F U+0651 ;;SHADDA
CO d > U+068F
UL (d|z) UL (d|z) > U+0630 U+0651 ;;SHADDA
UL (d|z) > U+0630
(P d|U+1E0D) (P d|U+1E0D) > U+0636 U+0651 ;;SHADDA
(P d|U+1E0D) > U+0636
d d > U+062F U+0651 ;;SHADDA
d > U+062F
V c V c > U+0685 U+0651 ;;SHADDA
V c > U+0685 ;pashto
; '^c' and plain 'c' (below) both yield U+0686
C c C c > U+0686 U+0651 ;;SHADDA
C c > U+0686
P C c P C c > U+06BF U+0651 ;;SHADDA
P C c > U+06BF ;; cheh with dot above <<<
c c > U+0686 U+0651 ;;SHADDA
c > U+0686
CO c CO c > U+0682 U+0651 ;;SHADDA
CO c > U+0682 ; old pashto
CO b CO b > U+067B U+0651 ;;SHADDA
CO b > U+067B
P b P b > U+066E U+0651 ;;SHADDA
P b > U+066E
b b > U+0628 U+0651 ;;SHADDA
b > U+0628
H A > U+0622
C A > U+064E U+0622 ; historical spelling
UL A > U+064E U+0649
UL a > U+0670
UL u > U+064F;
UL i > U+0650;
; 'aN' directly after "A'" or after the Ha code point: no extra U+0627 is written
Q AN / (A H|Ha) _ > ;
AN / (A H|Ha) _ > U+064B
Q AN > U+0627
AN > U+064B U+0627
; quoted short a produces no output
Q a >
a > U+064E
Q A > U+0627
A > U+064E U+0627
Ain Ain > U+0639 U+0651 ;;SHADDA
Ain > U+0639
; any hamza (apostrophe) still left after the contextual rules of the earlier pass
H H > U+0621 U+0651 ;;SHADDA
H > U+0621
; Digits, hyphens, brackets and punctuation.
; The numeric-context rules (a sign between two [Digits]) come first and keep
; the Western sign; outside such a context the signs are replaced further down.
U+002C / [Digits] _ [Digits] > U+002C ; comma in numerical context not an Arabic comma
Z Z / [Digits] _ [Digits] > U+2013 ; two hyphens in numerical context -> endash
(Z|U+005D|U+005B|U+0028|U+0029)=xx / [Digits] _ [Digits] > U+202D @xx U+202C ; hyphen or brackets in numerical context: surrounded by LRO & PDF marks
; ASCII digits 0..9 are replaced by the corresponding members of [ArDigits] (U+0660..U+0669)
[Digits] > [ArDigits]
; hyphen runs standing alone between text boundaries become dashes
Z Z Z / # _ # > U+2014
Z Z / # _ # > U+2013
; otherwise '--' and 'B' both give U+0640
Z Z > U+0640
B > U+0640
Z / # _ > U+200D ; -x > force initial form with "zero-width joiner"
Z / _ # > U+200D ; x- > force final form
; angle quotes: the output is the opposite guillemet of the one typed
U+003C U+003C > U+00BB ;<<
U+003E U+003E > U+00AB ;>>
U+00AB > U+00BB
U+00BB > U+00AB
UL > U+0640 ;_
U+003F > U+061F ;?
U+003B > U+061B ;;
U+003A U+003A > U+061E ;; '::' ligature 'DOTS' also defined above...
U+002C > U+060C ;,
U+005D / [LTR] _ [LTR] > U+200D U+005D U+200D ; keep shaping when inserting ...[...]... ...
U+005B / [LTR] _ [LTR] > U+200D U+005B U+200D ; (but of course this screws up contextual
; analysis, so quoting of hamzas etc may be necessary
;U+005D > U+005B ;] these are automatically mirrored, following Unicode rules
;U+005B > U+005D ;[
Q [VWL] > ;stripoff quoted vowels
U+00B0 > U+0652 ; °
; leftover helper characters produce no output
N > ;
C > ;^
BAR > ;|
; Final pass, working on the Arabic-script output of the previous pass:
; inserts U+0652 after consonants that are not followed by a vowel sign,
; expands the U+FDF2 placeholder, and removes leftover '"' and '-'.
pass(Unicode)
Class [CONS] = (U+0621 U+0623 U+0624 U+0626 U+0628 U+062A..U+063A U+0641..U+0647 U+066E U+066F U+0672 U+0675 U+0676 U+0678..U+06BF U+06EE U+06EF U+06FA..U+06FF U+0750..U+076D) ; to be improved: not sure that all glyphs listed (esp for Urdu,Sindhi,etc are really "consonants" in the sense that they can carry a sukun, not to mention that some of these languages may not have a sukun at all ;-)
Class [VWLX] = (U+0627 U+0648..U+065E U+0670) ; long vowels, vowel signs, shadda, sukun etc.
; NOTE(review): [VWL] is declared here but not referenced by any rule visible in this pass
Class [VWL] = (U+0627 U+0648..U+0650 U+0670);
; consonant followed by (optional hyphen and) another consonant that is not itself followed by U+0651
[CONS]=k1 / _ Z? [CONS] ^U+0651 > @k1 U+0652 ; hack: 2nd consonant has no shadda
U+064E (U+064A|U+0648)=hc / _ (^[VWLX]|#) > U+064E @hc U+0652 ; ay and aw diphtongs
U+0627 / # _ U+0644 > U+0627 U+064E ; otherwise initial alif takes fatha
[CONS]=k1 / _ ([CONS]|#) > @k1 U+0652 ; consonant + (consonant or final)
;; [CONS]=k1 / _ # > @k1 U+0652 ; final consonant
; expand the temporary U+FDF2 placeholder into its three letters
U+FDF2 > U+0644 U+0644 U+0647 ;
; remaining '"' and '-' characters produce no output
Q > ;
Z > ;