% Current File : //usr/share/texlive/texmf-dist/tex/generic/unicode-data/load-unicode-xetex-classes.tex |
% File load-unicode-xetex-classes.tex
%
% Copyright 2015-2019 The LaTeX3 Project
%
% It may be distributed and/or modified under the conditions of
% the LaTeX Project Public License (LPPL), either version 1.3c of
% this license or (at your option) any later version. The latest
% version of this license is in the file
% http://www.latex-project.org/lppl.txt.
%
% Issues with this file should be reported at
% https://github.com/latex3/unicode-data
%
% This file parses EastAsianWidth.txt and LineBreak.txt, provided by the
% Unicode Consortium, and when used with XeTeX sets \XeTeXcharclass for
% the following classes of code point:
% - "ID" (ideographic)
% - "CJ" (conditional Japanese starter)
% - "OP" (opener)
% - "CL" (closer)
% - "NS" (non-starter)
% - "EX" (exclamation)
% - "IS" (infix separator)
% - "CM" (combining marks)
%
% All code points of classes "ID" and "CJ" are assigned to a \XeTeXcharclass,
% but for other classes this only occurs when they fall into east Asian width
% type "F", "H" or "W" (full-, half- and wide-width).
%
% The following mappings between Unicode and XeTeX classes occur
% - "ID" and "CJ" are class 1
% - "OP" is class 2
% - "CL", "NS", "EX", "IS" are class 3
% - "CM" is class 256 (ignored)
% as standard: these may be over-ridden by defining \XeTeXcharclass<class>
% as required. (If classes "ID" or "CL" are explicitly set, the other members
% of the same groups above will inherit these values.)
%
% This file does _not_ activate XeTeX's inter-character token mechanism
% (\XeTeXinterchartokenstate is not set) nor does it install any material in
% the inter-character token registers.
%
% Note that this file is separate from the main loader as the data structure
% here may need more refinement at the macro level.
%
% =============================================================================
%
% The data loaded here can currently only be used by XeTeX: check for the
% appropriate primitive.
% Guard 1: stop reading this file when \XeTeXcharclass has the same meaning
% as \undefined (which is expected to be undefined). The \expandafter expands
% the closing \fi before \endinput is executed, so the conditional is already
% closed when input from this file stops at the end of that line.
\ifx\XeTeXcharclass\undefined
\expandafter\endinput
\fi
% Just in case, check for the e-TeX extensions.
% Guard 2: same pattern, keyed on \eTeXversion; the code further down uses
% \unless, \ifdefined and \ifcsname.
\ifx\eTeXversion\undefined
\expandafter\endinput
\fi
% This file can be loaded in IniTeX mode so the category codes of |{|, |}| and
% |#| may not be correct. Everything is done in a group so that only the
% settings we want to propagate are made available generally.
% Open the group that encloses the rest of the file: every local setting and
% helper macro below disappears at the matching \endgroup at the end of the
% file, and only \global assignments survive.
\begingroup
% Make |{| and |}| usable as group delimiters.
\catcode`\{=1 %
\catcode`\}=2 %
% Write some basic information to the log.
% |^| is made a superscript character so the double-caret notation for the
% line feed character can be used, and \newlinechar is set to that character
% so that it starts a new line inside \message.
\catcode`\^=7 %
\newlinechar=`\^^J %
\message{^^J}%
\message{load-unicode-xetex-classes.tex v1.14 (2020-10-24)^^J}%
\message{Reading Unicode east Asian character class data^^J}%
% A string version of |#| will be needed to look for comment lines in the
% source. Once that is done proper parsing can begin.
% \hash is defined while |#| has category code 12, so it holds a single
% "other" |#| token; |#| is then made the macro parameter character
% (category code 6) for the definitions that follow.
\catcode`\#=12 %
\def\hash{#}%
\catcode`\#=6 %
% \firsttoken<tokens>\relax expands to the first token (or brace group) of
% <tokens>; everything else up to the delimiting \relax is discarded.
\def\firsttoken#1#2\relax{#1}%
% \parseunicodedataI<line>\relax: entry point for one line of a data file.
% The first token of <line> is compared with the stored |#| character: if
% they match the line is a comment and is dropped, otherwise the line is
% passed on unchanged to \parseunicodedataII. The |?| placed before the
% \relax guarantees that \firsttoken always finds a first argument.
\def\parseunicodedataI#1\relax{%
  \if\hash\firsttoken#1?\relax
    % Comment line: nothing to do.
  \else
    \parseunicodedataII#1\relax
  \fi
}%
% Both files to be parsed here have potential ranges of code points: find the
% first entry and search for the second.
% \parseunicodedataII<line>\relax: split a data line at the semicolon into
% the code point field and the property value, then pass both on as
%   \parseunicodedataIII<code point field>....\relax{<value>}
% The |....| padding lets \parseunicodedataIII detect whether the field was
% a single code point or a |first..last| range.
%
% Two layouts of the data files are accepted:
%   0000..0008;CM # ...       (no spaces around the semicolon)
%   0000..0008    ; CM # ...  (fields padded with spaces, as used by newer
%                              releases of the Unicode data files)
% so a space at the end of the code point field and a space in front of the
% value are both tolerated.
%   #1  everything before the first semicolon
%   #2  everything after it, up to the end of the line
\def\parseunicodedataII#1;#2\relax{%
  \parseunicodedataIIaux#1 \relax#2\relax
}%
% Helper for \parseunicodedataII.
%   #1  code point field: delimited by the first space, so a trailing space
%       in the field is dropped (the caller appends one space so that the
%       delimiter is always present)
%   #2  whatever is left of the code point field (empty or a space), ignored
%   #3  first character of the value: an undelimited argument, so TeX skips
%       any space in front of it
%   #4  remaining characters of the value, up to the next space (may be
%       empty for one-letter values)
%   #5  rest of the line (the trailing comment), ignored
\def\parseunicodedataIIaux#1 #2\relax#3#4 #5\relax{%
  \parseunicodedataIII#1....\relax{#3#4}%
}%
% A \loop ... \repeat construct with the same syntax as the one in plain TeX,
% which may not be defined (yet).
% \loop stores everything up to \repeat in \body and starts iterating. The
% stored text has to end inside an open conditional: while that conditional
% is true the rest of \body runs, the \fi is expanded first to close the
% conditional and \iterate is called again; once it is false TeX skips
% straight to the \fi and the loop is finished.
\def\loop#1\repeat{\def\body{#1}\iterate}%
\def\iterate{\body\expandafter\iterate\fi}%
% \repeat is given the meaning of \fi so that a loop appearing inside skipped
% conditional text is counted correctly.
\let\repeat\fi
% A shared routine for reading the data files: only one part of the parser
% has to be altered.
% \storedpar holds a single \par token: this is what \read returns for an
% empty line, so \ifx against it detects blank lines.
\def\storedpar{\par}%
% \readandparse{<name>}: read <name>.txt line by line through input stream 0.
%   #1  base name of the data file (without the .txt extension)
% The first two lines (version and date information) are copied to the log.
% Every later non-blank line is passed to \parseunicodedataI followed by a
% delimiting \relax. While reading, |#| has category code 12 so that comment
% markers in the data are ordinary characters; it is reset to 6 afterwards.
%
% If the file cannot be opened \ifeof is true straight after \openin. In that
% case an error is raised and nothing is read: a \read from a stream that is
% not open would otherwise take its input from the terminal.
\def\readandparse#1{%
  \openin0=#1.txt %
  \ifeof0 %
    \errmessage{load-unicode-xetex-classes: cannot open data file #1.txt}%
  \else
    \catcode`\#=12 %
    % Read two lines from the source file to extract the version information
    \read0 to \unicodedataline
    \message{\unicodedataline ^^J}%
    \read0 to \unicodedataline
    \message{\unicodedataline ^^J}%
    \loop\unless\ifeof0 %
      \read0 to \unicodedataline
      \unless\ifx\unicodedataline\storedpar
        % Expand \unicodedataline once so the parser sees the line itself.
        \expandafter\parseunicodedataI\unicodedataline\relax
      \fi
    \repeat
    \catcode`\#=6 %
  \fi
  \closein0 %
}%
% Set up the different line break classes recognised.
% Default class numbers. Each \XeTeXcharclass<class> name is only given a
% value here if it is not already defined, so a definition made before this
% file is read takes precedence.
%
% "ID" defaults to class 1; "CJ" copies whatever "ID" is at this point.
\unless\ifdefined\XeTeXcharclassID
  \chardef\XeTeXcharclassID=1 %
\fi
\unless\ifdefined\XeTeXcharclassCJ
  \let\XeTeXcharclassCJ\XeTeXcharclassID
\fi
% "OP" defaults to class 2.
\unless\ifdefined\XeTeXcharclassOP
  \chardef\XeTeXcharclassOP=2 %
\fi
% "CL" defaults to class 3; "EX", "IS" and "NS" copy whatever "CL" is at
% this point.
\unless\ifdefined\XeTeXcharclassCL
  \chardef\XeTeXcharclassCL=3 %
\fi
\unless\ifdefined\XeTeXcharclassEX
  \let\XeTeXcharclassEX\XeTeXcharclassCL
\fi
\unless\ifdefined\XeTeXcharclassIS
  \let\XeTeXcharclassIS\XeTeXcharclassCL
\fi
\unless\ifdefined\XeTeXcharclassNS
  \let\XeTeXcharclassNS\XeTeXcharclassCL
\fi
% "CM" defaults to class 256.
% NOTE(review): the header of this file describes 256 as the "ignored"
% class; recent XeTeX releases appear to document 4096 as the ignored class,
% so confirm against the XeTeX reference manual that 256 is still intended.
\unless\ifdefined\XeTeXcharclassCM
  \chardef\XeTeXcharclassCM=256 %
\fi
% Check the line break class and if necessary the east Asian width for the
% current code point. For code points of class |ID| or |CJ| there may be a
% range to set, and these are always recorded. In other cases if the code point
% is one of those we may need to set up then save it for checking against the
% list of east Asian widths.
% Strings used to recognise the two classes that are always assigned.
\def\ID{ID}%
\def\CJ{CJ}%
% \parseunicodedataIII<first>..<last>..<rest>\relax{<class>}
% (version used while reading the line break data)
%   #1  first (or only) code point, in hexadecimal
%   #2  last code point of a range; empty when the entry is a single code
%       point (the caller pads the code point field with |....|)
%   #3  left-over padding, ignored
%   #4  line break class read from the data file
\def\parseunicodedataIII#1..#2..#3\relax#4{%
\def\temp{#4}%
% The number tested below starts as |0| and gains a |1| digit for each \ifx
% that is true, so the test means "class is ID or CJ".
\ifnum 0%
\ifx\temp\ID 1\fi
\ifx\temp\CJ 1\fi
>0 %
% ID or CJ: set the class directly, for one code point or for a whole range.
\ifx\relax#2\relax
\parseunicodedataIV{#1}{#1}{#4}%
\else
\parseunicodedataIV{#1}{#2}{#4}%
\fi
\else
% Any other class is only of interest if a \XeTeXcharclass<class> name
% exists for it.
\ifcsname XeTeXcharclass#4\endcsname
\ifx\relax#2\relax
% Single code point: store the class name in \LB@<decimal code point>.
\expandafter\def\csname LB@\number"#1\endcsname{#4}%
\else
% Range: store the class name for every code point from #1 to #2. The local
% \loop overwrites \body, which the surrounding reader loop is still using,
% so \body is saved first and restored afterwards. No group is opened here;
% the \LB@... definitions are local ones.
\let\savedbody\body
\count0="#1 %
\loop
\unless\ifnum\count0>"#2 %
\expandafter\def\csname LB@\number\count0 \endcsname{#4}%
\advance\count0 by 1 %
\repeat
\let\body\savedbody
\fi
\fi
\fi
}%
% As we are inside a loop already, there needs to be a group here to preserve
% the iterator.
% \parseunicodedataIV{<first>}{<last>}{<class>}
% (version used while reading the line break data)
%   #1  first code point, in hexadecimal
%   #2  last code point, in hexadecimal (equal to #1 for a single code point)
%   #3  class name; \XeTeXcharclass<class> supplies the number to assign
% Globally sets \XeTeXcharclass for every code point from #1 to #2 inclusive.
% The work is done inside a group so that \count0 and the \body of the
% enclosing loop are restored on exit; the assignments themselves are \global.
\def\parseunicodedataIV#1#2#3{%
  \begingroup
    \count0="#1 %
    \loop
      % Continue for as long as the last code point is not below \count0.
      \unless\ifnum"#2<\count0 %
        \global\XeTeXcharclass\count0=\csname XeTeXcharclass#3\endcsname
        \advance\count0 by 1 %
    \repeat
  \endgroup
}%
% First pass: read the line break classes.
\readandparse{LineBreak}%
% For |EastAsianWidth.txt|, action is only needed if the character has width
% |F|, |H| or |W|. Once again there may be a range of characters to handle.
% \parseunicodedataIII<first>..<last>..<rest>\relax{<width>}
% (version used while reading the east Asian width data)
%   #1  first (or only) code point, in hexadecimal
%   #2  last code point of a range; empty for a single code point
%   #3  left-over padding, ignored
%   #4  east Asian width value read from the data file
% Only the first character of the width is inspected: it is extracted once
% into \temp and compared with |F|, |H| and |W|. Each match adds a |1| digit
% to the number tested by \ifnum, so the test means "first letter is F, H or
% W". For such entries \parseunicodedataIV is called for every code point
% concerned.
\def\parseunicodedataIII#1..#2..#3\relax#4{%
  \edef\temp{\firsttoken#4\relax}%
  \ifnum 0%
      \if F\temp 1\fi
      \if H\temp 1\fi
      \if W\temp 1\fi
      >0 %
    \ifx\relax#2\relax
      % Single code point, passed on as a hexadecimal constant.
      \parseunicodedataIV{"#1}%
    \else
      % Range: step through it inside a group so that \count0 and the \body
      % of the enclosing loop are restored afterwards.
      \begingroup
        \count0="#1 %
        \loop
          \unless\ifnum"#2<\count0 %
            \parseunicodedataIV{\count0}%
            \advance\count0 by 1 %
        \repeat
      \endgroup
    \fi
  \fi
}%
% Only take action if a line breaking class was previously saved: that will
% map to the correct class number.
% \parseunicodedataIV{<code point>}
% (version used while reading the east Asian width data)
%   #1  a code point in a form TeX can read as a number (a hexadecimal
%       constant or a count register)
% If a class name was stored for this code point in \LB@<decimal code point>,
% that name is fetched into \temp and the code point is globally given the
% class number held in \XeTeXcharclass<name>. Code points without a stored
% class are left alone.
\def\parseunicodedataIV#1{%
  \ifcsname LB@\number#1\endcsname
    \edef\temp{\csname LB@\number#1\endcsname}%
    \global\XeTeXcharclass#1=\csname XeTeXcharclass\temp\endcsname
  \fi
}%
% Second pass: read the east Asian widths, then close the group opened near
% the top of the file so that only the \global assignments remain.
\readandparse{EastAsianWidth}%
\endgroup