% UTF-8 encoding for Plain TeX.
% Copyright 2008, 2015, 2018 Dmitri Pavlov.
% You may redistribute this file under the terms of GNU General Public License version 3.
% Report bugs and suggestions to me by email (host: math.berkeley.edu, user: pavlov).
% So far this file includes support for most of TeX symbols and Latin 1 letters.
%
% Setup catcodes for UTF-8 characters: legitimate initial octets are active, intermediate octets are letters, other octets are illegal.
% \ch is a scratch name for count register 253; it is released again at the end of this setup.
% The loops below are order-dependent: "C0.."FF is first marked invalid, then "C2.."F4 is overridden to active.
\countdef\ch=253
\ch="80 \loop\ifnum\ch<"100 \lccode\ch=0 \uccode\ch=0 \advance\ch1 \repeat % for LaTeX
\ch="80 \loop\ifnum\ch<"C0 \catcode\ch=11 \advance\ch1 \repeat % make all intermediate UTF-8 octets (top 2 bits are 10) letters
\ch="C0 \loop\ifnum\ch<"100 \catcode\ch=15 \advance\ch1 \repeat % make all octets with the top 2 bits set illegal
\ch="C2 \loop\ifnum\ch<"F5 \catcode\ch=\active \advance\ch1 \repeat % make all legitimate initial UTF-8 octets active
\ch="C2 \loop\ifnum\ch<"E2 \uccode\ch=1 \advance\ch1 \repeat % make uccode=1 for all initial UTF-8 octets of letters, to allow macros to recognize Unicode letters
\let\ch\undefined
%
% \cdef\macro+ is like \def\macro{+}, but the + stored in \macro always has catcode 12 (other),
% whatever the catcode of the + that was passed in (in particular when + is an active character).
% How it works: ^^@ is tokenized with catcode 12 while \cdef is being defined; at run time
% \lccode0 is pointed at the argument and \lowercase rewrites the character code of that ^^@,
% leaving its catcode untouched.  The \endgroup inside \lowercase undoes the \lccode change.
\catcode0=12
\def\cdef#1#2{\begingroup\lccode0=`#2 \lowercase{\endgroup \def#1{^^@}}}
\catcode0=9
%
% Preliminary macros for UTF-8: every legitimate UTF-8 sequence produces a control sequence whose name consists precisely of the octets in the sequence.
\catcode`@=\active
% While the table is being read, @<initial octet> turns that (active) octet into a macro which
% absorbs its continuation octets and expands to \csname<all octets>\endcsname.
% \chr holds the initial octet as a non-active character (made by \cdef) so that it can sit inside \csname.
%
% 2-byte sequences: initial octets C2..DF, one continuation octet.
\def@#1{%
  \cdef\chr#1 %
  \edef#1##1{\noexpand\csname\chr##1\noexpand\endcsname}%
}
@^^c2@^^c3@^^c4@^^c5@^^c6@^^c7@^^c8@^^c9%
@^^ca@^^cb@^^cc@^^cd@^^ce@^^cf@^^d0@^^d1%
@^^d2@^^d3@^^d4@^^d5@^^d6@^^d7@^^d8@^^d9%
@^^da@^^db@^^dc@^^dd@^^de@^^df
% 3-byte sequences: initial octets E0..EF, two continuation octets.
\def@#1{%
  \cdef\chr#1 %
  \edef#1##1##2{\noexpand\csname\chr##1##2\noexpand\endcsname}%
}
@^^e0@^^e1@^^e2@^^e3@^^e4@^^e5@^^e6@^^e7%
@^^e8@^^e9@^^ea@^^eb@^^ec@^^ed@^^ee@^^ef
% 4-byte sequences: initial octets F0..F4, three continuation octets.
\def@#1{%
  \cdef\chr#1 %
  \edef#1##1##2##3{\noexpand\csname\chr##1##2##3\noexpand\endcsname}%
}
@^^f0@^^f1@^^f2@^^f3@^^f4
\let\chr\undefined
\let\cdef\undefined
% The following three lines establish a concise syntax for defining characters: each line contains a UTF-8 character followed by its definition.
% \grabfuturelet peeks at the next token; \grabexamine continues only if that token is \csname,
% i.e. only if the line started with a UTF-8 character (already expanded by the \expandafter in \grab).
% \grab then defines \csname<octets>\endcsname as the rest of the line (up to the active ^^M) and re-arms itself.
% Consequences for the table: one entry per line, no trailing comment or trailing space on an
% entry line, and no blank line inside the table (any line not starting with a UTF-8 character ends the loop).
\def\grabfuturelet{\futurelet\next\grabexamine}
\def\grabexamine{\ifx\next\csname\expandafter\grab\fi}
\obeylines \def\grab\csname#1\endcsname#2^^M{\expandafter\def\csname#1\endcsname{#2}\expandafter\grabfuturelet}
\expandafter\grabfuturelet%
% Coverage: all TeX symbols except those not present in Unicode and the part of Latin 1 and Latin Extended-A that is present in the Computer Modern fonts.
% Character table, part 1: Latin 1, Latin Extended-A, punctuation and text symbols.
% Format: <UTF-8 character><definition>, one entry per line; lines starting with % are skipped.
% An entry line must not carry a trailing comment or trailing space, and the table must not contain blank lines.
% The next entry is keyed by U+00A0 NO-BREAK SPACE (invisible in most editors) and maps it to a tie.
 ~
¢{\hbox{\rm\rlap/c}}
£{\it\$}
%¤
%¥
%¦
%¨
%ª
«\leftguillemet
% The next entry is keyed by U+00AD SOFT HYPHEN (invisible in most editors).
­\-
%®
%¯
%°
%²
%³
%´
%µ
%·\cdotp
%¸\c
%¹
%º
»\rightguillemet
%¼
%½
%¾
À{\`A}
Á{\'A}
Â{\^A}
Ã{\~A}
Ä{\"A}
Ç{\c C}
È{\`E}
É{\'E}
Ê{\^E}
Ë{\"E}
Ì{\`I}
Í{\'I}
Î{\^I}
Ï{\"I}
% must define \ETH, \THORN, \eth, \thorn separately
Ð\ETH
Ñ{\~N}
Ò{\`O}
Ó{\'O}
Ô{\^O}
Õ{\~O}
Ö{\"O}
Ù{\`U}
Ú{\'U}
Û{\^U}
Ü{\"U}
Ý{\'Y}
Þ\THORN
à{\`a}
á{\'a}
â{\^a}
ã{\~a}
ä{\"a}
ç{\c c}
è{\`e}
é{\'e}
ê{\^e}
ë{\"e}
ì{\`\i}
í{\'\i}
î{\^\i}
ï{\"\i}
ð\eth
ñ{\~n}
ò{\`o}
ó{\'o}
ô{\^o}
õ{\~o}
ö{\"o}
ù{\`u}
ú{\'u}
û{\^u}
ü{\"u}
ý{\'y}
þ\thorn
ÿ{\"y}
% Latin Extended-A
Ā{\=A}
ā{\=a}
Ă{\u A}
ă{\u a}
%Ą
%ą
Ć{\'C}
ć{\'c}
Ĉ{\^C}
ĉ{\^c}
Ċ{\.C}
ċ{\.c}
Č{\v C}
č{\v c}
Ď{\v D}
ď{\v d}
%Đ
%đ
Ē{\=E}
ē{\=e}
Ĕ{\u E}
ĕ{\u e}
Ė{\.E}
ė{\.e}
%Ę
%ę
Ě{\v E}
ě{\v e}
Ĝ{\^G}
ĝ{\^g}
Ğ{\u G}
ğ{\u g}
Ġ{\.G}
ġ{\.g}
Ģ{\c G}
ģ{\c g}
Ĥ{\^H}
ĥ{\^h}
%Ħ
%ħ
Ĩ{\~I}
ĩ{\~\i}
Ī{\=I}
ī{\=\i}
Ĭ{\u I}
ĭ{\u\i}
%Į
%į
İ{\.I}
%ı\i
% U+0132 / U+0133 (IJ ligatures) are keyed by the single ligature character, not by the letters I, J.
ĲIJ
ĳij
Ĵ{\^J}
ĵ{\^\j}
Ķ{\c K}
ķ{\c k}
%ĸ
Ĺ{\'L}
ĺ{\'l}
Ļ{\c L}
ļ{\c l}
Ľ{\v L}
ľ{\v l}
%Ŀ
%ŀ
%Ł\L
%ł\l
Ń{\'N}
ń{\'n}
Ņ{\c N}
ņ{\c n}
Ň{\v N}
ň{\v n}
%ŉ
%Ŋ
%ŋ
Ō{\=O}
ō{\=o}
Ŏ{\u O}
ŏ{\u o}
% U+0150 / U+0151 carry a double acute accent, which is \H in plain TeX (not the diaeresis \").
Ő{\H O}
ő{\H o}
%Œ\OE
%œ\oe
Ŕ{\'R}
ŕ{\'r}
Ŗ{\c R}
ŗ{\c r}
Ř{\v R}
ř{\v r}
Ś{\'S}
ś{\'s}
Ŝ{\^S}
ŝ{\^s}
Ş{\c S}
ş{\c s}
Š{\v S}
š{\v s}
Ţ{\c T}
ţ{\c t}
Ť{\v T}
ť{\v t}
%Ŧ
%ŧ
Ũ{\~U}
ũ{\~u}
Ū{\=U}
ū{\=u}
Ŭ{\u U}
ŭ{\u u}
%Ů
%ů
Ű{\H U}
ű{\H u}
%Ų
%ų
Ŵ{\^W}
ŵ{\^w}
Ŷ{\^Y}
ŷ{\^y}
Ÿ{\"Y}
Ź{\'Z}
ź{\'z}
Ż{\.Z}
ż{\.z}
Ž{\v Z}
ž{\v z}
%ſ
’'
‘`
”{''}
“{``}
‐-
–{--}
—{---}
% these ligatures should not be used in TeX text
%ﬀ{ff}
%ﬁ{fi}
%ﬂ{fl}
%ﬃ{ffi}
%ﬄ{ffl}
¡{!`}
¿{?`}
−-
′'
% these ligatures should not be used in TeX text
%″{''}
%‴{'''}
%⁗{''''}
ß\ss
æ\ae
Æ\AE
œ\oe
Œ\OE
ø\o
Ø\O
å\aa
Å\AA
ł\l
Ł\L
% defined below
%ı\i
%ȷ\j
†\dag
‡\ddag
§\S
¶\P
% combining characters are left undefined because they come after, not before the accented character
%◌̣\d
%◌̱\b
%◌̧\c
©\copyright
…\dots
%◌̀\`
%◌́\'
%◌̌\v
%◌̆\u
%◌̄\=
%◌̂\^
%◌̇\.
% Character table, part 2: dotless letters and math symbols.
% Same format as the rest of the table: <UTF-8 character><definition>, one entry per line,
% no trailing comments or spaces on entry lines, no blank lines.
% A character that appears on more than one entry line ends up with the LAST definition,
% because every entry is an unconditional \def.
%◌̋\H
%◌̃\~
%◌̈\"
%◌͡\t
%\brace[lr][du]
ı\relax\ifmmode\imath\else\i\fi
ȷ\relax\ifmmode\jmath\else\j\fi
¬\neg
% NOTE: △ appears again further down as \triangle; that later entry wins.
△\bigtriangleup
÷\div
±\pm
◯\bigcirc
% Latin 1
×\times
‖\|
←\leftarrow
→\rightarrow
% NOTE: ∥ appears again further down as \parallel; that later entry wins.
∥\Vert
%|\vert
⟩\rangle
⟨\langle
%{\lbrace
%}\rbrace
% NOTE: ∣ appears again further down as \divides; that later entry wins.
∣|
∮\oint
%!\ointop
≐\dot=
↑\uparrow
↓\downarrow
α\alpha
β\beta
γ\gamma
δ\delta
ϵ\epsilon
ζ\zeta
η\eta
θ\theta
ι\iota
κ\kappa
λ\lambda
μ\mu
ν\nu
ξ\xi
οo
π\pi
ρ\rho
σ\sigma
τ\tau
υ\upsilon
ϕ\phi
χ\chi
ψ\psi
ω\omega
ε\varepsilon
ϑ\vartheta
ϖ\varpi
ϱ\varrho
ς\varsigma
φ\varphi
Γ\Gamma
Δ\Delta
Θ\Theta
Λ\Lambda
Ξ\Xi
Π\Pi
Σ\Sigma
Υ\Upsilon
Φ\Phi
Ψ\Psi
Ω\Omega
ℵ\aleph
ℏ\hbar
ℓ\ell
℘\wp
ℜ\Re
ℑ\Im
∂\partial
∞\infty
%'\prime
∅\emptyset
∇\nabla
√\surd
⊤\top
⊥\bot
∠\angle
△\triangle
∀\forall
∃\exists
♭\flat
♮\natural
♯\sharp
♣\clubsuit
♢\diamondsuit
♡\heartsuit
♠\spadesuit
∐\coprod
⋁\bigvee
⋀\bigwedge
⨄\biguplus
⋂\bigcap
⋃\bigcup
∫\int
%!\intop
∏\prod
∑\sum
⨂\bigotimes
⨁\bigoplus
⨀\bigodot
⨆\bigsqcup
%!\smallint
◁\triangleleft
▷\triangleright
▽\bigtriangledown
∧\wedge
∨\vee
∩\cap
∪\cup
%‡\ddagger : \ddag
%†\dagger : \dag
⊓\sqcap
⊔\sqcup
⊎\uplus
⨿\amalg
⋄\diamond
∙\bullet
≀\wr
⊙\odot
⊘\oslash
⊗\otimes
⊖\ominus
⊕\oplus
∓\mp
∘\circ
%white circle: ○\Orb
%large circle:
∖\setminus
⋅\cdot
∗\ast
% Unicode
⨯\times
⋆\star
∝\propto
⊑\sqsubseteq
⊒\sqsupseteq
∥\parallel
% NOTE(review): \divides is not defined in this part of the file; presumably it is supplied elsewhere -- confirm.
∣\divides
%|\mid
⊣\dashv
⊢\vdash
↗\nearrow
↘\searrow
↖\nwarrow
↙\swarrow
⇔\Leftrightarrow
⇐\Leftarrow
⇒\Rightarrow
≠\neq
≤\leq
≥\geq
≻\succ
≺\prec
≈\approx
≽\succeq
≼\preceq
⊃\supset
⊂\subset
⊇\supseteq
⊆\subseteq
∈\in
∋\ni
≫\gg
≪\ll
%◌̸\not
↔\leftrightarrow
↦\mapsto
%↦\mapstochar : \mapsto
∼\sim
≃\simeq
⟂\perp
≡\equiv
≍\asymp
⌣\smile
⌢\frown
↼\leftharpoonup
↽\leftharpoondown
⇀\rightharpoonup
⇁\rightharpoondown
↪\hookrightarrow
↩\hookleftarrow
%\lhook
%\rhook
⋈\bowtie
⊨\models
⟹\Longrightarrow
⟶\longrightarrow
⟵\longleftarrow
⟸\Longleftarrow
⟼\longmapsto
⟷\longleftrightarrow
⟺\Longleftrightarrow
%⇔\iff : \Longleftrightarrow
%.\ldotp : .
% Character table, part 3: remaining math symbols and negated/combined relations.
% Same format as the rest of the table: <UTF-8 character><definition>, one entry per line,
% no trailing comments or spaces on entry lines, no blank lines.
%⋅\cdotp : \cdot
%:\colon : :
%…\ldots : \dots
⋯\cdots
⋮\vdots
⋱\ddots
%\acute, \grave, \ddot, \tilde, \bar, \breve, \check, \hat, \vec, \dot, \widetilde, \widehat
%\overrightarrow, \overleftarrow, \overbrace, \underbrace
%\lmoustache, \rmoustache, \lgroup, \rgroup, \arrowvert, \Arrowvert, \bracevert
↕\updownarrow
⇑\Uparrow
⇓\Downarrow
⇕\Updownarrow
%\\backslash
⌉\rceil
⌈\lceil
⌋\rfloor
⌊\lfloor
≅\cong
∉\notin
⇌\rightleftharpoons
≐\doteq
% Combined symbols from Unicode math blocks
∄\not\exists
∌\not\ni
∔\dot+
∕/
∤\not|
∦\not\|
∬\int\!\!\!\int
∭\int\!\!\!\int\!\!\!\int
∮\oint
∸\dot-
≁\not\sim
≄\not\simeq
≆\not\cong
≇\not\cong
≉\not\approx
≔:=
≕=:
≢\not\equiv
≭\not\asymp
≮\not<
≯\not>
≰\not\le
≱\not\ge
⊀\not\prec
⊁\not\succ
⊄\not\subset
⊅\not\supset
⊈\not\subseteq
⊉\not\supseteq
⊦\vdash
⊧\models
⊬\not\vdash
⊭\not\models
⊲\triangleleft
⊳\triangleright
⋠\not\preceq
⋡\not\succeq
⋤\not\sqsubseteq
⋥\not\sqsupseteq
⋪\not\triangleleft
⋫\not\triangleright
◻\square
\catcode`\^^M=5 %
% The line above ends the table: \catcode is not \csname, so the \grab loop stops, and the end-of-line
% character gets its ordinary catcode back (the trailing % hides that line's own line end).
% The table-reading helpers are no longer needed.
\let\grabfuturelet\undefined
\let\grabexamine\undefined
\let\grab\undefined
%
% Generic macros for UTF-8: every legitimate UTF-8 sequence produces a control sequence whose name consists precisely of the octets in the sequence.
% An undefined control sequence produces an error message.
% \xcsname / \xendcsname are aliases (made with \let) of \csname / \endcsname, used in the "found" branch below.
\let\xcsname=\csname
\let\xendcsname=\endcsname
% @<initial octet> (re)defines that active octet as a macro that absorbs its continuation octets,
% builds the control sequence named by all the octets (\string turns the active initial octet into
% a plain character), and either runs it or, if it compares equal to \relax, raises an error.
%
% 2-byte sequences: initial octets C2..DF, one continuation octet.
\def@#1{%
  \def#1##1{%
    \expandafter\ifx\csname\string#1##1\endcsname\relax%
      \errmessage{Undefined UTF-8 sequence \string#1##1}%
    \else%
      \xcsname\string#1##1\xendcsname%
    \fi%
  }%
}
@^^c2@^^c3@^^c4@^^c5@^^c6@^^c7@^^c8@^^c9%
@^^ca@^^cb@^^cc@^^cd@^^ce@^^cf@^^d0@^^d1%
@^^d2@^^d3@^^d4@^^d5@^^d6@^^d7@^^d8@^^d9%
@^^da@^^db@^^dc@^^dd@^^de@^^df
% 3-byte sequences: initial octets E0..EF, two continuation octets.
\def@#1{%
  \def#1##1##2{%
    \expandafter\ifx\csname\string#1##1##2\endcsname\relax%
      \errmessage{Undefined UTF-8 sequence \string#1##1##2}%
    \else%
      \xcsname\string#1##1##2\xendcsname%
    \fi%
  }%
}
@^^e0@^^e1@^^e2@^^e3@^^e4@^^e5@^^e6@^^e7%
@^^e8@^^e9@^^ea@^^eb@^^ec@^^ed@^^ee@^^ef
% 4-byte sequences: initial octets F0..F4, three continuation octets.
\def@#1{%
  \def#1##1##2##3{%
    \expandafter\ifx\csname\string#1##1##2##3\endcsname\relax%
      \errmessage{Undefined UTF-8 sequence \string#1##1##2##3}%
    \else%
      \xcsname\string#1##1##2##3\xendcsname%
    \fi%
  }%
}
@^^f0@^^f1@^^f2@^^f3@^^f4
% Release the helper and give @ back the catcode 12 (other).
\let@\undefined
\catcode`@=12