From b3da5c7f779d0451662b7f7fd8ae77daf3b254a5 Mon Sep 17 00:00:00 2001 From: Hironori Kitagawa Date: Sat, 24 Oct 2015 17:25:01 +0900 Subject: [PATCH] Regenerate ltj-unicode-ccfix.lua (Unicode 8.0.0) --- doc/luatexja.dtx | 48 +++--- src/ltj-unicode-ccfix.lua | 354 ++++++++++++++++++++-------------------- tool/ltj-unicode-ccfix_make2.pl | 14 +- 3 files changed, 210 insertions(+), 206 deletions(-) diff --git a/doc/luatexja.dtx b/doc/luatexja.dtx index 0f95747..6ac495c 100644 --- a/doc/luatexja.dtx +++ b/doc/luatexja.dtx @@ -24,7 +24,7 @@ %%%%%%%% fonts \usepackage{luatexja-otf} \directlua{luatexja.otf.enable_ivs()} -\usepackage[match]{luatexja-fontspec} +\usepackage[match,no-math]{luatexja-fontspec} \usepackage[kozuka-pr6n]{luatexja-preset} \usepackage{unicode-math} \setmainfont[Ligatures=TeX]{Linux Libertine O} @@ -2749,13 +2749,13 @@ For the detail, see Table~\ref{table-kcat}. %<*en> \bfseries meaning& \bfseries control word& -\bfseries widow penalty\hbox{}$^*$& +\bfseries widow penalty& \bfseries linebreak\\ % %<*ja> \bfseries 意図& \bfseries 制御綴中に使用& -\bfseries 文字ウィドウ処理\hbox{}$^*$& +\bfseries 文字ウィドウ処理& \bfseries 直後での改行\\ % \midrule @@ -2770,7 +2770,7 @@ For the detail, see Table~\ref{table-kcat}. \medskip %<*ja> -文字ウィドウ処理\hbox{}$^*$: 「漢字が一文字だけ次の行に行くのを防ぐ」\ +文字ウィドウ処理:「漢字が一文字だけ次の行に行くのを防ぐ」 \cs{jcharwidowpenalty} が, その文字の直前に挿入されうるか否か,を示す. % @@ -2842,17 +2842,22 @@ between \XeTeX\ and \LuaTeX, by the following reasons: \begin{itemize} \item (plain format) \texttt{luatex-unicode-letters.tex} is based on old \texttt{unicode-letters.tex}. \item The latter half of \texttt{unicode-letters.tex}~and~\texttt{unicode-letters.def} -sets\cs{catcode} of Kanji and kana characters to 11, via setting \cs{XeTeXcharclass}. - -However, this latter half does not exist (plain case), or not executed (\LaTeX~case) in \LuaTeX, hence -\cs{catcode} of Kanji and kana characters remains 12 in \LuaTeX. +sets\cs{catcode} of several characters to 11, via setting \cs{XeTeXcharclass}. 
+However, this latter half does not exist (plain case), or is not executed (\LaTeX~case) in \LuaTeX. \end{itemize} -In other words, Kanji nor kana characters cannot be used in -a control word, in the default setting of \LuaTeX. +In other words, +\begin{description} +\item[plain \LuaTeX] +Neither Kanji nor kana characters can be used in +a control word, in the default setting of plain \LuaTeX. +\item[\LuaLaTeX] +In recent (2015-10-01 or later) \LuaLaTeX, Kanji and kana characters are supported in control words (their catcodes are 11), +but not fullwidth alphanumerics and several other characters. +\end{description} This would be inconvenient for \pTeX~users to shifting to \LuaTeX-ja, -since several control words containing Kanji, such as \verb+\西暦+, -are used in \pTeX. +since several control words containing Kanji or other fullwidth characters, such as +\verb+\西暦+~or~\verb+\1年目西暦+ are used in \pTeX. Hence, \LuaTeX-ja have a counterpart of \texttt{unicode-letters.tex} for \LuaTeX, \emph{to match the \cs{catcode} setting with that of \XeTeX.} % @@ -2865,7 +2870,6 @@ for \LuaTeX, \emph{to match the \cs{catcode} setting with that of \XeTeX.} \LaTeX では \cs{catcode} の設定はカーネルに \texttt{unicode-letters.def} として統合され. このファイルを\XeLaTeX, \LuaLaTeX の両方が用いている. - だが,\XeTeX における \cs{catcode} の初期設定と \LuaTeX におけるそれは一致していない: \begin{itemize} @@ -2873,16 +2877,20 @@ for \LuaTeX, \emph{to match the \cs{catcode} setting with that of \XeTeX.} \texttt{unicode-letters.tex} が古い \item \texttt{unicode-letters.tex} 後半部や \texttt{unicode-letters.def} 後半部では \cs{XeTeXcharclass} の設定を行なっており, -それによって漢字や仮名の \cs{catcode} が11に設定されている. - +それによって漢字や仮名,および全角英数字の \cs{catcode} が11に設定されている. しかし,\texttt{luatex-unicode-letters.tex} ではこの「後半部」が まるごと省略されており,また\LuaLaTeX でも \texttt{unicode-letters.def} 後半部は実行されな - い.従って漢字や仮名の \cs{catcode} は12のままになっている. + い. \end{itemize} -言い換えると,\LuaTeX の初期状態では漢字や仮名を制御綴内に +言い換えると, +\begin{description} + \item[plain \LuaTeX] 漢字や仮名を制御綴内に 使用することはできない. 
- -これでは \pTeX で使用できた \verb+\西暦+ などが使えないこととなり, + \item[\LuaLaTeX] 最近の(2015-10-01以降の) +\LuaLaTeX では漢字や仮名を制御綴内に +使用することが可能になったが,全角英数字は相変わらず使用できない. +\end{description} +これでは \pTeX で使用できた \verb+\1年目西暦+\footnote{科研費\LaTeX で使用されているそうです.}などが使えないこととなり, \LuaTeX-jaへの移行で手間が生じる.そのため,\LuaTeX-jaでは \texttt{unicode-letters.tex} の後半部にあたる内容を自前でパッチし, 結果として\textbf{\XeTeX における初期設定と同じになるようにしている.} @@ -9133,7 +9141,7 @@ Letter, Other, Kanji, Open, Closeのどれに属するかは次によって決 % \begin{itemize} %\item \textbf{ALchar}s above or equal to~\texttt{U+0080} are classified as Letter. -%\item (\texttt{U+0080}以降の)\textbf{ALchar}は,すべてLetter扱いである. +%\item \<(\texttt{U+0080}以降の)\textbf{ALchar}は,すべてLetter扱いである. %\item \textbf{JAchar}s are classified in the order as follows: %\item \textbf{JAchar}については,以下の順序に従って文字種を決める: diff --git a/src/ltj-unicode-ccfix.lua b/src/ltj-unicode-ccfix.lua index b071b22..430bb39 100644 --- a/src/ltj-unicode-ccfix.lua +++ b/src/ltj-unicode-ccfix.lua @@ -1,183 +1,181 @@ -- Do not edit this file! --- Created from LineBreak.txt by ltj-unicode-ccfix_make.pl on 2015年 2月 12日 木曜日 13:36:49 JST. +-- Created from LineBreak.txt by ltj-unicode-ccfix_make.pl on 2015年 10月 24日 土曜日 16:45:46 JST. -- In case of errors, fix the Perl script instead. 
-if tex.getcatcode(0x6F22)==12 then - local tex_catcode = tex.setcatcode - local function set_letter(b,e) +local tex_catcode = tex.setcatcode +local tex_getcc = tex.getcatcode +local function set_letter(b,e) + if tex_getcc(b)~=11 then for i=b,e do tex_catcode('global', i, 11) end end - - set_letter(0x231A,0x231B) - set_letter(0x23F0,0x23F3) - set_letter(0x2600,0x2603) - set_letter(0x2614,0x2615) - set_letter(0x2618,0x2618) - set_letter(0x261A,0x261F) - set_letter(0x2639,0x263B) - set_letter(0x2668,0x2668) - set_letter(0x267F,0x267F) - set_letter(0x26BD,0x26C8) - set_letter(0x26CD,0x26CD) - set_letter(0x26CF,0x26D1) - set_letter(0x26D3,0x26D4) - set_letter(0x26D8,0x26D9) - set_letter(0x26DC,0x26DC) - set_letter(0x26DF,0x26E1) - set_letter(0x26EA,0x26EA) - set_letter(0x26F1,0x26F5) - set_letter(0x26F7,0x26FA) - set_letter(0x26FD,0x26FF) - set_letter(0x2700,0x2704) - set_letter(0x2708,0x270D) - set_letter(0x2E80,0x2E99) - set_letter(0x2E9B,0x2EF3) - set_letter(0x2F00,0x2FD5) - set_letter(0x2FF0,0x2FFB) - set_letter(0x3003,0x3003) - set_letter(0x3004,0x3004) - set_letter(0x3006,0x3006) - set_letter(0x3007,0x3007) - set_letter(0x3012,0x3013) - set_letter(0x3020,0x3020) - set_letter(0x3021,0x3029) - set_letter(0x3030,0x3030) - set_letter(0x3031,0x3034) - set_letter(0x3036,0x3037) - set_letter(0x3038,0x303A) - set_letter(0x303D,0x303D) - set_letter(0x303E,0x303F) - set_letter(0x3042,0x3042) - set_letter(0x3044,0x3044) - set_letter(0x3046,0x3046) - set_letter(0x3048,0x3048) - set_letter(0x304A,0x3062) - set_letter(0x3064,0x3082) - set_letter(0x3084,0x3084) - set_letter(0x3086,0x3086) - set_letter(0x3088,0x308D) - set_letter(0x308F,0x3094) - set_letter(0x309F,0x309F) - set_letter(0x30A2,0x30A2) - set_letter(0x30A4,0x30A4) - set_letter(0x30A6,0x30A6) - set_letter(0x30A8,0x30A8) - set_letter(0x30AA,0x30C2) - set_letter(0x30C4,0x30E2) - set_letter(0x30E4,0x30E4) - set_letter(0x30E6,0x30E6) - set_letter(0x30E8,0x30ED) - set_letter(0x30EF,0x30F4) - set_letter(0x30F7,0x30FA) - 
set_letter(0x30FF,0x30FF) - set_letter(0x3105,0x312D) - set_letter(0x3131,0x318E) - set_letter(0x3190,0x3191) - set_letter(0x3192,0x3195) - set_letter(0x3196,0x319F) - set_letter(0x31A0,0x31BA) - set_letter(0x31C0,0x31E3) - set_letter(0x3200,0x321E) - set_letter(0x3220,0x3229) - set_letter(0x322A,0x3247) - set_letter(0x3250,0x3250) - set_letter(0x3251,0x325F) - set_letter(0x3260,0x327F) - set_letter(0x3280,0x3289) - set_letter(0x328A,0x32B0) - set_letter(0x32B1,0x32BF) - set_letter(0x32C0,0x32FE) - set_letter(0x3300,0x33FF) - set_letter(0x3400,0x4DB5) - set_letter(0x4DB6,0x4DBF) - set_letter(0x4E00,0x9FCC) - set_letter(0x9FCD,0x9FFF) - set_letter(0xA000,0xA014) - set_letter(0xA016,0xA48C) - set_letter(0xA490,0xA4C6) - set_letter(0xF900,0xFA6D) - set_letter(0xFA6E,0xFA6F) - set_letter(0xFA70,0xFAD9) - set_letter(0xFADA,0xFAFF) - set_letter(0xFE30,0xFE30) - set_letter(0xFE31,0xFE32) - set_letter(0xFE33,0xFE34) - set_letter(0xFE45,0xFE46) - set_letter(0xFE49,0xFE4C) - set_letter(0xFE4D,0xFE4F) - set_letter(0xFE51,0xFE51) - set_letter(0xFE58,0xFE58) - set_letter(0xFE5F,0xFE61) - set_letter(0xFE62,0xFE62) - set_letter(0xFE63,0xFE63) - set_letter(0xFE64,0xFE66) - set_letter(0xFE68,0xFE68) - set_letter(0xFE6B,0xFE6B) - set_letter(0xFF02,0xFF03) - set_letter(0xFF06,0xFF07) - set_letter(0xFF0A,0xFF0A) - set_letter(0xFF0B,0xFF0B) - set_letter(0xFF0D,0xFF0D) - set_letter(0xFF0F,0xFF0F) - set_letter(0xFF10,0xFF19) - set_letter(0xFF1C,0xFF1E) - set_letter(0xFF20,0xFF20) - set_letter(0xFF21,0xFF3A) - set_letter(0xFF3C,0xFF3C) - set_letter(0xFF3E,0xFF3E) - set_letter(0xFF3F,0xFF3F) - set_letter(0xFF40,0xFF40) - set_letter(0xFF41,0xFF5A) - set_letter(0xFF5C,0xFF5C) - set_letter(0xFF5E,0xFF5E) - set_letter(0xFFE2,0xFFE2) - set_letter(0xFFE3,0xFFE3) - set_letter(0xFFE4,0xFFE4) - set_letter(0x1B000,0x1B001) - set_letter(0x1F000,0x1F02B) - set_letter(0x1F030,0x1F093) - set_letter(0x1F0A0,0x1F0AE) - set_letter(0x1F0B1,0x1F0BF) - set_letter(0x1F0C1,0x1F0CF) - set_letter(0x1F0D1,0x1F0F5) 
- set_letter(0x1F200,0x1F202) - set_letter(0x1F210,0x1F23A) - set_letter(0x1F240,0x1F248) - set_letter(0x1F250,0x1F251) - set_letter(0x1F300,0x1F32C) - set_letter(0x1F330,0x1F37D) - set_letter(0x1F380,0x1F39B) - set_letter(0x1F39E,0x1F3B4) - set_letter(0x1F3B7,0x1F3BB) - set_letter(0x1F3BD,0x1F3CE) - set_letter(0x1F3D4,0x1F3F7) - set_letter(0x1F400,0x1F49F) - set_letter(0x1F4A1,0x1F4A1) - set_letter(0x1F4A3,0x1F4A3) - set_letter(0x1F4A5,0x1F4AE) - set_letter(0x1F4B0,0x1F4B0) - set_letter(0x1F4B3,0x1F4FE) - set_letter(0x1F507,0x1F516) - set_letter(0x1F525,0x1F531) - set_letter(0x1F54A,0x1F54A) - set_letter(0x1F550,0x1F579) - set_letter(0x1F57B,0x1F5A3) - set_letter(0x1F5A5,0x1F5D3) - set_letter(0x1F5DC,0x1F5F3) - set_letter(0x1F5FA,0x1F5FF) - set_letter(0x1F600,0x1F642) - set_letter(0x1F645,0x1F64F) - set_letter(0x1F680,0x1F6CF) - set_letter(0x1F6E0,0x1F6EC) - set_letter(0x1F6F0,0x1F6F3) - set_letter(0x20000,0x2A6D6) - set_letter(0x2A6D7,0x2A6FF) - set_letter(0x2A700,0x2B734) - set_letter(0x2B735,0x2B73F) - set_letter(0x2B740,0x2B81D) - set_letter(0x2B81E,0x2F7FF) - set_letter(0x2F800,0x2FA1D) - set_letter(0x2FA1E,0x2FFFD) - set_letter(0x30000,0x3FFFD) - end - +set_letter(0x231A,0x231B) +set_letter(0x23F0,0x23F3) +set_letter(0x2600,0x2603) +set_letter(0x2614,0x2615) +set_letter(0x2618,0x2618) +set_letter(0x261A,0x261F) +set_letter(0x2639,0x263B) +set_letter(0x2668,0x2668) +set_letter(0x267F,0x267F) +set_letter(0x26BD,0x26C8) +set_letter(0x26CD,0x26CD) +set_letter(0x26CF,0x26D1) +set_letter(0x26D3,0x26D4) +set_letter(0x26D8,0x26D9) +set_letter(0x26DC,0x26DC) +set_letter(0x26DF,0x26E1) +set_letter(0x26EA,0x26EA) +set_letter(0x26F1,0x26F5) +set_letter(0x26F7,0x26FA) +set_letter(0x26FD,0x26FF) +set_letter(0x2700,0x2704) +set_letter(0x2708,0x270D) +set_letter(0x2E80,0x2E99) +set_letter(0x2E9B,0x2EF3) +set_letter(0x2F00,0x2FD5) +set_letter(0x2FF0,0x2FFB) +set_letter(0x3003,0x3003) +set_letter(0x3004,0x3004) +set_letter(0x3006,0x3006) +set_letter(0x3007,0x3007) 
+set_letter(0x3012,0x3013) +set_letter(0x3020,0x3020) +set_letter(0x3021,0x3029) +set_letter(0x3030,0x3030) +set_letter(0x3031,0x3034) +set_letter(0x3036,0x3037) +set_letter(0x3038,0x303A) +set_letter(0x303D,0x303D) +set_letter(0x303E,0x303F) +set_letter(0x3042,0x3042) +set_letter(0x3044,0x3044) +set_letter(0x3046,0x3046) +set_letter(0x3048,0x3048) +set_letter(0x304A,0x3062) +set_letter(0x3064,0x3082) +set_letter(0x3084,0x3084) +set_letter(0x3086,0x3086) +set_letter(0x3088,0x308D) +set_letter(0x308F,0x3094) +set_letter(0x309F,0x309F) +set_letter(0x30A2,0x30A2) +set_letter(0x30A4,0x30A4) +set_letter(0x30A6,0x30A6) +set_letter(0x30A8,0x30A8) +set_letter(0x30AA,0x30C2) +set_letter(0x30C4,0x30E2) +set_letter(0x30E4,0x30E4) +set_letter(0x30E6,0x30E6) +set_letter(0x30E8,0x30ED) +set_letter(0x30EF,0x30F4) +set_letter(0x30F7,0x30FA) +set_letter(0x30FF,0x30FF) +set_letter(0x3105,0x312D) +set_letter(0x3131,0x318E) +set_letter(0x3190,0x3191) +set_letter(0x3192,0x3195) +set_letter(0x3196,0x319F) +set_letter(0x31A0,0x31BA) +set_letter(0x31C0,0x31E3) +set_letter(0x3200,0x321E) +set_letter(0x3220,0x3229) +set_letter(0x322A,0x3247) +set_letter(0x3250,0x3250) +set_letter(0x3251,0x325F) +set_letter(0x3260,0x327F) +set_letter(0x3280,0x3289) +set_letter(0x328A,0x32B0) +set_letter(0x32B1,0x32BF) +set_letter(0x32C0,0x32FE) +set_letter(0x3300,0x33FF) +set_letter(0x3400,0x4DB5) +set_letter(0x4DB6,0x4DBF) +set_letter(0x4E00,0x9FD5) +set_letter(0x9FD6,0x9FFF) +set_letter(0xA000,0xA014) +set_letter(0xA016,0xA48C) +set_letter(0xA490,0xA4C6) +set_letter(0xF900,0xFA6D) +set_letter(0xFA6E,0xFA6F) +set_letter(0xFA70,0xFAD9) +set_letter(0xFADA,0xFAFF) +set_letter(0xFE30,0xFE30) +set_letter(0xFE31,0xFE32) +set_letter(0xFE33,0xFE34) +set_letter(0xFE45,0xFE46) +set_letter(0xFE49,0xFE4C) +set_letter(0xFE4D,0xFE4F) +set_letter(0xFE51,0xFE51) +set_letter(0xFE58,0xFE58) +set_letter(0xFE5F,0xFE61) +set_letter(0xFE62,0xFE62) +set_letter(0xFE63,0xFE63) +set_letter(0xFE64,0xFE66) +set_letter(0xFE68,0xFE68) 
+set_letter(0xFE6B,0xFE6B) +set_letter(0xFF02,0xFF03) +set_letter(0xFF06,0xFF07) +set_letter(0xFF0A,0xFF0A) +set_letter(0xFF0B,0xFF0B) +set_letter(0xFF0D,0xFF0D) +set_letter(0xFF0F,0xFF0F) +set_letter(0xFF10,0xFF19) +set_letter(0xFF1C,0xFF1E) +set_letter(0xFF20,0xFF20) +set_letter(0xFF21,0xFF3A) +set_letter(0xFF3C,0xFF3C) +set_letter(0xFF3E,0xFF3E) +set_letter(0xFF3F,0xFF3F) +set_letter(0xFF40,0xFF40) +set_letter(0xFF41,0xFF5A) +set_letter(0xFF5C,0xFF5C) +set_letter(0xFF5E,0xFF5E) +set_letter(0xFFE2,0xFFE2) +set_letter(0xFFE3,0xFFE3) +set_letter(0xFFE4,0xFFE4) +set_letter(0x1B000,0x1B001) +set_letter(0x1F000,0x1F02B) +set_letter(0x1F030,0x1F093) +set_letter(0x1F0A0,0x1F0AE) +set_letter(0x1F0B1,0x1F0BF) +set_letter(0x1F0C1,0x1F0CF) +set_letter(0x1F0D1,0x1F0F5) +set_letter(0x1F200,0x1F202) +set_letter(0x1F210,0x1F23A) +set_letter(0x1F240,0x1F248) +set_letter(0x1F250,0x1F251) +set_letter(0x1F300,0x1F39B) +set_letter(0x1F39E,0x1F3B4) +set_letter(0x1F3B7,0x1F3BB) +set_letter(0x1F3BD,0x1F3FA) +set_letter(0x1F400,0x1F49F) +set_letter(0x1F4A1,0x1F4A1) +set_letter(0x1F4A3,0x1F4A3) +set_letter(0x1F4A5,0x1F4AE) +set_letter(0x1F4B0,0x1F4B0) +set_letter(0x1F4B3,0x1F4FF) +set_letter(0x1F507,0x1F516) +set_letter(0x1F525,0x1F531) +set_letter(0x1F54A,0x1F579) +set_letter(0x1F57B,0x1F5A3) +set_letter(0x1F5A5,0x1F5D3) +set_letter(0x1F5DC,0x1F5F3) +set_letter(0x1F5FA,0x1F5FF) +set_letter(0x1F600,0x1F64F) +set_letter(0x1F680,0x1F6D0) +set_letter(0x1F6E0,0x1F6EC) +set_letter(0x1F6F0,0x1F6F3) +set_letter(0x1F910,0x1F918) +set_letter(0x1F980,0x1F984) +set_letter(0x1F9C0,0x1F9C0) +set_letter(0x20000,0x2A6D6) +set_letter(0x2A6D7,0x2A6FF) +set_letter(0x2A700,0x2B734) +set_letter(0x2B735,0x2B73F) +set_letter(0x2B740,0x2B81D) +set_letter(0x2B81E,0x2B81F) +set_letter(0x2B820,0x2CEA1) +set_letter(0x2CEA2,0x2F7FF) +set_letter(0x2F800,0x2FA1D) +set_letter(0x2FA1E,0x2FFFD) +set_letter(0x30000,0x3FFFD) diff --git a/tool/ltj-unicode-ccfix_make2.pl b/tool/ltj-unicode-ccfix_make2.pl index 
d5db964..f559629 100755 --- a/tool/ltj-unicode-ccfix_make2.pl +++ b/tool/ltj-unicode-ccfix_make2.pl @@ -74,12 +74,13 @@ __EOT__ print << '__EOT__'; -if tex.getcatcode(0x6F22)==12 then - local tex_catcode = tex.setcatcode - local function set_letter(b,e) +local tex_catcode = tex.setcatcode +local tex_getcc = tex.getcatcode +local function set_letter(b,e) + if tex_getcc(b)~=11 then for i=b,e do tex_catcode('global', i, 11) end end - +end __EOT__ open LineBreak, $ARGV[0] or die "can't read $ARGV[0]"; @@ -95,7 +96,7 @@ while () { if (exists $lineBreakClass{$lb}) { if ($lineBreakClass{$lb} == 1) { # ideographs: set whole range to class 1 - print " set_letter(0x$s,0x$e)\n"; + print "set_letter(0x$s,0x$e)\n"; } } } @@ -103,7 +104,4 @@ while () { close LineBreak; print << '__EOT__'; - -end - __EOT__ -- 2.11.0