From c9c993ffc2b617083c82b701495ce2143be5232e Mon Sep 17 00:00:00 2001 From: "NARUSE, Yui" Date: Sat, 15 Dec 2018 17:22:47 +0900 Subject: [PATCH] --fb-java doesn't follow Java's spec Though --fb-java's name implies Java's behavior, it actually behaved the way Python does. [nkf-bug:38800] --- nkf.c | 26 +++++++++++++++++--------- nkf_test.pl | 5 +++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/nkf.c b/nkf.c index 8d02923..f6a25dd 100644 --- a/nkf.c +++ b/nkf.c @@ -1111,18 +1111,26 @@ encode_fallback_java(nkf_char c) (*oconv)(0, '\\'); c &= VALUE_MASK; if(!nkf_char_unicode_bmp_p(c)){ - (*oconv)(0, 'U'); - (*oconv)(0, '0'); - (*oconv)(0, '0'); - (*oconv)(0, bin2hex(c>>20)); - (*oconv)(0, bin2hex(c>>16)); + int high = (c >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */ + int low = (c & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ + (*oconv)(0, 'u'); + (*oconv)(0, bin2hex(high>>12)); + (*oconv)(0, bin2hex(high>> 8)); + (*oconv)(0, bin2hex(high>> 4)); + (*oconv)(0, bin2hex(high )); + (*oconv)(0, '\\'); + (*oconv)(0, 'u'); + (*oconv)(0, bin2hex(low>>12)); + (*oconv)(0, bin2hex(low>> 8)); + (*oconv)(0, bin2hex(low>> 4)); + (*oconv)(0, bin2hex(low )); }else{ (*oconv)(0, 'u'); + (*oconv)(0, bin2hex(c>>12)); + (*oconv)(0, bin2hex(c>> 8)); + (*oconv)(0, bin2hex(c>> 4)); + (*oconv)(0, bin2hex(c )); } - (*oconv)(0, bin2hex(c>>12)); - (*oconv)(0, bin2hex(c>> 8)); - (*oconv)(0, bin2hex(c>> 4)); - (*oconv)(0, bin2hex(c )); return; } diff --git a/nkf_test.pl b/nkf_test.pl index f9e1e07..571cf86 100644 --- a/nkf_test.pl +++ b/nkf_test.pl @@ -1353,6 +1353,11 @@ printf "%-40s", "[nkf-forum:65482] "; "\x1b\x24\x42\x7f\x21\x80\x21\x1b\x28\x42\n", "\xf0\x40\xf0\x9f\x0a"); +printf "%-40s", "[nkf-bug:38800] "; + &test("$nkf -W -e --fb-java", + "\xF0\xA0\xAE\xB7", + "\\uD842\\uDFB7"); + # [KNOWNBUG] # printf "%-40s", "[ruby-dev:47057] "; # &test("$nkf -jW -M --cp932", -- 2.11.0