'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: Utf.3,v 1.13.2.2 2003/07/18 22:15:45 dkf Exp $
-'\"
-'\" The definitions below are for supplemental macros used in Tcl/Tk
-'\" manual entries.
-'\"
-'\" .AP type name in/out ?indent?
-'\" Start paragraph describing an argument to a library procedure.
-'\" type is type of argument (int, etc.), in/out is either "in", "out",
-'\" or "in/out" to describe whether procedure reads or modifies arg,
-'\" and indent is equivalent to second arg of .IP (shouldn't ever be
-'\" needed; use .AS below instead)
-'\"
-'\" .AS ?type? ?name?
-'\" Give maximum sizes of arguments for setting tab stops. Type and
-'\" name are examples of largest possible arguments that will be passed
-'\" to .AP later. If args are omitted, default tab stops are used.
-'\"
-'\" .BS
-'\" Start box enclosure. From here until next .BE, everything will be
-'\" enclosed in one large box.
-'\"
-'\" .BE
-'\" End of box enclosure.
-'\"
-'\" .CS
-'\" Begin code excerpt.
-'\"
-'\" .CE
-'\" End code excerpt.
-'\"
-'\" .VS ?version? ?br?
-'\" Begin vertical sidebar, for use in marking newly-changed parts
-'\" of man pages. The first argument is ignored and used for recording
-'\" the version when the .VS was added, so that the sidebars can be
-'\" found and removed when they reach a certain age. If another argument
-'\" is present, then a line break is forced before starting the sidebar.
-'\"
-'\" .VE
-'\" End of vertical sidebar.
-'\"
-'\" .DS
-'\" Begin an indented unfilled display.
-'\"
-'\" .DE
-'\" End of indented unfilled display.
-'\"
-'\" .SO
-'\" Start of list of standard options for a Tk widget. The
-'\" options follow on successive lines, in four columns separated
-'\" by tabs.
-'\"
-'\" .SE
-'\" End of list of standard options for a Tk widget.
-'\"
-'\" .OP cmdName dbName dbClass
-'\" Start of description of a specific option. cmdName gives the
-'\" option's name as specified in the class command, dbName gives
-'\" the option's name in the option database, and dbClass gives
-'\" the option's class in the option database.
-'\"
-'\" .UL arg1 arg2
-'\" Print arg1 underlined, then print arg2 normally.
-'\"
-'\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $
-'\"
-'\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages.
+.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
+.\" The -*- nroff -*- definitions below are for supplemental macros used
+.\" in Tcl/Tk manual entries.
+.\"
+.\" .AP type name in/out ?indent?
+.\" Start paragraph describing an argument to a library procedure.
+.\" type is type of argument (int, etc.), in/out is either "in", "out",
+.\" or "in/out" to describe whether procedure reads or modifies arg,
+.\" and indent is equivalent to second arg of .IP (shouldn't ever be
+.\" needed; use .AS below instead)
+.\"
+.\" .AS ?type? ?name?
+.\" Give maximum sizes of arguments for setting tab stops. Type and
+.\" name are examples of largest possible arguments that will be passed
+.\" to .AP later. If args are omitted, default tab stops are used.
+.\"
+.\" .BS
+.\" Start box enclosure. From here until next .BE, everything will be
+.\" enclosed in one large box.
+.\"
+.\" .BE
+.\" End of box enclosure.
+.\"
+.\" .CS
+.\" Begin code excerpt.
+.\"
+.\" .CE
+.\" End code excerpt.
+.\"
+.\" .VS ?version? ?br?
+.\" Begin vertical sidebar, for use in marking newly-changed parts
+.\" of man pages. The first argument is ignored and used for recording
+.\" the version when the .VS was added, so that the sidebars can be
+.\" found and removed when they reach a certain age. If another argument
+.\" is present, then a line break is forced before starting the sidebar.
+.\"
+.\" .VE
+.\" End of vertical sidebar.
+.\"
+.\" .DS
+.\" Begin an indented unfilled display.
+.\"
+.\" .DE
+.\" End of indented unfilled display.
+.\"
+.\" .SO ?manpage?
+.\" Start of list of standard options for a Tk widget. The manpage
+.\" argument defines where to look up the standard options; if
+.\" omitted, defaults to "options". The options follow on successive
+.\" lines, in three columns separated by tabs.
+.\"
+.\" .SE
+.\" End of list of standard options for a Tk widget.
+.\"
+.\" .OP cmdName dbName dbClass
+.\" Start of description of a specific option. cmdName gives the
+.\" option's name as specified in the class command, dbName gives
+.\" the option's name in the option database, and dbClass gives
+.\" the option's class in the option database.
+.\"
+.\" .UL arg1 arg2
+.\" Print arg1 underlined, then print arg2 normally.
+.\"
+.\" .QW arg1 ?arg2?
+.\" Print arg1 in quotes, then arg2 normally (for trailing punctuation).
+.\"
+.\" .PQ arg1 ?arg2?
+.\" Print an open parenthesis, arg1 in quotes, then arg2 normally
+.\" (for trailing punctuation) and then a closing parenthesis.
+.\"
+.\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages.
.if t .wh -1.3i ^B
.nr ^l \n(.l
.ad b
-'\" # Start an argument description
+.\" # Start an argument description
.de AP
.ie !"\\$4"" .TP \\$4
.el \{\
.\}
.ta \\n()Au \\n()Bu
.ie !"\\$3"" \{\
-\&\\$1 \\fI\\$2\\fP (\\$3)
+\&\\$1 \\fI\\$2\\fP (\\$3)
.\".b
.\}
.el \{\
.\}
.\}
..
-'\" # define tabbing values for .AP
+.\" # define tabbing values for .AP
.de AS
.nr )A 10n
.if !"\\$1"" .nr )A \\w'\\$1'u+3n
.nr )C \\n()Bu+\\w'(in/out)'u+2n
..
.AS Tcl_Interp Tcl_CreateInterp in/out
-'\" # BS - start boxed text
-'\" # ^y = starting y location
-'\" # ^b = 1
+.\" # BS - start boxed text
+.\" # ^y = starting y location
+.\" # ^b = 1
.de BS
.br
.mk ^y
.if n \l'\\n(.lu\(ul'
.if n .fi
..
-'\" # BE - end boxed text (draw box now)
+.\" # BE - end boxed text (draw box now)
.de BE
.nf
.ti 0
.br
.nr ^b 0
..
-'\" # VS - start vertical sidebar
-'\" # ^Y = starting y location
-'\" # ^v = 1 (for troff; for nroff this doesn't matter)
+.\" # VS - start vertical sidebar
+.\" # ^Y = starting y location
+.\" # ^v = 1 (for troff; for nroff this doesn't matter)
.de VS
.if !"\\$2"" .br
.mk ^Y
.ie n 'mc \s12\(br\s0
.el .nr ^v 1u
..
-'\" # VE - end of vertical sidebar
+.\" # VE - end of vertical sidebar
.de VE
.ie n 'mc
.el \{\
.\}
.nr ^v 0
..
-'\" # Special macro to handle page bottom: finish off current
-'\" # box/sidebar if in box/sidebar mode, then invoked standard
-'\" # page bottom macro.
+.\" # Special macro to handle page bottom: finish off current
+.\" # box/sidebar if in box/sidebar mode, then invoked standard
+.\" # page bottom macro.
.de ^B
.ev 2
'ti 0
.mk ^Y
.\}
..
-'\" # DS - begin display
+.\" # DS - begin display
.de DS
.RS
.nf
.sp
..
-'\" # DE - end display
+.\" # DE - end display
.de DE
.fi
.RE
.sp
..
-'\" # SO - start of list of standard options
+.\" # SO - start of list of standard options
.de SO
+'ie '\\$1'' .ds So \\fBoptions\\fR
+'el .ds So \\fB\\$1\\fR
.SH "STANDARD OPTIONS"
.LP
.nf
.ta 5.5c 11c
.ft B
..
-'\" # SE - end of list of standard options
+.\" # SE - end of list of standard options
.de SE
.fi
.ft R
.LP
-See the \\fBoptions\\fR manual entry for details on the standard options.
+See the \\*(So manual entry for details on the standard options.
..
-'\" # OP - start of full description for a single option
+.\" # OP - start of full description for a single option
.de OP
.LP
.nf
.fi
.IP
..
-'\" # CS - begin code excerpt
+.\" # CS - begin code excerpt
.de CS
.RS
.nf
.ta .25i .5i .75i 1i
..
-'\" # CE - end code excerpt
+.\" # CE - end code excerpt
.de CE
.fi
.RE
..
+.\" # UL - underline word
.de UL
\\$1\l'|0\(ul'\\$2
..
-.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
+.\" # QW - apply quotation marks to word
+.de QW
+.ie '\\*(lq'"' ``\\$1''\\$2
+.\"" fix emacs highlighting
+.el \\*(lq\\$1\\*(rq\\$2
+..
+.\" # PQ - apply parens and quotation marks to word
+.de PQ
+.ie '\\*(lq'"' (``\\$1''\\$2)\\$3
+.\"" fix emacs highlighting
+.el (\\*(lq\\$1\\*(rq\\$2)\\$3
+..
+.\" # QR - quoted range
+.de QR
+.ie '\\*(lq'"' ``\\$1''\\-``\\$2''\\$3
+.\"" fix emacs highlighting
+.el \\*(lq\\$1\\*(rq\\-\\*(lq\\$2\\*(rq\\$3
+..
+.\" # MT - "empty" string
+.de MT
+.QW ""
+..
.BS
.SH NAME
-Tcl_UniChar, Tcl_UniCharCaseMatch, Tcl_UniCharNcasecmp, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings.
+Tcl_UniChar, Tcl_UniCharCaseMatch, Tcl_UniCharNcasecmp, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
-typedef ... Tcl_UniChar;
+typedef ... \fBTcl_UniChar\fR;
.sp
int
\fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
.sp
int
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
-.VS 8.4
.sp
char *
-\fBTcl_UniCharToUtfDString\fR(\fIuniStr, numChars, dstPtr\fR)
+\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
.sp
Tcl_UniChar *
-\fBTcl_UtfToUniCharDString\fR(\fIsrc, len, dstPtr\fR)
-.VE 8.4
+\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp
int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
int
-\fBTcl_UniCharNcmp\fR(\fIuniStr, uniStr, num\fR)
-.VS 8.4
+\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
.sp
int
-\fBTcl_UniCharNcasecmp\fR(\fIuniStr, uniStr, num\fR)
+\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
.sp
int
\fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
-.VE 8.4
.sp
int
-\fBTcl_UtfNcmp\fR(\fIsrc, src, num\fR)
+\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
.sp
int
-\fBTcl_UtfNcasecmp\fR(\fIsrc, src, num\fR)
+\fBTcl_UtfNcasecmp\fR(\fIcs, ct, numChars\fR)
.sp
int
-\fBTcl_UtfCharComplete\fR(\fIsrc, len\fR)
+\fBTcl_UtfCharComplete\fR(\fIsrc, length\fR)
.sp
int
-\fBTcl_NumUtfChars\fR(\fIsrc, len\fR)
-.VS 8.4
+\fBTcl_NumUtfChars\fR(\fIsrc, length\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfFindFirst\fR(\fIsrc, ch\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfFindLast\fR(\fIsrc, ch\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfNext\fR(\fIsrc\fR)
.sp
-CONST char *
+const char *
\fBTcl_UtfPrev\fR(\fIsrc, start\fR)
-.VE 8.4
.sp
Tcl_UniChar
\fBTcl_UniCharAtIndex\fR(\fIsrc, index\fR)
-.VS 8.4
.sp
-CONST char *
+const char *
\fBTcl_UtfAtIndex\fR(\fIsrc, index\fR)
-.VE 8.4
.sp
int
\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)
.SH ARGUMENTS
-.AS "CONST Tcl_UniChar" numChars in/out
+.AS "const Tcl_UniChar" *uniPattern in/out
.AP char *buf out
Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most
-TCL_UTF_MAX bytes are stored in the buffer.
+\fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int ch in
The Tcl_UniChar to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
-.AP "CONST char" *src in
+.AP "const char" *src in
+Pointer to a UTF-8 string.
+.AP "const char" *cs in
+Pointer to a UTF-8 string.
+.AP "const char" *ct in
Pointer to a UTF-8 string.
-.AP "CONST Tcl_UniChar" *uniStr in
+.AP "const Tcl_UniChar" *uniStr in
+A null-terminated Unicode string.
+.AP "const Tcl_UniChar" *ucs in
+A null-terminated Unicode string.
+.AP "const Tcl_UniChar" *uct in
A null-terminated Unicode string.
-.AP "CONST Tcl_UniChar" *uniPattern in
+.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.
-.AP int len in
+.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters). If
negative, all bytes up to the first null byte are used.
-.AP int numChars in
+.AP int uniLength in
The length of the Unicode string in characters. Must be greater than or
equal to 0.
-.AP "Tcl_DString" *dstPtr in/out
-A pointer to a previously-initialized \fBTcl_DString\fR.
-.AP "unsigned long" num in
+.AP "Tcl_DString" *dsPtr in/out
+A pointer to a previously initialized \fBTcl_DString\fR.
+.AP "unsigned long" numChars in
The number of characters to compare.
-.AP "CONST char" *start in
+.AP "const char" *start in
Pointer to the beginning of a UTF-8 string.
.AP int index in
The index of a character (not byte) in the UTF-8 string.
including the backslash character.
.AP char *dst out
Buffer in which the bytes represented by the backslash sequence are stored.
-At most TCL_UTF_MAX bytes are stored in the buffer.
-.VS 8.4
+At most \fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int nocase in
Specifies whether the match should be done case-sensitive (0) or
case-insensitive (1).
-.VE 8.4
.BE
.SH DESCRIPTION
These routines convert between UTF-8 strings and Tcl_UniChars. A
Tcl_UniChar is a Unicode character represented as an unsigned, fixed-size
quantity. A UTF-8 character is a Unicode character represented as
-a varying-length sequence of up to TCL_UTF_MAX bytes. A multibyte UTF-8
+a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8
sequence consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the
-number of bytes read from \fIsrc\fR.. The caller must ensure that the
+number of bytes read from \fIsrc\fR. The caller must ensure that the
source buffer is long enough such that this routine does not run off the
end and dereference non-existent or random memory; if the source buffer
is known to be null-terminated, this will not happen. If the input is
0x00ff and return 1.
.PP
\fBTcl_UniCharToUtfDString\fR converts the given Unicode string
-to UTF-8, storing the result in a previously-initialized \fBTcl_DString\fR.
-You must specify the length of the given Unicode string.
+to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR.
+You must specify \fIuniLength\fR, the length of the given Unicode string.
The return value is a pointer to the UTF-8 representation of the
Unicode string. Storage for the return value is appended to the
end of the \fBTcl_DString\fR.
.PP
\fBTcl_UtfToUniCharDString\fR converts the given UTF-8 string to Unicode,
-storing the result in the previously-initialized \fBTcl_DString\fR.
-you may either specify the length of the given UTF-8 string or "-1",
+storing the result in the previously initialized \fBTcl_DString\fR.
+In the argument \fIlength\fR, you may either specify the length of
+the given UTF-8 string in bytes or
+.QW \-1 ,
in which case \fBTcl_UtfToUniCharDString\fR uses \fBstrlen\fR to
calculate the length. The return value is a pointer to the Unicode
representation of the UTF-8 string. Storage for the return value
.PP
\fBTcl_UniCharNcmp\fR and \fBTcl_UniCharNcasecmp\fR correspond to
\fBstrncmp\fR and \fBstrncasecmp\fR, respectively, for Unicode characters.
-They accepts two null-terminated Unicode strings and the number of characters
-to compare. Both strings are assumed to be at least \fIlen\fR characters
+They accept two null-terminated Unicode strings and the number of characters
+to compare. Both strings are assumed to be at least \fInumChars\fR characters
long. \fBTcl_UniCharNcmp\fR compares the two strings character-by-character
according to the Unicode character ordering. It returns an integer greater
than, equal to, or less than 0 if the first string is greater than, equal
to, or less than the second string respectively. \fBTcl_UniCharNcasecmp\fR
is the Unicode case insensitive version.
.PP
-.VS 8.4
\fBTcl_UniCharCaseMatch\fR is the Unicode equivalent to
\fBTcl_StringCaseMatch\fR. It accepts a null-terminated Unicode string,
a Unicode pattern, and a boolean value specifying whether the match should
be case sensitive and returns whether the string matches the pattern.
-.VE 8.4
.PP
\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
accepts two null-terminated UTF-8 strings and the number of characters
-to compare. (Both strings are assumed to be at least \fIlen\fR
+to compare. (Both strings are assumed to be at least \fInumChars\fR
characters long.) \fBTcl_UtfNcmp\fR compares the two strings
character-by-character according to the Unicode character ordering.
It returns an integer greater than, equal to, or less than 0 if the
characters.
.PP
\fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR
-of length \fIlen\fR bytes is long enough to be decoded by
+of \fIlength\fR bytes is long enough to be decoded by
\fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee
that the UTF-8 string is properly formed. This routine is used by
procedures that are operating on a byte at a time and need to know if a
.PP
\fBTcl_NumUtfChars\fR corresponds to \fBstrlen\fR for UTF-8 strings. It
returns the number of Tcl_UniChars that are represented by the UTF-8 string
-\fIsrc\fR. The length of the source string is \fIlen\fR bytes. If the
+\fIsrc\fR. The length of the source string is \fIlength\fR bytes. If the
length is negative, all bytes up to the first null byte are used.
.PP
\fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings. It
\fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl
commands. It parses a backslash sequence and stores the properly formed
UTF-8 character represented by the backslash sequence in the output
-buffer \fIdst\fR. At most TCL_UTF_MAX bytes are stored in the buffer.
+buffer \fIdst\fR. At most \fBTCL_UTF_MAX\fR bytes are stored in the buffer.
\fBTcl_UtfBackslash\fR modifies \fI*readPtr\fR to contain the number
of bytes in the backslash sequence, including the backslash character.
The return value is the number of bytes stored in the output buffer.