'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: Encoding.3,v 1.11.2.1 2003/07/18 16:56:24 dgp Exp $
-'\"
-'\" The definitions below are for supplemental macros used in Tcl/Tk
-'\" manual entries.
-'\"
-'\" .AP type name in/out ?indent?
-'\" Start paragraph describing an argument to a library procedure.
-'\" type is type of argument (int, etc.), in/out is either "in", "out",
-'\" or "in/out" to describe whether procedure reads or modifies arg,
-'\" and indent is equivalent to second arg of .IP (shouldn't ever be
-'\" needed; use .AS below instead)
-'\"
-'\" .AS ?type? ?name?
-'\" Give maximum sizes of arguments for setting tab stops. Type and
-'\" name are examples of largest possible arguments that will be passed
-'\" to .AP later. If args are omitted, default tab stops are used.
-'\"
-'\" .BS
-'\" Start box enclosure. From here until next .BE, everything will be
-'\" enclosed in one large box.
-'\"
-'\" .BE
-'\" End of box enclosure.
-'\"
-'\" .CS
-'\" Begin code excerpt.
-'\"
-'\" .CE
-'\" End code excerpt.
-'\"
-'\" .VS ?version? ?br?
-'\" Begin vertical sidebar, for use in marking newly-changed parts
-'\" of man pages. The first argument is ignored and used for recording
-'\" the version when the .VS was added, so that the sidebars can be
-'\" found and removed when they reach a certain age. If another argument
-'\" is present, then a line break is forced before starting the sidebar.
-'\"
-'\" .VE
-'\" End of vertical sidebar.
-'\"
-'\" .DS
-'\" Begin an indented unfilled display.
-'\"
-'\" .DE
-'\" End of indented unfilled display.
-'\"
-'\" .SO
-'\" Start of list of standard options for a Tk widget. The
-'\" options follow on successive lines, in four columns separated
-'\" by tabs.
-'\"
-'\" .SE
-'\" End of list of standard options for a Tk widget.
-'\"
-'\" .OP cmdName dbName dbClass
-'\" Start of description of a specific option. cmdName gives the
-'\" option's name as specified in the class command, dbName gives
-'\" the option's name in the option database, and dbClass gives
-'\" the option's class in the option database.
-'\"
-'\" .UL arg1 arg2
-'\" Print arg1 underlined, then print arg2 normally.
-'\"
-'\" RCS: @(#) $Id: man.macros,v 1.4 2000/08/25 06:18:32 ericm Exp $
-'\"
-'\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages.
+.TH Tcl_GetEncoding 3 "8.1" Tcl "Tcl Library Procedures"
+.\" The -*- nroff -*- definitions below are for supplemental macros used
+.\" in Tcl/Tk manual entries.
+.\"
+.\" .AP type name in/out ?indent?
+.\" Start paragraph describing an argument to a library procedure.
+.\" type is type of argument (int, etc.), in/out is either "in", "out",
+.\" or "in/out" to describe whether procedure reads or modifies arg,
+.\" and indent is equivalent to second arg of .IP (shouldn't ever be
+.\" needed; use .AS below instead)
+.\"
+.\" .AS ?type? ?name?
+.\" Give maximum sizes of arguments for setting tab stops. Type and
+.\" name are examples of largest possible arguments that will be passed
+.\" to .AP later. If args are omitted, default tab stops are used.
+.\"
+.\" .BS
+.\" Start box enclosure. From here until next .BE, everything will be
+.\" enclosed in one large box.
+.\"
+.\" .BE
+.\" End of box enclosure.
+.\"
+.\" .CS
+.\" Begin code excerpt.
+.\"
+.\" .CE
+.\" End code excerpt.
+.\"
+.\" .VS ?version? ?br?
+.\" Begin vertical sidebar, for use in marking newly-changed parts
+.\" of man pages. The first argument is ignored and used for recording
+.\" the version when the .VS was added, so that the sidebars can be
+.\" found and removed when they reach a certain age. If another argument
+.\" is present, then a line break is forced before starting the sidebar.
+.\"
+.\" .VE
+.\" End of vertical sidebar.
+.\"
+.\" .DS
+.\" Begin an indented unfilled display.
+.\"
+.\" .DE
+.\" End of indented unfilled display.
+.\"
+.\" .SO ?manpage?
+.\" Start of list of standard options for a Tk widget. The manpage
+.\" argument defines where to look up the standard options; if
+.\" omitted, defaults to "options". The options follow on successive
+.\" lines, in three columns separated by tabs.
+.\"
+.\" .SE
+.\" End of list of standard options for a Tk widget.
+.\"
+.\" .OP cmdName dbName dbClass
+.\" Start of description of a specific option. cmdName gives the
+.\" option's name as specified in the class command, dbName gives
+.\" the option's name in the option database, and dbClass gives
+.\" the option's class in the option database.
+.\"
+.\" .UL arg1 arg2
+.\" Print arg1 underlined, then print arg2 normally.
+.\"
+.\" .QW arg1 ?arg2?
+.\" Print arg1 in quotes, then arg2 normally (for trailing punctuation).
+.\"
+.\" .PQ arg1 ?arg2?
+.\" Print an open parenthesis, arg1 in quotes, then arg2 normally
+.\" (for trailing punctuation) and then a closing parenthesis.
+.\"
+.\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages.
.if t .wh -1.3i ^B
.nr ^l \n(.l
.ad b
-'\" # Start an argument description
+.\" # Start an argument description
.de AP
.ie !"\\$4"" .TP \\$4
.el \{\
.\}
.ta \\n()Au \\n()Bu
.ie !"\\$3"" \{\
-\&\\$1 \\fI\\$2\\fP (\\$3)
+\&\\$1 \\fI\\$2\\fP (\\$3)
.\".b
.\}
.el \{\
.\}
.\}
..
-'\" # define tabbing values for .AP
+.\" # define tabbing values for .AP
.de AS
.nr )A 10n
.if !"\\$1"" .nr )A \\w'\\$1'u+3n
.nr )C \\n()Bu+\\w'(in/out)'u+2n
..
.AS Tcl_Interp Tcl_CreateInterp in/out
-'\" # BS - start boxed text
-'\" # ^y = starting y location
-'\" # ^b = 1
+.\" # BS - start boxed text
+.\" # ^y = starting y location
+.\" # ^b = 1
.de BS
.br
.mk ^y
.if n \l'\\n(.lu\(ul'
.if n .fi
..
-'\" # BE - end boxed text (draw box now)
+.\" # BE - end boxed text (draw box now)
.de BE
.nf
.ti 0
.br
.nr ^b 0
..
-'\" # VS - start vertical sidebar
-'\" # ^Y = starting y location
-'\" # ^v = 1 (for troff; for nroff this doesn't matter)
+.\" # VS - start vertical sidebar
+.\" # ^Y = starting y location
+.\" # ^v = 1 (for troff; for nroff this doesn't matter)
.de VS
.if !"\\$2"" .br
.mk ^Y
.ie n 'mc \s12\(br\s0
.el .nr ^v 1u
..
-'\" # VE - end of vertical sidebar
+.\" # VE - end of vertical sidebar
.de VE
.ie n 'mc
.el \{\
.\}
.nr ^v 0
..
-'\" # Special macro to handle page bottom: finish off current
-'\" # box/sidebar if in box/sidebar mode, then invoked standard
-'\" # page bottom macro.
+.\" # Special macro to handle page bottom: finish off current
+.\" # box/sidebar if in box/sidebar mode, then invoked standard
+.\" # page bottom macro.
.de ^B
.ev 2
'ti 0
.mk ^Y
.\}
..
-'\" # DS - begin display
+.\" # DS - begin display
.de DS
.RS
.nf
.sp
..
-'\" # DE - end display
+.\" # DE - end display
.de DE
.fi
.RE
.sp
..
-'\" # SO - start of list of standard options
+.\" # SO - start of list of standard options
.de SO
+'ie '\\$1'' .ds So \\fBoptions\\fR
+'el .ds So \\fB\\$1\\fR
.SH "STANDARD OPTIONS"
.LP
.nf
.ta 5.5c 11c
.ft B
..
-'\" # SE - end of list of standard options
+.\" # SE - end of list of standard options
.de SE
.fi
.ft R
.LP
-See the \\fBoptions\\fR manual entry for details on the standard options.
+See the \\*(So manual entry for details on the standard options.
..
-'\" # OP - start of full description for a single option
+.\" # OP - start of full description for a single option
.de OP
.LP
.nf
.fi
.IP
..
-'\" # CS - begin code excerpt
+.\" # CS - begin code excerpt
.de CS
.RS
.nf
.ta .25i .5i .75i 1i
..
-'\" # CE - end code excerpt
+.\" # CE - end code excerpt
.de CE
.fi
.RE
..
+.\" # UL - underline word
.de UL
\\$1\l'|0\(ul'\\$2
..
-.TH Tcl_GetEncoding 3 "8.1" Tcl "Tcl Library Procedures"
+.\" # QW - apply quotation marks to word
+.de QW
+.ie '\\*(lq'"' ``\\$1''\\$2
+.\"" fix emacs highlighting
+.el \\*(lq\\$1\\*(rq\\$2
+..
+.\" # PQ - apply parens and quotation marks to word
+.de PQ
+.ie '\\*(lq'"' (``\\$1''\\$2)\\$3
+.\"" fix emacs highlighting
+.el (\\*(lq\\$1\\*(rq\\$2)\\$3
+..
+.\" # QR - quoted range
+.de QR
+.ie '\\*(lq'"' ``\\$1''\\-``\\$2''\\$3
+.\"" fix emacs highlighting
+.el \\*(lq\\$1\\*(rq\\-\\*(lq\\$2\\*(rq\\$3
+..
+.\" # MT - "empty" string
+.de MT
+.QW ""
+..
.BS
.SH NAME
-Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_WinTCharToUtf, Tcl_WinUtfToTChar, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings.
+Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_WinTCharToUtf, Tcl_WinUtfToTChar, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
void
\fBTcl_FreeEncoding\fR(\fIencoding\fR)
.sp
+int
+\fBTcl_GetEncodingFromObj\fR(\fIinterp, objPtr, encodingPtr\fR)
+.sp
char *
\fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR)
.sp
-int
-\fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
- dstCharsPtr\fR)
-.sp
-char *
+char *
\fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR)
.sp
int
-\fBTcl_UtfToExternal\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
- dstCharsPtr\fR)
+\fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr,
+ dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr\fR)
+.sp
+int
+\fBTcl_UtfToExternal\fR(\fIinterp, encoding, src, srcLen, flags, statePtr,
+ dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr\fR)
.sp
char *
\fBTcl_WinTCharToUtf\fR(\fItsrc, srcLen, dstPtr\fR)
TCHAR *
\fBTcl_WinUtfToTChar\fR(\fIsrc, srcLen, dstPtr\fR)
.sp
-CONST char *
+const char *
\fBTcl_GetEncodingName\fR(\fIencoding\fR)
.sp
int
\fBTcl_SetSystemEncoding\fR(\fIinterp, name\fR)
.sp
+const char *
+\fBTcl_GetEncodingNameFromEnvironment\fR(\fIbufPtr\fR)
+.sp
void
\fBTcl_GetEncodingNames\fR(\fIinterp\fR)
.sp
Tcl_Encoding
\fBTcl_CreateEncoding\fR(\fItypePtr\fR)
.sp
-CONST char *
+Tcl_Obj *
+\fBTcl_GetEncodingSearchPath\fR()
+.sp
+int
+\fBTcl_SetEncodingSearchPath\fR(\fIsearchPath\fR)
+.sp
+const char *
\fBTcl_GetDefaultEncodingDir\fR(\fIvoid\fR)
.sp
void
\fBTcl_SetDefaultEncodingDir\fR(\fIpath\fR)
-
-
.SH ARGUMENTS
-.AS Tcl_EncodingState *dstWrotePtr
+.AS "const Tcl_EncodingType" *dstWrotePtr in/out
.AP Tcl_Interp *interp in
Interpreter to use for error reporting, or NULL if no error reporting is
desired.
-.AP "CONST char" *name in
+.AP "const char" *name in
Name of encoding to load.
.AP Tcl_Encoding encoding in
The encoding to query, free, or use for converting text. If \fIencoding\fR is
NULL, the current system encoding is used.
-.AP "CONST char" *src in
+.AP Tcl_Obj *objPtr in
+Name of encoding to get token for.
+.AP Tcl_Encoding *encodingPtr out
+Points to storage where encoding token is to be written.
+.AP "const char" *src in
For the \fBTcl_ExternalToUtf\fR functions, an array of bytes in the
specified encoding that are to be converted to UTF-8. For the
\fBTcl_UtfToExternal\fR and \fBTcl_WinUtfToTChar\fR functions, an array of
UTF-8 characters to be converted to the specified encoding.
-.AP "CONST TCHAR" *tsrc in
+.AP "const TCHAR" *tsrc in
An array of Windows TCHAR characters to convert to UTF-8.
.AP int srcLen in
Length of \fIsrc\fR or \fItsrc\fR in bytes. If the length is negative, the
result will be stored.
.AP int flags in
Various flag bits OR-ed together.
-TCL_ENCODING_START signifies that the
+\fBTCL_ENCODING_START\fR signifies that the
source buffer is the first block in a (potentially multi-block) input
stream, telling the conversion routine to reset to an initial state and
perform any initialization that needs to occur before the first byte is
-converted. TCL_ENCODING_END signifies that the source buffer is the last
+converted. \fBTCL_ENCODING_END\fR signifies that the source buffer is the last
block in a (potentially multi-block) input stream, telling the conversion
routine to perform any finalization that needs to occur after the last
byte is converted and then to reset to an initial state.
-TCL_ENCODING_STOPONERROR signifies that the conversion routine should
-return immediately upon reading a source character that doesn't exist in
+\fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should
+return immediately upon reading a source character that does not exist in
the target encoding; otherwise a default fallback character will
automatically be substituted.
.AP Tcl_EncodingState *statePtr in/out
Used when converting a (generally long or indefinite length) byte stream
-in a piece by piece fashion. The conversion routine stores its current
+in a piece-by-piece fashion. The conversion routine stores its current
state in \fI*statePtr\fR after \fIsrc\fR (the buffer containing the
current piece) has been converted; that state information must be passed
back when converting the next piece of the stream so the conversion
.AP int *dstCharsPtr out
Filled with the number of characters that correspond to the number of bytes
stored in the output buffer. May be NULL.
-.AP Tcl_EncodingType *typePtr in
+.AP Tcl_DString *bufPtr out
+Storage for the prescribed system encoding name.
+.AP "const Tcl_EncodingType" *typePtr in
Structure that defines a new type of encoding.
-.AP "CONST char" *path in
+.AP Tcl_Obj *searchPath in
+List of filesystem directories in which to search for encoding data files.
+.AP "const char" *path in
A path to the location of the encoding file.
.BE
.SH INTRODUCTION
.SH DESCRIPTION
.PP
\fBTcl_GetEncoding\fR finds an encoding given its \fIname\fR. The name may
-refer to a builtin Tcl encoding, a user-defined encoding registered by
+refer to a built-in Tcl encoding, a user-defined encoding registered by
calling \fBTcl_CreateEncoding\fR, or a dynamically-loadable encoding
file. The return value is a token that represents the encoding and can be
used in subsequent calls to procedures such as \fBTcl_GetEncodingName\fR,
\fBTcl_FreeEncoding\fR will release all storage the encoding was using
and delete it from the database.
.PP
+\fBTcl_GetEncodingFromObj\fR treats the string representation of
+\fIobjPtr\fR as an encoding name, and finds an encoding with that
+name, just as \fBTcl_GetEncoding\fR does. When an encoding is found,
+it is cached within the \fBobjPtr\fR value for future reference, the
+\fBTcl_Encoding\fR token is written to the storage pointed to by
+\fIencodingPtr\fR, and the value \fBTCL_OK\fR is returned. If no such
+encoding is found, the value \fBTCL_ERROR\fR is returned, and no
+writing to \fB*\fR\fIencodingPtr\fR takes place. Just as with
+\fBTcl_GetEncoding\fR, the caller should call \fBTcl_FreeEncoding\fR
+on the resulting encoding token when that token will no longer be
+used.
+.PP
\fBTcl_ExternalToUtfDString\fR converts a source buffer \fIsrc\fR from the
specified \fIencoding\fR into UTF-8. The converted bytes are stored in
\fIdstPtr\fR, which is then null-terminated. The caller should eventually
The destination buffer was not large enough for all of the converted data; as
many characters as could fit were converted though.
.IP \fBTCL_CONVERT_MULTIBYTE\fR 29
-The last fews bytes in the source buffer were the beginning of a multibyte
+The last few bytes in the source buffer were the beginning of a multibyte
sequence, but more bytes were needed to complete this sequence. A
subsequent call to the conversion routine should pass a buffer containing
the unconverted bytes that remained in \fIsrc\fR plus some further bytes
misidentified.
.IP \fBTCL_CONVERT_UNKNOWN\fR 29
The source buffer contained a character that could not be represented in
-the target encoding and TCL_ENCODING_STOPONERROR was specified.
+the target encoding and \fBTCL_ENCODING_STOPONERROR\fR was specified.
.RE
.LP
\fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8
.PP
\fBTcl_WinUtfToTChar\fR and \fBTcl_WinTCharToUtf\fR are
Windows-only convenience
-functions for converting between UTF-8 and Windows strings. On Windows 95
-(as with the Macintosh and Unix operating systems),
-all strings exchanged between Tcl and the operating system are "char"
-based. On Windows NT, some strings exchanged between Tcl and the
-operating system are "char" oriented while others are in Unicode. By
-convention, in Windows a TCHAR is a character in the ANSI code page
-on Windows 95 and a Unicode character on Windows NT.
-.PP
-If you planned to use the same "char" based interfaces on both Windows
-95 and Windows NT, you could use \fBTcl_UtfToExternal\fR and
-\fBTcl_ExternalToUtf\fR (or their \fBTcl_DString\fR equivalents) with an
-encoding of NULL (the current system encoding). On the other hand,
-if you planned to use the Unicode interface when running on Windows NT
-and the "char" interfaces when running on Windows 95, you would have
-to perform the following type of test over and over in your program
-(as represented in pseudo-code):
-.CS
-if (running NT) {
- encoding <- Tcl_GetEncoding("unicode");
- nativeBuffer <- Tcl_UtfToExternal(encoding, utfBuffer);
- Tcl_FreeEncoding(encoding);
-} else {
- nativeBuffer <- Tcl_UtfToExternal(NULL, utfBuffer);
-.CE
-\fBTcl_WinUtfToTChar\fR and \fBTcl_WinTCharToUtf\fR automatically
-handle this test and use the proper encoding based on the current
-operating system. \fBTcl_WinUtfToTChar\fR returns a pointer to
-a TCHAR string, and \fBTcl_WinTCharToUtf\fR expects a TCHAR string
-pointer as the \fIsrc\fR string. Otherwise, these functions
-behave identically to \fBTcl_UtfToExternalDString\fR and
-\fBTcl_ExternalToUtfDString\fR.
+functions for converting between UTF-8 and Windows strings
+based on the TCHAR type which is by convention
+a Unicode character on Windows NT.
+These functions are essentially wrappers around
+\fBTcl_UtfToExternalDString\fR and
+\fBTcl_ExternalToUtfDString\fR that convert to and from the
+Unicode encoding.
.PP
\fBTcl_GetEncodingName\fR is roughly the inverse of \fBTcl_GetEncoding\fR.
Given an \fIencoding\fR, the return value is the \fIname\fR argument that
whenever the user passes a NULL value for the \fIencoding\fR argument to
any of the other encoding functions. If \fIname\fR is NULL, the system
encoding is reset to the default system encoding, \fBbinary\fR. If the
-name did not refer to any known or loadable encoding, TCL_ERROR is
+name did not refer to any known or loadable encoding, \fBTCL_ERROR\fR is
returned and an error message is left in \fIinterp\fR. Otherwise, this
procedure increments the reference count of the new system encoding,
decrements the reference count of the old system encoding, and returns
-TCL_OK.
+\fBTCL_OK\fR.
+.PP
+\fBTcl_GetEncodingNameFromEnvironment\fR provides a means for the Tcl
+library to report the encoding name it believes to be the correct one
+to use as the system encoding, based on system calls and examination of
+the environment suitable for the platform. It accepts \fIbufPtr\fR,
+a pointer to an uninitialized or freed \fBTcl_DString\fR and writes
+the encoding name to it. The \fBTcl_DStringValue\fR is returned.
.PP
\fBTcl_GetEncodingNames\fR sets the \fIinterp\fR result to a list
consisting of the names of all the encodings that are currently defined
.PP
.CS
typedef struct Tcl_EncodingType {
- CONST char *\fIencodingName\fR;
- Tcl_EncodingConvertProc *\fItoUtfProc\fR;
- Tcl_EncodingConvertProc *\fIfromUtfProc\fR;
- Tcl_EncodingFreeProc *\fIfreeProc\fR;
- ClientData \fIclientData\fR;
- int \fInullSize\fR;
-} Tcl_EncodingType;
+ const char *\fIencodingName\fR;
+ Tcl_EncodingConvertProc *\fItoUtfProc\fR;
+ Tcl_EncodingConvertProc *\fIfromUtfProc\fR;
+ Tcl_EncodingFreeProc *\fIfreeProc\fR;
+ ClientData \fIclientData\fR;
+ int \fInullSize\fR;
+} \fBTcl_EncodingType\fR;
.CE
.PP
The \fIencodingName\fR provides a string name for the encoding, by
type \fBTcl_EncodingConvertProc\fR:
.PP
.CS
-typedef int Tcl_EncodingConvertProc(
- ClientData \fIclientData\fR,
- CONST char *\fIsrc\fR,
- int \fIsrcLen\fR,
- int \fIflags\fR,
- Tcl_Encoding *\fIstatePtr\fR,
- char *\fIdst\fR,
- int \fIdstLen\fR,
- int *\fIsrcReadPtr\fR,
- int *\fIdstWrotePtr\fR,
- int *\fIdstCharsPtr\fR);
+typedef int \fBTcl_EncodingConvertProc\fR(
+ ClientData \fIclientData\fR,
+ const char *\fIsrc\fR,
+ int \fIsrcLen\fR,
+ int \fIflags\fR,
+ Tcl_EncodingState *\fIstatePtr\fR,
+ char *\fIdst\fR,
+ int \fIdstLen\fR,
+ int *\fIsrcReadPtr\fR,
+ int *\fIdstWrotePtr\fR,
+ int *\fIdstCharsPtr\fR);
.CE
.PP
The \fItoUtfProc\fR and \fIfromUtfProc\fR procedures are called by the
.PP
The callback procedure \fIfreeProc\fR, if non-NULL, should match the type
\fBTcl_EncodingFreeProc\fR:
+.PP
.CS
-typedef void Tcl_EncodingFreeProc(
- ClientData \fIclientData\fR);
+typedef void \fBTcl_EncodingFreeProc\fR(
+ ClientData \fIclientData\fR);
.CE
.PP
This \fIfreeProc\fR function is called when the encoding is deleted. The
\fIclientData\fR parameter is the same as the \fIclientData\fR field
specified to \fBTcl_CreateEncoding\fR when the encoding was created.
.PP
-
+\fBTcl_GetEncodingSearchPath\fR and \fBTcl_SetEncodingSearchPath\fR
+are called to access and set the list of filesystem directories searched
+for encoding data files.
+.PP
+The value returned by \fBTcl_GetEncodingSearchPath\fR
+is the value stored by the last successful call to
+\fBTcl_SetEncodingSearchPath\fR. If no calls to
+\fBTcl_SetEncodingSearchPath\fR have occurred, Tcl will compute an initial
+value based on the environment. There is one encoding search path for the
+entire process, shared by all threads in the process.
+.PP
+\fBTcl_SetEncodingSearchPath\fR stores \fIsearchPath\fR and returns
+\fBTCL_OK\fR, unless \fIsearchPath\fR is not a valid Tcl list, which
+causes \fBTCL_ERROR\fR to be returned. The elements of \fIsearchPath\fR
+are not verified as existing readable filesystem directories. When
+searching for encoding data files takes place, and non-existent or
+non-readable filesystem directories on the \fIsearchPath\fR are silently
+ignored.
+.PP
\fBTcl_GetDefaultEncodingDir\fR and \fBTcl_SetDefaultEncodingDir\fR
-access and set the directory to use when locating the default encoding
-files. If this value is not NULL, the \fBTclpInitLibraryPath\fR routine
-appends the path to the head of the search path, and uses this path as
-the first place to look into when trying to locate the encoding file.
-
+are obsolete interfaces best replaced with calls to
+\fBTcl_GetEncodingSearchPath\fR and \fBTcl_SetEncodingSearchPath\fR.
+They are called to access and set the first element of the \fIsearchPath\fR
+list. Since Tcl searches \fIsearchPath\fR for encoding data files in
+list order, these routines establish the
+.QW default
+directory in which to find encoding data files.
.SH "ENCODING FILES"
Space would prohibit precompiling into Tcl every possible encoding
algorithm, so many encodings are stored on disk as dynamically-loadable
characters.
.PP
Each dynamically-loadable encoding is represented as a text file. The
-initial line of the file, beginning with a ``#'' symbol, is a comment
+initial line of the file, beginning with a
+.QW #
+symbol, is a comment
that provides a human-readable description of the file. The next line
identifies the type of encoding file. It can be one of the following
letters:
-.IP "[1] \fBS\fR"
+.IP "[1] \fBS\fR"
A single-byte encoding, where one character is always one byte long in the
encoding. An example is \fBiso8859-1\fR, used by many European languages.
-.IP "[2] \fBD\fR"
+.IP "[2] \fBD\fR"
A double-byte encoding, where one character is always two bytes long in the
encoding. An example is \fBbig5\fR, used for Chinese text.
-.IP "[3] \fBM\fR"
+.IP "[3] \fBM\fR"
A multi-byte encoding, where one character may be either one or two bytes long.
-Certain bytes are a lead bytes, indicating that another byte must follow
+Certain bytes are lead bytes, indicating that another byte must follow
and that together the two bytes represent one character. Other bytes are not
lead bytes and represent themselves. An example is \fBshiftjis\fR, used by
many Japanese computers.
-.IP "[4] \fBE\fR"
+.IP "[4] \fBE\fR"
An escape-sequence encoding, specifying that certain sequences of bytes
do not represent characters, but commands that describe how following bytes
should be interpreted.
files. The lines in a table-based encoding file are in the same
format as this example taken from the \fBshiftjis\fR encoding (this is not
the complete file):
+.PP
.CS
# Encoding file: shiftjis, multi-byte
M
Following the first page will be all the other pages, each in the same
format as the first: one number identifying the page followed by 256
double-byte Unicode characters. If a character in the encoding maps to the
-Unicode character 0000, it means that the character doesn't actually exist.
+Unicode character 0000, it means that the character does not actually exist.
If all characters on a page would map to 0000, that page can be omitted.
.PP
Case [4] is the escape-sequence encoding file. The lines in an this type of
file are in the same format as this example taken from the \fBiso2022-jp\fR
encoding:
+.PP
.CS
.ta 1.5i
# Encoding file: iso2022-jp, escape-driven
E
init {}
final {}
-iso8859-1 \\x1b(B
-jis0201 \\x1b(J
-jis0208 \\x1b$@
-jis0208 \\x1b$B
-jis0212 \\x1b$(D
-gb2312 \\x1b$A
-ksc5601 \\x1b$(C
+iso8859-1 \ex1b(B
+jis0201 \ex1b(J
+jis0208 \ex1b$@
+jis0208 \ex1b$B
+jis0212 \ex1b$(D
+gb2312 \ex1b$A
+ksc5601 \ex1b$(C
.CE
.PP
In the file, the first column represents an option and the second column
or expect after the last character. All other options are names of
table-based encodings; the associated value is the escape-sequence that
marks that encoding. Tcl syntax is used for the values; in the above
-example, for instance, ``\fB{}\fR'' represents the empty string and
-``\fB\\x1b\fR'' represents character 27.
+example, for instance,
+.QW \fB{}\fR
+represents the empty string and
+.QW \fB\ex1b\fR
+represents character 27.
.PP
When \fBTcl_GetEncoding\fR encounters an encoding \fIname\fR that has not
been loaded, it attempts to load an encoding file called \fIname\fB.enc\fR
-from the \fBencoding\fR subdirectory of each directory specified in the
-library path \fB$tcl_libPath\fR. If the encoding file exists, but is
+from the \fBencoding\fR subdirectory of each directory that Tcl searches
+for its script library. If the encoding file exists, but is
malformed, an error message will be left in \fIinterp\fR.
.SH KEYWORDS
utf, encoding, convert
-
-
-