original/man3/scanf.3

   1 .\" Copyright (c) 1990, 1991 The Regents of the University of California.
   2 .\" All rights reserved.
   3 .\"
   4 .\" This code is derived from software contributed to Berkeley by
   5 .\" Chris Torek and the American National Standards Committee X3,
   6 .\" on Information Processing Systems.
   7 .\"
   8 .\" Redistribution and use in source and binary forms, with or without
   9 .\" modification, are permitted provided that the following conditions
  10 .\" are met:
  11 .\" 1. Redistributions of source code must retain the above copyright
  12 .\"    notice, this list of conditions and the following disclaimer.
  13 .\" 2. Redistributions in binary form must reproduce the above copyright
  14 .\"    notice, this list of conditions and the following disclaimer in the
  15 .\"    documentation and/or other materials provided with the distribution.
  16 .\" 3. All advertising materials mentioning features or use of this software
  17 .\"    must display the following acknowledgement:
  18 .\"     This product includes software developed by the University of
  19 .\"     California, Berkeley and its contributors.
  20 .\" 4. Neither the name of the University nor the names of its contributors
  21 .\"    may be used to endorse or promote products derived from this software
  22 .\"    without specific prior written permission.
  23 .\"
  24 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34 .\" SUCH DAMAGE.
  35 .\"
  36 .\"     @(#)scanf.3     6.14 (Berkeley) 1/8/93
  37 .\"
  38 .\" Converted for Linux, Mon Nov 29 15:22:01 1993, faith@cs.unc.edu
  39 .\" modified to resemble the GNU libio setup used in the Linux libc
  40 .\" used in versions 4.x (x>4) and 5   Helmut.Geyer@iwr.uni-heidelberg.de
  41 .\" Modified, aeb, 970121
  42 .\" 2005-07-14, mtk, added description of %n$ form; various text
  43 .\"     incorporated from the GNU C library documentation ((C) The
  44 .\"     Free Software Foundation); other parts substantially rewritten.
  45 .\"
  46 .\" 2008-06-23, mtk
  47 .\"     Add ERRORS section.
  48 .\"     Document the 'a' and 'm' modifiers for dynamic string allocation.
  49 .\"
  50 .TH SCANF 3  2011-09-28 "GNU" "Linux Programmer's Manual"
  51 .SH NAME
  52 scanf, fscanf, sscanf, vscanf, vsscanf, vfscanf \- input format conversion
  53 .SH SYNOPSIS
  54 .nf
  55 .B #include <stdio.h>
  56
  57 .BI "int scanf(const char *" format ", ...);"
  58 .BI "int fscanf(FILE *" stream ", const char *" format ", ...);"
  59 .BI "int sscanf(const char *" str ", const char *" format ", ...);"
  60 .sp
  61 .B #include <stdarg.h>
  62
  63 .BI "int vscanf(const char *" format ", va_list " ap );
  64 .BI "int vsscanf(const char *" str ", const char *" format ", va_list " ap );
  65 .BI "int vfscanf(FILE *" stream ", const char *" format ", va_list " ap );
  66 .fi
  67 .sp
  68 .in -4n
  69 Feature Test Macro Requirements for glibc (see
  70 .BR feature_test_macros (7)):
  71 .in
  72 .ad l
  73 .sp
  74 .BR vscanf (),
  75 .BR vsscanf (),
  76 .BR vfscanf ():
  77 .RS 4
  78 _XOPEN_SOURCE\ >=\ 600 || _ISOC99_SOURCE ||
  79 _POSIX_C_SOURCE\ >=\ 200112L;
  80 .br
  81 or
  82 .I "cc -std=c99"
  83 .ad
  84 .RE
  85 .SH DESCRIPTION
  86 The
  87 .BR scanf ()
  88 family of functions scans input according to
  89 .I format
  90 as described below.
  91 This format may contain
  92 .IR "conversion specifications" ;
  93 the results from such conversions, if any,
  94 are stored in the locations pointed to by the
  95 .I pointer
  96 arguments that follow
  97 .IR format .
  98 Each
  99 .I pointer
 100 argument must be of a type that is appropriate for the value returned
 101 by the corresponding conversion specification.
 102
 103 If the number of conversion specifications in
 104 .I format
 105 exceeds the number of
 106 .I pointer
 107 arguments, the results are undefined.
 108 If the number of
 109 .I pointer
 110 arguments exceeds the number of conversion specifications, then the excess
 111 .I pointer
 112 arguments are evaluated, but are otherwise ignored.
 113
 114 The
 115 .BR scanf ()
 116 function reads input from the standard input stream
 117 .IR stdin ,
 118 .BR fscanf ()
 119 reads input from the stream pointer
 120 .IR stream ,
 121 and
 122 .BR sscanf ()
 123 reads its input from the character string pointed to by
 124 .IR str .
 125 .PP
 126 The
 127 .BR vfscanf ()
 128 function is analogous to
 129 .BR vfprintf (3)
 130 and reads input from the stream pointer
 131 .I stream
 132 using a variable argument list of pointers (see
 133 .BR stdarg (3)).
 134 The
 135 .BR vscanf ()
 136 function scans a variable argument list from the standard input and the
 137 .BR vsscanf ()
 138 function scans it from a string; these are analogous to the
 139 .BR vprintf (3)
 140 and
 141 .BR vsprintf (3)
 142 functions respectively.
 143 .PP
 144 The
 145 .I format
 146 string consists of a sequence of
 147 .I directives
 148 which describe how to process the sequence of input characters.
 149 If processing of a directive fails, no further input is read, and
 150 .BR scanf ()
 151 returns.
 152 A "failure" can be either of the following:
 153 .IR "input failure" ,
 154 meaning that input characters were unavailable, or
 155 .IR "matching failure" ,
 156 meaning that the input was inappropriate (see below).
 157
 158 A directive is one of the following:
 159 .TP
 160 \(bu
 161 A sequence of white-space characters (space, tab, newline, etc.; see
 162 .BR isspace (3)).
 163 This directive matches any amount of white space,
 164 including none, in the input.
 165 .TP
 166 \(bu
 167 An ordinary character (i.e., one other than white space or \(aq%\(aq).
 168 This character must exactly match the next character of input.
 169 .TP
 170 \(bu
 171 A conversion specification,
 172 which commences with a \(aq%\(aq (percent) character.
 173 A sequence of characters from the input is converted according to
 174 this specification, and the result is placed in the corresponding
 175 .I pointer
 176 argument.
 177 If the next item of input does not match the conversion specification,
 178 the conversion fails\(emthis is a
 179 .IR "matching failure" .
 180 .PP
 181 Each
 182 .I conversion specification
 183 in
 184 .I format
 185 begins with either the character \(aq%\(aq or the character sequence
 186 "\fB%\fP\fIn\fP\fB$\fP"
 187 (see below for the distinction) followed by:
 188 .TP
 189 \(bu
 190 An optional \(aq*\(aq assignment-suppression character:
 191 .BR scanf ()
 192 reads input as directed by the conversion specification,
 193 but discards the input.
 194 No corresponding
 195 .I pointer
 196 argument is required, and this specification is not
 197 included in the count of successful assignments returned by
 198 .BR scanf ().
 199 .TP
 200 \(bu
 201 An optional \(aqa\(aq character.
 202 This is used with string conversions, and relieves the caller of the
 203 need to allocate a corresponding buffer to hold the input: instead,
 204 .BR scanf ()
 205 allocates a buffer of sufficient size,
 206 and assigns the address of this buffer to the corresponding
 207 .I pointer
 208 argument, which should be a pointer to a
 209 .I "char *"
 210 variable (this variable does not need to be initialized before the call).
 211 The caller should subsequently
 212 .BR free (3)
 213 this buffer when it is no longer required.
 214 This is a GNU extension;
 215 C99 employs the \(aqa\(aq character as a conversion specifier (and
 216 it can also be used as such in the GNU implementation).
 217 .TP
 218 \(bu
 219 An optional decimal integer which specifies the
 220 .IR "maximum field width" .
 221 Reading of characters stops either when this maximum is reached or
 222 when a nonmatching character is found, whichever happens first.
 223 Most conversions discard initial white space characters (the exceptions
 224 are noted below),
 225 and these discarded characters don't count toward the maximum field width.
 226 String input conversions store a terminating null byte (\(aq\\0\(aq)
 227 to mark the end of the input;
 228 the maximum field width does not include this terminator.
 229 .TP
 230 \(bu
 231 An optional
 232 .IR "type modifier character" .
 233 For example, the
 234 .B l
 235 type modifier is used with integer conversions such as
 236 .B %d
 237 to specify that the corresponding
 238 .I pointer
 239 argument is a pointer to a
 240 .I "long int"
 241 rather than a pointer to an
 242 .IR int .
 243 .TP
 244 \(bu
 245 A
 246 .I "conversion specifier"
 247 that specifies the type of input conversion to be performed.
 248 .PP
 249 The conversion specifications in
 250 .I format
 251 are of two forms, either beginning with \(aq%\(aq or beginning with
 252 "\fB%\fP\fIn\fP\fB$\fP".
 253 The two forms should not be mixed in the same
 254 .I format
 255 string, except that a string containing
 256 "\fB%\fP\fIn\fP\fB$\fP"
 257 specifications can include
 258 .B %%
 259 and
 260 .BR %* .
 261 If
 262 .I format
 263 contains \(aq%\(aq
 264 specifications then these correspond in order with successive
 265 .I pointer
 266 arguments.
 267 In the
 268 "\fB%\fP\fIn\fP\fB$\fP"
 269 form (which is specified in POSIX.1-2001, but not C99),
 270 .I n
 271 is a decimal integer that specifies that the converted input should
 272 be placed in the location referred to by the
 273 .IR n -th
 274 .I pointer
 275 argument following
 276 .IR format .
 277 .SS Conversions
 278 The following
 279 .I "type modifier characters"
 280 can appear in a conversion specification:
 281 .TP
 282 .B h
 283 Indicates that the conversion will be one of
 284 \fBd\fP, \fBi\fP, \fBo\fP, \fBu\fP, \fBx\fP, \fBX\fP, or \fBn\fP
 285 and the next pointer is a pointer to a
 286 .I short int
 287 or
 288 .I unsigned short int
 289 (rather than
 290 .IR int ).
 291 .TP
 292 .B hh
 293 As for
 294 .BR h ,
 295 but the next pointer is a pointer to a
 296 .I signed char
 297 or
 298 .IR "unsigned char" .
 299 .TP
 300 .B j
 301 As for
 302 .BR h ,
 303 but the next pointer is a pointer to an
 304 .I intmax_t
 305 or a
 306 .IR uintmax_t .
 307 This modifier was introduced in C99.
 308 .TP
 309 .B l
 310 Indicates either that the conversion will be one of
 311 \fBd\fP, \fBi\fP, \fBo\fP, \fBu\fP, \fBx\fP, \fBX\fP, or \fBn\fP
 312 and the next pointer is a pointer to a
 313 .I long int
 314 or
 315 .I unsigned long int
 316 (rather than
 317 .IR int ),
 318 or that the conversion will be one of
 319 \fBe\fP, \fBf\fP, or \fBg\fP
 320 and the next pointer is a pointer to
 321 .I double
 322 (rather than
 323 .IR float ).
 324 Specifying two
 325 .B l
 326 characters is equivalent to
 327 .BR L .
 328 If used with
 329 .B %c
 330 or
 331 .B %s
 332 the corresponding parameter is considered
 333 as a pointer to a wide character or wide-character string respectively.
 334 .\" This use of l was introduced in Amendment 1 to ISO C90.
 335 .TP
 336 .B L
 337 Indicates that the conversion will be either
 338 \fBe\fP, \fBf\fP, or \fBg\fP
 339 and the next pointer is a pointer to
 340 .I "long double"
 341 or the conversion will be
 342 \fBd\fP, \fBi\fP, \fBo\fP, \fBu\fP, or \fBx\fP
 343 and the next pointer is a pointer to
 344 .IR "long long" .
 345 .\" MTK, Jul 05: The following is no longer true for modern
 346 .\" ANSI C (i.e., C99):
 347 .\" (Note that long long is not an
 348 .\" ANSI C
 349 .\" type. Any program using this will not be portable to all
 350 .\" architectures).
 351 .TP
 352 .B q
 353 Equivalent to
 354 .BR L .
 355 This specifier does not exist in ANSI C.
 356 .TP
 357 .B t
 358 As for
 359 .BR h ,
 360 but the next pointer is a pointer to a
 361 .IR ptrdiff_t .
 362 This modifier was introduced in C99.
 363 .TP
 364 .B z
 365 As for
 366 .BR h ,
 367 but the next pointer is a pointer to a
 368 .IR size_t .
 369 This modifier was introduced in C99.
 370 .PP
 371 The following
 372 .I "conversion specifiers"
 373 are available:
 374 .TP
 375 .B %
 376 Matches a literal \(aq%\(aq.
 377 That is,
 378 .B %\&%
 379 in the format string matches a
 380 single input \(aq%\(aq character.
 381 No conversion is done (but initial white space characters are discarded),
 382 and assignment does not occur.
 383 .TP
 384 .B d
 385 Matches an optionally signed decimal integer;
 386 the next pointer must be a pointer to
 387 .IR int .
 388 .TP
 389 .B D
 390 Equivalent to
 391 .IR ld ;
 392 this exists only for backward compatibility.
 393 (Note: thus only in libc4.
 394 In libc5 and glibc the
 395 .B %D
 396 is silently ignored, causing old programs to fail mysteriously.)
 397 .TP
 398 .B i
 399 Matches an optionally signed integer; the next pointer must be a pointer to
 400 .IR int .
 401 The integer is read in base 16 if it begins with
 402 .I 0x
 403 or
 404 .IR 0X ,
 405 in base 8 if it begins with
 406 .IR 0 ,
 407 and in base 10 otherwise.
 408 Only characters that correspond to the base are used.
 409 .TP
 410 .B o
 411 Matches an unsigned octal integer; the next pointer must be a pointer to
 412 .IR "unsigned int" .
 413 .TP
 414 .B u
 415 Matches an unsigned decimal integer; the next pointer must be a
 416 pointer to
 417 .IR "unsigned int" .
 418 .TP
 419 .B x
 420 Matches an unsigned hexadecimal integer; the next pointer must
 421 be a pointer to
 422 .IR "unsigned int" .
 423 .TP
 424 .B X
 425 Equivalent to
 426 .BR x .
 427 .TP
 428 .B f
 429 Matches an optionally signed floating-point number; the next pointer must
 430 be a pointer to
 431 .IR float .
 432 .TP
 433 .B e
 434 Equivalent to
 435 .BR f .
 436 .TP
 437 .B g
 438 Equivalent to
 439 .BR f .
 440 .TP
 441 .B E
 442 Equivalent to
 443 .BR f .
 444 .TP
 445 .B a
 446 (C99) Equivalent to
 447 .BR f .
 448 .TP
 449 .B s
 450 Matches a sequence of non-white-space characters;
 451 the next pointer must be a pointer to a character array that is
 452 long enough to hold the input sequence and
 453 the terminating null byte (\(aq\\0\(aq), which is added automatically.
 454 The input string stops at white space or at the maximum field
 455 width, whichever occurs first.
 456 .TP
 457 .B c
 458 Matches a sequence of characters whose length is specified by the
 459 .I maximum field width
 460 (default 1); the next pointer must be a pointer to
 461 .IR char ,
 462 and there must be enough room for all the characters (no terminating
 463 null byte
 464 is added).
 465 The usual skip of leading white space is suppressed.
 466 To skip white space first, use an explicit space in the format.
 467 .TP
 468 .B \&[
 469 Matches a nonempty sequence of characters from the specified set of
 470 accepted characters; the next pointer must be a pointer to
 471 .IR char ,
 472 and there must be enough room for all the characters in the string, plus a
 473 terminating null byte.
 474 The usual skip of leading white space is suppressed.
 475 The string is to be made up of characters in (or not in) a particular set;
 476 the set is defined by the characters between the open bracket
 477 .B [
 478 character and a close bracket
 479 .B ]
 480 character.
 481 The set
 482 .I excludes
 483 those characters if the first character after the open bracket is a
 484 circumflex
 485 .RB ( ^ ).
 486 To include a close bracket in the set, make it the first character after
 487 the open bracket or the circumflex; any other position will end the set.
 488 The hyphen character
 489 .B \-
 490 is also special; when placed between two other characters, it adds all
 491 intervening characters to the set.
 492 To include a hyphen, make it the last
 493 character before the final close bracket.
 494 For instance,
 495 .B [^]0\-9\-]
 496 means
 497 the set "everything except close bracket, zero through nine, and hyphen".
 498 The string ends with the appearance of a character not in the (or, with a
 499 circumflex, in) set or when the field width runs out.
 500 .TP
 501 .B p
 502 Matches a pointer value (as printed by
 503 .B %p
 504 in
 505 .BR printf (3));
 506 the next pointer must be a pointer to a pointer to
 507 .IR void .
 508 .TP
 509 .B n
 510 Nothing is expected; instead, the number of characters consumed thus far
 511 from the input is stored through the next pointer, which must be a pointer
 512 to
 513 .IR int .
 514 This is
 515 .I not
 516 a conversion, although it can be suppressed with the
 517 .B *
 518 assignment-suppression character.
 519 The C standard says: "Execution of a
 520 .B %n
 521 directive does not increment
 522 the assignment count returned at the completion of execution"
 523 but the Corrigendum seems to contradict this.
 524 Probably it is wise
 525 not to make any assumptions on the effect of
 526 .B %n
 527 conversions on the return value.
 528 .SH "RETURN VALUE"
 529 These functions return the number of input items
 530 successfully matched and assigned,
 531 which can be fewer than provided for,
 532 or even zero in the event of an early matching failure.
 533
 534 The value
 535 .B EOF
 536 is returned if the end of input is reached before either the first
 537 successful conversion or a matching failure occurs.
 538 .B EOF
 539 is also returned if a read error occurs,
 540 in which case the error indicator for the stream (see
 541 .BR ferror (3))
 542 is set, and
 543 .I errno
 544 is set to indicate the error.
 545 .SH ERRORS
 546 .TP
 547 .B EAGAIN
 548 The file descriptor underlying
 549 .I stream
 550 is marked nonblocking, and the read operation would block.
 551 .TP
 552 .B EBADF
 553 The file descriptor underlying
 554 .I stream
 555 is invalid, or not open for reading.
 556 .TP
 557 .B EILSEQ
 558 Input byte sequence does not form a valid character.
 559 .TP
 560 .B EINTR
 561 The read operation was interrupted by a signal; see
 562 .BR signal (7).
 563 .TP
 564 .B EINVAL
 565 Not enough arguments; or
 566 .I format
 567 is NULL.
 568 .TP
 569 .B ENOMEM
 570 Out of memory.
 571 .TP
 572 .B ERANGE
 573 The result of an integer conversion would exceed the size
 574 that can be stored in the corresponding integer type.
 575 .SH "CONFORMING TO"
 576 The functions
 577 .BR fscanf (),
 578 .BR scanf (),
 579 and
 580 .BR sscanf ()
 581 conform to C89 and C99 and POSIX.1-2001.
 582 These standards do not specify the
 583 .B ERANGE
 584 error.
 585 .PP
 586 The
 587 .B q
 588 specifier is the 4.4BSD notation for
 589 .IR "long long" ,
 590 while
 591 .B ll
 592 or the usage of
 593 .B L
 594 in integer conversions is the GNU notation.
 595 .PP
 596 The Linux version of these functions is based on the
 597 .I GNU
 598 .I libio
 599 library.
 600 Take a look at the
 601 .I info
 602 documentation of
 603 .I GNU
 604 .I libc (glibc-1.08)
 605 for a more concise description.
 606 .SH NOTES
 607 The GNU C library supports a nonstandard extension that causes
 608 the library to dynamically allocate a string of sufficient size
 609 for input strings for the
 610 .B %s
 611 and
 612 \fB%a[\fP\fIrange\fP\fB]\fP
 613 conversion specifiers.
 614 .\" This feature seems to be present at least as far back as glibc 2.0.
 615 To make use of this feature, specify
 616 .B a
 617 as a length modifier (thus
 618 .B %as
 619 or
 620 \fB%a[\fP\fIrange\fP\fB]\fP).
 621 The caller must
 622 .BR free (3)
 623 the returned string, as in the following example:
 624 .in +4n
 625 .nf
 626
 627 char *p;
 628 int n;
 629
 630 errno = 0;
 631 n = scanf("%a[a-z]", &p);
 632 if (n == 1) {
 633     printf("read: %s\\n", p);
 634     free(p);
 635 } else if (errno != 0) {
 636     perror("scanf");
 637 } else {
 638     fprintf(stderr, "No matching characters\\n");
 639 }
 640 .fi
 641 .in
 642 .PP
 643 As shown in the above example, it is only necessary to call
 644 .BR free (3)
 645 if the
 646 .BR scanf ()
 647 call successfully read a string.
 648 .PP
 649 The
 650 .B a
 651 modifier is not available if the program is compiled with
 652 .I "gcc -std=c99"
 653 or
 654 .IR "gcc -D_ISOC99_SOURCE"
 655 (unless
 656 .B _GNU_SOURCE
 657 is also specified), in which case the
 658 .B a
 659 is interpreted as a specifier for floating-point numbers (see above).
 660
 661 Since version 2.7, glibc also provides the
 662 .B m
 663 modifier for the same purpose as the
 664 .BR a
 665 modifier.
 666 The
 667 .B m
 668 modifier has the following advantages:
 669 .IP * 2
 670 It may also be applied to
 671 .B %c
 672 conversion specifiers (e.g.,
 673 .BR %3mc ).
 674 .IP *
 675 It avoids ambiguity with respect to the
 676 .B %a
 677 floating-point conversion specifier (and is unaffected by
 678 .IR "gcc -std=c99"
 679 etc.).
 680 .IP *
 681 It is specified in the upcoming revision of the POSIX.1 standard.
 682 .SH BUGS
 683 All functions are fully C89 conformant, but provide the
 684 additional specifiers
 685 .B q
 686 and
 687 .B a
 688 as well as an additional behavior of the
 689 .B L
 690 and
 691 .B l
 692 specifiers.
 693 The latter may be considered to be a bug, as it changes the
 694 behavior of specifiers defined in C89.
 695 .PP
 696 Some combinations of the type modifiers and conversion
 697 specifiers defined by ANSI C do not make sense
 698 (e.g.
 699 .BR "%Ld" ).
 700 While they may have a well-defined behavior on Linux, this need not
 701 be so on other architectures.
 702 Therefore it usually is better to use
 703 modifiers that are not defined by ANSI C at all, that is, use
 704 .B q
 705 instead of
 706 .B L
 707 in combination with
 708 \fBd\fP, \fBi\fP, \fBo\fP, \fBu\fP, \fBx\fP, and \fBX\fP
 709 conversions or
 710 .BR ll .
 711 .PP
 712 The usage of
 713 .B q
 714 is not the same as on 4.4BSD,
 715 as it may be used in float conversions equivalently to
 716 .BR L .
 717 .SH "SEE ALSO"
 718 .BR getc (3),
 719 .BR printf (3),
 720 .BR setlocale (3),
 721 .BR strtod (3),
 722 .BR strtol (3),
 723 .BR strtoul (3)