.\" Automatically generated by Pod::Man 2.16 (Pod::Simple 3.05) .\" .\" Standard preamble: .\" ======================================================================== .de Sh \" Subsection heading .br .if t .Sp .ne 5 .PP \fB\\$1\fR .PP .. .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` . ds C' 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .ie \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .el \{\ . de IX .. .\} .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . 
ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "nvcc 1" .TH nvcc 1 "Dec 2008" "NVIDIA CUDA" "NVIDIA CUDA Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" \&\fBnvcc\fR \- \s-1NVIDIA\s0 \s-1CUDA\s0 compiler driver .SH "SYNOPSIS" .IX Header "SYNOPSIS" \&\f(CW\*(C`nvcc [options] \*(C'\fR .SH "OPTIONS" .IX Header "OPTIONS" .Sh "Options for specifying the compilation phase" .IX Subsection "Options for specifying the compilation phase" These options specify up to which stage the input files must be compiled, according to the following compilation trajectories for different input file types: .PP .Vb 6 \& .c/.cc/.cpp/.cxx : preprocess, compile, link \& .i/.ii : compile, link \& .cu : preprocess, cuda frontend, ptxassemble, \& merge with host C code, compile, link \& .gpu : nvopencc compile into cubin \& .ptx : ptxassemble into cubin. .Ve .IP "\-\-cuda (\-cuda)" 4 .IX Item "--cuda (-cuda)" Compile all .cu input files to .cu.c output. .IP "\-\-cubin (\-cubin)" 4 .IX Item "--cubin (-cubin)" Compile all .cu/.ptx/.gpu input files to device-only .cubin files. This step discards the host code for each .cu input file. .IP "\-\-fatbin (\-fatbin)" 4 .IX Item "--fatbin (-fatbin)" Compile all .cu/.ptx/.gpu input files to ptx or device\-only .cubin files (depending on the values specified for options '\-arch' and/or '\-code') and place the result into the fat binary file specified with option \-o. This step discards the host code for each .cu input file. .IP "\-\-ptx (\-ptx)" 4 .IX Item "--ptx (-ptx)" Compile all .cu/.gpu input files to device\-only .ptx files. This step discards the host code for each of these input files. .IP "\-\-gpu (\-gpu)" 4 .IX Item "--gpu (-gpu)" Compile all .cu input files to device\-only .gpu files. This step discards the host code for each .cu input file. .IP "\-\-preprocess (\-E)" 4 .IX Item "--preprocess (-E)" Preprocess all .c/.cc/.cpp/.cxx/.cu input files.
.IP "\-\-generate\-dependencies (\-M)" 4 .IX Item "--generate-dependencies (-M)" Generate a dependency file that can be included in a make file for the one .c/.cc/.cpp/.cxx/.cu input file (more than one input file is not allowed in this mode). .IP "\-\-compile (\-c)" 4 .IX Item "--compile (-c)" Compile each .c/.cc/.cpp/.cxx/.cu input file into an object file. .IP "\-\-link (\-link)" 4 .IX Item "--link (-link)" This option specifies the default behavior: compile and link all inputs. .IP "\-\-lib (\-lib)" 4 .IX Item "--lib (-lib)" Compile all inputs into object files (if necessary) and add the results to the specified output library file. .IP "\-\-run (\-run)" 4 .IX Item "--run (-run)" This option compiles and links all inputs into an executable, and executes it. Or, when the input is a single executable, it is executed without any compilation or linking. This step is intended for developers who do not want to be bothered with setting the necessary cuda dll search paths (these will be set temporarily by nvcc). .Sh "File and path specifications" .IX Subsection "File and path specifications" .IP "\-\-output\-file (\-o)" 4 .IX Item "--output-file (-o)" Specify name and location of the output file. Only a single input file is allowed when this option is present in nvcc non\-linking/archiving mode. .IP "\-\-pre\-include ,... (\-include)" 4 .IX Item "--pre-include ,... (-include)" Specify header files that must be preincluded during preprocessing. .IP "\-\-library ,... (\-l)" 4 .IX Item "--library ,... (-l)" Specify libraries to be used in the linking stage. The libraries are searched for on the library search paths that have been specified using option '\-L'. .IP "\-\-define\-macro ,... (\-D)" 4 .IX Item "--define-macro ,... (-D)" Specify macro definitions to define for use during preprocessing or compilation. .IP "\-\-undefine\-macro ,... (\-U)" 4 .IX Item "--undefine-macro ,... (-U)" Specify macro definitions to undefine for use during preprocessing or compilation.
.IP "\-\-include\-path ,... (\-I)" 4 .IX Item "--include-path ,... (-I)" Specify include search paths. .IP "\-\-system\-include ,... (\-isystem)" 4 .IX Item "--system-include ,... (-isystem)" Specify system include search paths. .IP "\-\-library\-path ,... (\-L)" 4 .IX Item "--library-path ,... (-L)" Specify library search paths. .IP "\-\-output\-directory (\-odir)" 4 .IX Item "--output-directory (-odir)" Specify the directory of the output file. This option is intended for letting the dependency generation step (option '\-\-generate\-dependencies') generate a rule that defines the target object file in the proper directory. .IP "\-\-compiler\-bindir (\-ccbin)" 4 .IX Item "--compiler-bindir (-ccbin)" Specify the directory in which the compiler executable (Microsoft Visual Studio cl, or a gcc derivative) resides. By default, this executable is expected in the current executable search path. .Sh "Options for specifying behaviour of compiler/linker" .IX Subsection "Options for specifying behaviour of compiler/linker" .IP "\-\-profile (\-pg)" 4 .IX Item "--profile (-pg)" Instrument generated code/executable for use by gprof (Linux only). .IP "\-\-debug (\-g)" 4 .IX Item "--debug (-g)" Generate debug information for host code. .IP "\-\-device\-debug (\-G)" 4 .IX Item "--device-debug (-G)" Generate debug information for device code, and also specify the optimization level for the device code in order to control its 'debuggability'. .Sp Allowed values for this option: 0,1,2,3. .IP "\-\-extern\-debug\-info (\-extdeb)" 4 .IX Item "--extern-debug-info (-extdeb)" Only valid when generating debug information. Convert the dwarf information corresponding with the device code in an Elf file into the fat binary into which also the code binary will be placed. This mode is default on Windows but the default on Linux is to link this dwarf information against the host executable, which supports integrated host/target debugging by cuda-gdb.
.IP "\-\-optimize (\-O)" 4 .IX Item "--optimize (-O)" Specify optimization level for host code. .IP "\-\-shared (\-shared)" 4 .IX Item "--shared (-shared)" Generate a shared library during linking. Note: when other linker options are required for controlling dll generation, use option \-Xlinker. .IP "\-\-machine (\-m)" 4 .IX Item "--machine (-m)" Specify 32 vs 64 bit architecture. .Sp Allowed values for this option: 32,64. .Sp Default value: 32. .Sh "Options for passing specific phase options" .IX Subsection "Options for passing specific phase options" These allow for passing options directly to the intended compilation phase. Using these, users have the ability to pass options to the lower level compilation tools, without the need for nvcc to know about each and every such option. .IP "\-\-compiler\-options ,... (\-Xcompiler)" 4 .IX Item "--compiler-options ,... (-Xcompiler)" Specify options directly to the compiler/preprocessor. .IP "\-\-linker\-options ,... (\-Xlinker)" 4 .IX Item "--linker-options ,... (-Xlinker)" Specify options directly to the linker. .IP "\-\-opencc\-options ,... (\-Xopencc)" 4 .IX Item "--opencc-options ,... (-Xopencc)" Specify options directly to nvopencc. .IP "\-\-cudafe\-options ,... (\-Xcudafe)" 4 .IX Item "--cudafe-options ,... (-Xcudafe)" Specify options directly to cudafe. .IP "\-\-ptxas\-options ,... (\-Xptxas)" 4 .IX Item "--ptxas-options ,... (-Xptxas)" Specify options directly to the ptx optimizing assembler. .IP "\-\-fatbin\-options ,... (\-Xfatbin)" 4 .IX Item "--fatbin-options ,... (-Xfatbin)" Specify options directly to the fatbin generator. .Sh "Miscellaneous options for guiding the compiler driver" .IX Subsection "Miscellaneous options for guiding the compiler driver" .IP "\-\-dont\-use\-profile (\-noprof)" 4 .IX Item "--dont-use-profile (-noprof)" This is intended for use during the cuda build, when no profile is present yet. 
.IP "\-\-foreign (\-foreign)" 4 .IX Item "--foreign (-foreign)" This option is for test purposes only. By default, on Gnu platforms gcc/g++ is assumed to be the compiler that is to be used. On pure windows platforms, the compiler to be used is expected to be cl. This option reverses this assumption. .IP "\-\-dryrun (\-dryrun)" 4 .IX Item "--dryrun (-dryrun)" Do not execute the compilation commands generated by nvcc. Instead, list them. .IP "\-\-verbose (\-v)" 4 .IX Item "--verbose (-v)" List the compilation commands generated by this compiler driver, but do not suppress their execution. .IP "\-\-keep (\-keep)" 4 .IX Item "--keep (-keep)" Keep all intermediate files that are generated during internal compilation steps. .IP "\-\-save\-temps (\-save\-temps)" 4 .IX Item "--save-temps (-save-temps)" This option is an alias of '\-\-keep'. .IP "\-\-clean\-targets (\-clean)" 4 .IX Item "--clean-targets (-clean)" This option reverses the behaviour of nvcc. When specified, none of the compilation phases will be executed. Instead, all of the non-temporary files that nvcc would otherwise create will be deleted. .IP "\-\-run\-args ,... (\-run\-args)" 4 .IX Item "--run-args ,... (-run-args)" Used in combination with option \-run, to specify command line arguments for the executable. .IP "\-\-input\-drive\-prefix (\-idp)" 4 .IX Item "--input-drive-prefix (-idp)" On Windows platforms, all command line arguments that refer to file names must be converted to Windows native format before they are passed to pure Windows executables. This option specifies how the 'current' development environment represents absolute paths. Use '\-idp /cygwin/' for CygWin build environments, and '\-idp /' for Mingw. .IP "\-\-dependency\-drive\-prefix (\-ddp)" 4 .IX Item "--dependency-drive-prefix (-ddp)" On Windows platforms, when generating dependency files (option \-M), all file names must be converted to whatever the used instance of 'make' will recognize.
Some instances of 'make' have trouble with the colon in absolute paths in native Windows format, which depends on the environment in which this 'make' instance has been compiled. Use '\-ddp /cygwin/' for a CygWin make, and \&'\-ddp /' for Mingw. Or leave these file names in native Windows format by specifying nothing. .IP "\-\-drive\-prefix (\-dp)" 4 .IX Item "--drive-prefix (-dp)" Specifies the given prefix as both input-drive-prefix and dependency-drive-prefix. .IP "\-\-no\-align\-double" 4 .IX Item "--no-align-double" Specifies that \-malign\-double should not be passed as a compiler argument on 32\-bit platforms. \s-1WARNING:\s0 this makes the \s-1ABI\s0 incompatible with the cuda kernel \s-1ABI\s0 for certain 64\-bit types. .Sh "Options for steering \s-1GPU\s0 code generation" .IX Subsection "Options for steering GPU code generation" .IP "\-\-gpu\-name (\-arch)" 4 .IX Item "--gpu-name (-arch)" Specify the name of the class of nVidia \s-1GPU\s0 architectures for which the cuda input files must be compiled. .Sp With the exception as described for the shorthand below, the architecture specified with this option must be a virtual architecture (such as compute_10), and it will be the assumed architecture during the nvopencc compilation stage. .Sp This option will cause no code to be generated (that is the role of nvcc option '\-\-gpu\-code', see below); rather, its purpose is to steer the nvopencc stage, influencing the architecture of the generated ptx intermediate. .Sp For convenience in case of simple nvcc compilations the following shorthand is supported: if no value for option '\-\-gpu\-code' is specified, then the value of '\-\-gpu\-code' defaults to the value of '\-\-gpu\-architecture'. In this situation, as only exception to the description above, the value specified for '\-\-gpu\-architecture' may be a 'real' architecture (such as a sm_13), in which case nvcc uses the closest virtual architecture as effective architecture value.
For example, 'nvcc \-arch=sm_13' is equivalent to 'nvcc \-arch=compute_13 \&\-code=sm_13'. .Sp Allowed values for this option: 'compute_10','compute_11','compute_12','compute_13', \&'sm_10','sm_11','sm_12','sm_13'. .IP "\-\-gpu\-code ,... (\-code)" 4 .IX Item "--gpu-code ,... (-code)" Specify the names of nVidia gpus to generate code for. .Sp Unless option \-export\-dir is specified (see below), nvcc will embed a compiled code image in the resulting executable for each specified 'code' architecture. This code image will be a true binary load image for each 'real' architecture (such as a sm_13), and ptx intermediate code for each virtual architecture (such as compute_10). During runtime, in case no better binary load image is found, and provided that the ptx architecture is compatible with the 'current' \&\s-1GPU\s0, such embedded ptx code will be dynamically translated for this current \&\s-1GPU\s0 by the cuda runtime system. .Sp Architectures specified for this option can be virtual as well as real, but each of these 'code' architectures must be compatible with the architecture specified with option '\-\-gpu\-architecture'. .Sp For instance, 'arch'=compute_13 is not compatible with 'code'=sm_10, because the generated ptx code will assume the availability of compute_13 features that are not present on sm_10. .Sp Allowed values for this option: 'compute_10','compute_11','compute_12','compute_13', \&'sm_10','sm_11','sm_12','sm_13'. .IP "\-\-generate\-code (\-gencode)" 4 .IX Item "--generate-code (-gencode)" This option provides a generalization of the '\-\-gpu\-architecture= \-\-gpu\-code=code, \&...' option combination for specifying nvcc behavior with respect to code generation. Where use of the previous options generates different code for a fixed virtual architecture, option '\-\-generate\-code' allows multiple nvopencc invocations, iterating over different virtual architectures. 
In fact, .Sp .Vb 1 \& \*(Aq\-\-gpu\-architecture=<arch> \-\-gpu\-code=<code>,...\*(Aq .Ve .Sp is equivalent to .Sp .Vb 1 \& \*(Aq\-\-generate\-code arch=<arch>,code=<code>,...\*(Aq. .Ve .Sp \&'\-\-generate\-code' options may be repeated for different virtual architectures. .Sp Allowed keywords for this option: 'arch','code'. .IP "\-\-export\-dir (\-dir)" 4 .IX Item "--export-dir (-dir)" Specify the name of a file to which all 'external' code images will be copied, intended as a device code repository that can be inspected by the cuda driver at application runtime when it occurs in the appropriate device code search paths. .Sp This file can be either a directory or a zip file. In either case, this tool will maintain a directory structure in order to facilitate code lookup by the cuda driver. When this option is not used, all 'external' images will be silently discarded. When a directory is specified, but does not currently exist, then it will be created as a common directory (not a zip file). .IP "\-\-extern\-mode (\-ext)" 4 .IX Item "--extern-mode (-ext)" Specify which of the listed images will be copied into the directory specified with option 'export\-dir'. .Sp If this option is not specified, the behavior is as follows: if option 'intern\-mode' is specified then all listed images that are not defined as intern will be considered extern. Otherwise, if neither of these options is specified, then all listed images will be considered as intern. Note that it is allowed to both embed code images and keep them extern. .Sp Allowed values for this option: 'all','none','real','virtual'. .IP "\-\-intern\-mode (\-int)" 4 .IX Item "--intern-mode (-int)" Specify which of the listed images will be copied into the embedded fat binary structure (option 'embedded\-fatbin'). .Sp If this option is not specified, the behavior is as follows: if option 'extern\-mode' is specified then all listed images that are not defined as extern will be considered intern.
Otherwise, if neither of these options is specified, then all listed images will be considered as intern. Note that it is allowed to both embed code images and keep them extern. .Sp Allowed values for this option: 'all','none','real','virtual'. .IP "\-\-maxrregcount (\-maxrregcount)" 4 .IX Item "--maxrregcount (-maxrregcount)" Specify the maximum number of registers that \s-1GPU\s0 functions can use. Up to a function\-specific limit, a higher value will generally increase the performance of individual \s-1GPU\s0 threads that execute this function. However, because thread registers are allocated from a global register pool on each \s-1GPU\s0, a higher value of this option will also reduce the maximum thread block size, thereby reducing the amount of thread parallelism. Hence, a good maxrregcount value is the result of a trade-off. .Sp If this option is not specified, then no maximum is assumed. Otherwise the specified value will be rounded to the next multiple of 4 registers, up to the \s-1GPU\s0\-specific maximum of 128 registers. .Sh "Options for steering cuda compilation" .IX Subsection "Options for steering cuda compilation" .IP "\-\-device\-emulation (\-deviceemu)" 4 .IX Item "--device-emulation (-deviceemu)" Generate code for the \s-1GPGPU\s0 emulation library. .IP "\-\-use_fast_math (\-use_fast_math)" 4 .IX Item "--use_fast_math (-use_fast_math)" Make use of fast math library. .IP "\-\-host\-compilation " 4 .IX Item "--host-compilation " Specify C vs. \*(C+ language for host code in \s-1CUDA\s0 source files. .Sp Allowed values for this option: 'C','\*(C+','c','c++'. .Sp Default value: '\*(C+'. .Sh "Generic tool options" .IX Subsection "Generic tool options" .IP "\-\-help (\-h)" 4 .IX Item "--help (-h)" Print help information on this tool. .IP "\-\-version (\-V)" 4 .IX Item "--version (-V)" Print version information on this tool. .IP "\-\-options\-file ,... (\-optf)" 4 .IX Item "--options-file ,... (-optf)" Include command line options from specified file.