/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * \file linker.cpp
 * GLSL linker implementation
 *
 * Given a set of shaders that are to be linked to generate a final program,
 * there are three distinct stages.
 *
 * In the first stage shaders are partitioned into groups based on the shader
 * type.  All shaders of a particular type (e.g., vertex shaders) are linked
 * together.
 *
 *   - Undefined references in each shader are resolved to definitions in
 *     another shader.
 *   - Types and qualifiers of uniforms, outputs, and global variables defined
 *     in multiple shaders with the same name are verified to be the same.
 *   - Initializers for uniforms and global variables defined
 *     in multiple shaders with the same name are verified to be the same.
 *
 * The result, in the terminology of the GLSL spec, is a set of shader
 * executables for each processing unit.
 *
 * After the first stage is complete, a series of semantic checks are performed
 * on each of the shader executables.
 *
 *   - Each shader executable must define a \c main function.
 *   - Each vertex shader executable must write to \c gl_Position.
 *   - Each fragment shader executable must write to either \c gl_FragData or
 *     \c gl_FragColor.
 *
 * In the final stage individual shader executables are linked to create a
 * complete executable.
 *
 *   - Types of uniforms defined in multiple shader stages with the same name
 *     are verified to be the same.
 *   - Initializers for uniforms defined in multiple shader stages with the
 *     same name are verified to be the same.
 *   - Types and qualifiers of outputs defined in one stage are verified to
 *     be the same as the types and qualifiers of inputs defined with the same
 *     name in a later stage.
 *
 * \author Ian Romanick <ian.d.romanick@intel.com>
 */
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>

#include "main/mtypes.h"
#include "glsl_symbol_table.h"
#include "glsl_parser_extras.h"
#include "ir_optimization.h"
#include "hash_table.h"
78 * Visitor that determines whether or not a variable is ever written.
80 class find_assignment_visitor : public ir_hierarchical_visitor {
82 find_assignment_visitor(const char *name)
83 : name(name), found(false)
88 virtual ir_visitor_status visit_enter(ir_assignment *ir)
90 ir_variable *const var = ir->lhs->variable_referenced();
92 if (strcmp(name, var->name) == 0) {
97 return visit_continue_with_parent;
100 bool variable_found()
106 const char *name; /**< Find writes to a variable with this name. */
107 bool found; /**< Was a write to the variable found? */
112 invalidate_variable_locations(glsl_shader *sh, enum ir_variable_mode mode,
115 foreach_list(node, &sh->ir) {
116 ir_variable *const var = ((ir_instruction *) node)->as_variable();
118 if ((var == NULL) || (var->mode != (unsigned) mode))
121 /* Only assign locations for generic attributes / varyings / etc.
123 if (var->location >= generic_base)
130 * Determine the number of attribute slots required for a particular type
132 * This code is here because it implements the language rules of a specific
133 * GLSL version. Since it's a property of the language and not a property of
134 * types in general, it doesn't really belong in glsl_type.
137 count_attribute_slots(const glsl_type *t)
139 /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
141 * "A scalar input counts the same amount against this limit as a vec4,
142 * so applications may want to consider packing groups of four
143 * unrelated float inputs together into a vector to better utilize the
144 * capabilities of the underlying hardware. A matrix input will use up
145 * multiple locations. The number of locations used will equal the
146 * number of columns in the matrix."
148 * The spec does not explicitly say how arrays are counted. However, it
149 * should be safe to assume the total number of slots consumed by an array
150 * is the number of entries in the array multiplied by the number of slots
151 * consumed by a single element of the array.
155 return t->array_size() * count_attribute_slots(t->element_type());
158 return t->matrix_columns;
165 * Verify that a vertex shader executable meets all semantic requirements
167 * \param shader Vertex shader executable to be verified
170 validate_vertex_shader_executable(struct glsl_shader *shader)
175 if (!shader->symbols->get_function("main")) {
176 printf("error: vertex shader lacks `main'\n");
180 find_assignment_visitor find("gl_Position");
181 find.run(&shader->ir);
182 if (!find.variable_found()) {
183 printf("error: vertex shader does not write to `gl_Position'\n");
192 * Verify that a fragment shader executable meets all semantic requirements
194 * \param shader Fragment shader executable to be verified
197 validate_fragment_shader_executable(struct glsl_shader *shader)
202 if (!shader->symbols->get_function("main")) {
203 printf("error: fragment shader lacks `main'\n");
207 find_assignment_visitor frag_color("gl_FragColor");
208 find_assignment_visitor frag_data("gl_FragData");
210 frag_color.run(&shader->ir);
211 frag_data.run(&shader->ir);
213 if (!frag_color.variable_found() && !frag_data.variable_found()) {
214 printf("error: fragment shader does not write to `gl_FragColor' or "
219 if (frag_color.variable_found() && frag_data.variable_found()) {
220 printf("error: fragment shader write to both `gl_FragColor' and "
230 * Perform validation of uniforms used across multiple shader stages
233 cross_validate_uniforms(struct glsl_shader **shaders, unsigned num_shaders)
235 /* Examine all of the uniforms in all of the shaders and cross validate
238 glsl_symbol_table uniforms;
239 for (unsigned i = 0; i < num_shaders; i++) {
240 foreach_list(node, &shaders[i]->ir) {
241 ir_variable *const var = ((ir_instruction *) node)->as_variable();
243 if ((var == NULL) || (var->mode != ir_var_uniform))
246 /* If a uniform with this name has already been seen, verify that the
247 * new instance has the same type. In addition, if the uniforms have
248 * initializers, the values of the initializers must be the same.
250 ir_variable *const existing = uniforms.get_variable(var->name);
251 if (existing != NULL) {
252 if (var->type != existing->type) {
253 printf("error: uniform `%s' declared as type `%s' and "
255 var->name, var->type->name, existing->type->name);
259 if (var->constant_value != NULL) {
260 if (existing->constant_value != NULL) {
261 if (!var->constant_value->has_value(existing->constant_value)) {
262 printf("error: initializers for uniform `%s' have "
263 "differing values\n",
268 /* If the first-seen instance of a particular uniform did not
269 * have an initializer but a later instance does, copy the
270 * initializer to the version stored in the symbol table.
272 existing->constant_value = var->constant_value->clone();
275 uniforms.add_variable(var->name, var);
284 * Validate that outputs from one stage match inputs of another
287 cross_validate_outputs_to_inputs(glsl_shader *producer, glsl_shader *consumer)
289 glsl_symbol_table parameters;
290 /* FINISHME: Figure these out dynamically. */
291 const char *const producer_stage = "vertex";
292 const char *const consumer_stage = "fragment";
294 /* Find all shader outputs in the "producer" stage.
296 foreach_list(node, &producer->ir) {
297 ir_variable *const var = ((ir_instruction *) node)->as_variable();
299 /* FINISHME: For geometry shaders, this should also look for inout
300 * FINISHME: variables.
302 if ((var == NULL) || (var->mode != ir_var_out))
305 parameters.add_variable(var->name, var);
309 /* Find all shader inputs in the "consumer" stage. Any variables that have
310 * matching outputs already in the symbol table must have the same type and
313 foreach_list(node, &consumer->ir) {
314 ir_variable *const input = ((ir_instruction *) node)->as_variable();
316 /* FINISHME: For geometry shaders, this should also look for inout
317 * FINISHME: variables.
319 if ((input == NULL) || (input->mode != ir_var_in))
322 ir_variable *const output = parameters.get_variable(input->name);
323 if (output != NULL) {
324 /* Check that the types match between stages.
326 if (input->type != output->type) {
327 printf("error: %s shader output `%s' delcared as type `%s', but "
328 "%s shader input declared as type `%s'\n",
329 producer_stage, output->name, output->type->name,
330 consumer_stage, input->type->name);
334 /* Check that all of the qualifiers match between stages.
336 if (input->centroid != output->centroid) {
337 printf("error: %s shader output `%s' %s centroid qualifier, but "
338 "%s shader input %s centroid qualifier\n",
341 (output->centroid) ? "has" : "lacks",
343 (input->centroid) ? "has" : "lacks");
347 if (input->invariant != output->invariant) {
348 printf("error: %s shader output `%s' %s invariant qualifier, but "
349 "%s shader input %s invariant qualifier\n",
352 (output->invariant) ? "has" : "lacks",
354 (input->invariant) ? "has" : "lacks");
358 if (input->interpolation != output->interpolation) {
359 printf("error: %s shader output `%s' specifies %s interpolation "
361 "but %s shader input specifies %s interpolation "
365 output->interpolation_string(),
367 input->interpolation_string());
377 struct uniform_node {
379 struct gl_uniform *u;
384 assign_uniform_locations(struct glsl_program *prog)
388 unsigned total_uniforms = 0;
389 hash_table *ht = hash_table_ctor(32, hash_table_string_hash,
390 hash_table_string_compare);
392 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
393 unsigned next_position = 0;
395 foreach_list(node, &prog->_LinkedShaders[i]->ir) {
396 ir_variable *const var = ((ir_instruction *) node)->as_variable();
398 if ((var == NULL) || (var->mode != ir_var_uniform))
401 const unsigned vec4_slots = (var->component_slots() + 3) / 4;
402 assert(vec4_slots != 0);
404 uniform_node *n = (uniform_node *) hash_table_find(ht, var->name);
406 n = (uniform_node *) calloc(1, sizeof(struct uniform_node));
407 n->u = (gl_uniform *) calloc(vec4_slots, sizeof(struct gl_uniform));
408 n->slots = vec4_slots;
410 n->u[0].Name = strdup(var->name);
411 for (unsigned j = 1; j < vec4_slots; j++)
412 n->u[j].Name = n->u[0].Name;
414 hash_table_insert(ht, n, n->u[0].Name);
415 uniforms.push_tail(& n->link);
416 total_uniforms += vec4_slots;
419 if (var->constant_value != NULL)
420 for (unsigned j = 0; j < vec4_slots; j++)
421 n->u[j].Initialized = true;
423 var->location = next_position;
425 for (unsigned j = 0; j < vec4_slots; j++) {
426 switch (prog->_LinkedShaders[i]->Type) {
427 case GL_VERTEX_SHADER:
428 n->u[j].VertPos = next_position;
430 case GL_FRAGMENT_SHADER:
431 n->u[j].FragPos = next_position;
433 case GL_GEOMETRY_SHADER:
434 /* FINISHME: Support geometry shaders. */
435 assert(prog->_LinkedShaders[i]->Type != GL_GEOMETRY_SHADER);
444 gl_uniform_list *ul = (gl_uniform_list *)
445 calloc(1, sizeof(gl_uniform_list));
447 ul->Size = total_uniforms;
448 ul->NumUniforms = total_uniforms;
449 ul->Uniforms = (gl_uniform *) calloc(total_uniforms, sizeof(gl_uniform));
453 for (uniform_node *node = (uniform_node *) uniforms.head
454 ; node->link.next != NULL
456 next = (uniform_node *) node->link.next;
459 memcpy(&ul->Uniforms[idx], node->u, sizeof(gl_uniform) * node->slots);
/**
 * Find a contiguous set of available bits in a bitmask
 *
 * \param used_mask     Bits representing used (1) and unused (0) locations
 * \param needed_count  Number of contiguous bits needed.
 *
 * \return
 * Base location of the available bits on success or -1 on failure.
 */
int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
   const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;

   /* The comparison to 32 is redundant, but without it GCC emits "warning:
    * cannot optimize possibly infinite loops" for the loop below.
    */
   if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
      return -1;

   /* Computed after the range check so that needed_count == 32 cannot shift
    * by the full width of the type (undefined behavior).
    */
   unsigned needed_mask =
      (needed_count >= 32) ? ~0u : (1u << needed_count) - 1;

   for (int i = 0; i <= max_bit_to_test; i++) {
      if ((needed_mask & ~used_mask) == needed_mask)
         return i;

      needed_mask <<= 1;
   }

   return -1;
}
505 assign_attribute_locations(glsl_shader *sh,
506 struct gl_program_parameter_list *attrib,
507 unsigned max_attribute_index)
509 /* Mark invalid attribute locations as being used.
511 unsigned used_locations = (max_attribute_index >= 32)
512 ? ~0 : ~((1 << max_attribute_index) - 1);
514 assert(sh->Type == GL_VERTEX_SHADER);
516 /* Operate in a total of four passes.
518 * 1. Invalidate the location assignments for all vertex shader inputs.
520 * 2. Assign locations for inputs that have user-defined (via
521 * glBindVertexAttribLocation) locatoins.
523 * 3. Sort the attributes without assigned locations by number of slots
524 * required in decreasing order. Fragmentation caused by attribute
525 * locations assigned by the application may prevent large attributes
526 * from having enough contiguous space.
528 * 4. Assign locations to any inputs without assigned locations.
531 invalidate_variable_locations(sh, ir_var_in, VERT_ATTRIB_GENERIC0);
533 if (attrib != NULL) {
534 for (unsigned i = 0; i < attrib->NumParameters; i++) {
535 ir_variable *const var =
536 sh->symbols->get_variable(attrib->Parameters[i].Name);
538 /* Note: attributes that occupy multiple slots, such as arrays or
539 * matrices, may appear in the attrib array multiple times.
541 if ((var == NULL) || (var->location != -1))
544 /* From page 61 of the OpenGL 4.0 spec:
546 * "LinkProgram will fail if the attribute bindings assigned by
547 * BindAttribLocation do not leave not enough space to assign a
548 * location for an active matrix attribute or an active attribute
549 * array, both of which require multiple contiguous generic
552 * Previous versions of the spec contain similar language but omit the
553 * bit about attribute arrays.
555 * Page 61 of the OpenGL 4.0 spec also says:
557 * "It is possible for an application to bind more than one
558 * attribute name to the same location. This is referred to as
559 * aliasing. This will only work if only one of the aliased
560 * attributes is active in the executable program, or if no path
561 * through the shader consumes more than one attribute of a set
562 * of attributes aliased to the same location. A link error can
563 * occur if the linker determines that every path through the
564 * shader consumes multiple aliased attributes, but
565 * implementations are not required to generate an error in this
568 * These two paragraphs are either somewhat contradictory, or I don't
569 * fully understand one or both of them.
571 /* FINISHME: The code as currently written does not support attribute
572 * FINISHME: location aliasing (see comment above).
574 const int attr = attrib->Parameters[i].StateIndexes[0];
575 const unsigned slots = count_attribute_slots(var->type);
577 /* Mask representing the contiguous slots that will be used by this
580 const unsigned use_mask = (1 << slots) - 1;
582 /* Generate a link error if the set of bits requested for this
583 * attribute overlaps any previously allocated bits.
585 if ((~(use_mask << attr) & used_locations) != used_locations) {
586 printf("error: insufficient contiguous attribute locations "
587 "available for vertex shader input `%s'",
592 var->location = VERT_ATTRIB_GENERIC0 + attr;
593 used_locations |= (use_mask << attr);
597 /* Temporary storage for the set of attributes that need locations assigned.
603 /* Used below in the call to qsort. */
604 static int compare(const void *a, const void *b)
606 const temp_attr *const l = (const temp_attr *) a;
607 const temp_attr *const r = (const temp_attr *) b;
609 /* Reversed because we want a descending order sort below. */
610 return r->slots - l->slots;
614 unsigned num_attr = 0;
616 foreach_list(node, &sh->ir) {
617 ir_variable *const var = ((ir_instruction *) node)->as_variable();
619 if ((var == NULL) || (var->mode != ir_var_in))
622 /* The location was explicitly assigned, nothing to do here.
624 if (var->location != -1)
627 to_assign[num_attr].slots = count_attribute_slots(var->type);
628 to_assign[num_attr].var = var;
632 /* If all of the attributes were assigned locations by the application (or
633 * are built-in attributes with fixed locations), return early. This should
634 * be the common case.
639 qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare);
641 for (unsigned i = 0; i < num_attr; i++) {
642 /* Mask representing the contiguous slots that will be used by this
645 const unsigned use_mask = (1 << to_assign[i].slots) - 1;
647 int location = find_available_slots(used_locations, to_assign[i].slots);
650 printf("error: insufficient contiguous attribute locations "
651 "available for vertex shader input `%s'",
652 to_assign[i].var->name);
656 to_assign[i].var->location = VERT_ATTRIB_GENERIC0 + location;
657 used_locations |= (use_mask << location);
665 link_shaders(struct glsl_program *prog)
667 prog->LinkStatus = false;
668 prog->Validated = false;
671 /* Separate the shaders into groups based on their type.
673 struct glsl_shader **vert_shader_list;
674 unsigned num_vert_shaders = 0;
675 struct glsl_shader **frag_shader_list;
676 unsigned num_frag_shaders = 0;
678 vert_shader_list = (struct glsl_shader **)
679 calloc(2 * prog->NumShaders, sizeof(struct glsl_shader *));
680 frag_shader_list = &vert_shader_list[prog->NumShaders];
682 for (unsigned i = 0; i < prog->NumShaders; i++) {
683 switch (prog->Shaders[i]->Type) {
684 case GL_VERTEX_SHADER:
685 vert_shader_list[num_vert_shaders] = prog->Shaders[i];
688 case GL_FRAGMENT_SHADER:
689 frag_shader_list[num_frag_shaders] = prog->Shaders[i];
692 case GL_GEOMETRY_SHADER:
693 /* FINISHME: Support geometry shaders. */
694 assert(prog->Shaders[i]->Type != GL_GEOMETRY_SHADER);
699 /* FINISHME: Implement intra-stage linking. */
700 assert(num_vert_shaders <= 1);
701 assert(num_frag_shaders <= 1);
703 /* Verify that each of the per-target executables is valid.
705 if (!validate_vertex_shader_executable(vert_shader_list[0])
706 || !validate_fragment_shader_executable(frag_shader_list[0]))
710 /* FINISHME: Perform inter-stage linking. */
711 prog->_LinkedShaders = (struct glsl_shader **)
712 calloc(2, sizeof(struct glsl_shader *));
713 prog->_NumLinkedShaders = 0;
715 if (num_vert_shaders > 0) {
716 prog->_LinkedShaders[prog->_NumLinkedShaders] = vert_shader_list[0];
717 prog->_NumLinkedShaders++;
720 if (num_frag_shaders > 0) {
721 prog->_LinkedShaders[prog->_NumLinkedShaders] = frag_shader_list[0];
722 prog->_NumLinkedShaders++;
725 if (cross_validate_uniforms(prog->_LinkedShaders, prog->_NumLinkedShaders)) {
726 /* Validate the inputs of each stage with the output of the preceeding
729 for (unsigned i = 1; i < prog->_NumLinkedShaders; i++) {
730 if (!cross_validate_outputs_to_inputs(prog->_LinkedShaders[i - 1],
731 prog->_LinkedShaders[i]))
735 prog->LinkStatus = true;
738 /* FINISHME: Perform whole-program optimization here. */
740 assign_uniform_locations(prog);
742 if (prog->_LinkedShaders[0]->Type == GL_VERTEX_SHADER)
743 /* FINISHME: The value of the max_attribute_index parameter is
744 * FINISHME: implementation dependent based on the value of
745 * FINISHME: GL_MAX_VERTEX_ATTRIBS. GL_MAX_VERTEX_ATTRIBS must be
746 * FINISHME: at least 16, so hardcode 16 for now.
748 if (!assign_attribute_locations(prog->_LinkedShaders[0],
753 /* FINISHME: Assign vertex shader output / fragment shader input
754 * FINISHME: locations.
757 /* FINISHME: Assign fragment shader output locations. */
759 /* FINISHME: Generate code here. */
762 free(vert_shader_list);