2 * Copyright 2014 VMware, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/**
 * This utility transforms a geometry shader to emulate point sprites by
 * drawing a quad for each point.  It also adds an extra output for the
 * original point position if the point position is to be written to a
 * stream output buffer.
 * Note: It assumes the driver will add a constant for the inverse viewport
 * after the user defined constants.
 */
35 #include "util/u_debug.h"
36 #include "util/u_math.h"
37 #include "tgsi_info.h"
38 #include "tgsi_point_sprite.h"
39 #include "tgsi_transform.h"
40 #include "pipe/p_state.h"
/* Sentinel stored in register-index fields that have not been assigned. */
#define INVALID_INDEX 9999
44 /* Set swizzle based on the immediates (0, 1, 0, -1) */
45 static inline unsigned
46 set_swizzle(int x, int y, int z, int w)
48 static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X,
59 return map[x+1] | (map[y+1] << 2) | (map[z+1] << 4) | (map[w+1] << 6);
/**
 * Extract the 2-bit selector of one channel from a packed swizzle word.
 *
 * \param swizzle    packed swizzle, two bits per channel
 * \param component  channel to extract: 0 = x, 1 = y, 2 = z, 3 = w
 * \return the selector stored for that channel (0..3)
 */
static inline unsigned
get_swizzle(unsigned swizzle, unsigned component)
{
   assert(component < 4);
   return (swizzle >> (component * 2)) & 0x3;
}
69 struct psprite_transform_context
71 struct tgsi_transform_context base;
74 unsigned num_orig_out;
77 unsigned point_size_in; // point size input
78 unsigned point_size_out; // point size output
79 unsigned point_size_tmp; // point size temp
80 unsigned point_pos_in; // point pos input
81 unsigned point_pos_out; // point pos output
82 unsigned point_pos_sout; // original point pos for streamout
83 unsigned point_pos_tmp; // point pos temp
84 unsigned point_scale_tmp; // point scale temp
85 unsigned point_color_out; // point color output
86 unsigned point_color_tmp; // point color temp
87 unsigned point_imm; // point immediates
88 unsigned point_ivp; // point inverseViewport constant
89 unsigned point_dir_swz[4]; // point direction swizzle
90 unsigned point_coord_swz[4]; // point coord swizzle
91 unsigned point_coord_enable; // point coord enable mask
92 unsigned point_coord_decl; // point coord output declared mask
93 unsigned point_coord_out; // point coord output starting index
94 unsigned point_coord_aa; // aa point coord semantic index
95 unsigned point_coord_k; // aa point coord threshold distance
96 unsigned stream_out_point_pos:1; // set if to stream out original point pos
97 unsigned aa_point:1; // set if doing aa point
98 unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS];
/**
 * Downcast a generic transform context to the point sprite context.
 *
 * Only valid for contexts that are the 'base' member of a
 * struct psprite_transform_context.
 */
static inline struct psprite_transform_context *
psprite_transform_context(struct tgsi_transform_context *ctx)
{
   return (struct psprite_transform_context *) ctx;
}
110 * TGSI declaration transform callback.
113 psprite_decl(struct tgsi_transform_context *ctx,
114 struct tgsi_full_declaration *decl)
116 struct psprite_transform_context *ts = psprite_transform_context(ctx);
118 if (decl->Declaration.File == TGSI_FILE_INPUT) {
119 if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
120 ts->point_size_in = decl->Range.First;
122 else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
123 ts->point_pos_in = decl->Range.First;
126 else if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
127 if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
128 ts->point_size_out = decl->Range.First;
130 else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
131 ts->point_pos_out = decl->Range.First;
133 else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
134 decl->Semantic.Index < 32) {
135 ts->point_coord_decl |= 1 << decl->Semantic.Index;
136 ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
138 ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
140 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
141 ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
143 else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
144 ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1);
147 ctx->emit_declaration(ctx, decl);
151 * TGSI immediate declaration transform callback.
154 psprite_immediate(struct tgsi_transform_context *ctx,
155 struct tgsi_full_immediate *imm)
157 struct psprite_transform_context *ts = psprite_transform_context(ctx);
159 ctx->emit_immediate(ctx, imm);
165 * TGSI transform prolog callback.
168 psprite_prolog(struct tgsi_transform_context *ctx)
170 struct psprite_transform_context *ts = psprite_transform_context(ctx);
171 unsigned point_coord_enable, en;
174 /* Replace output registers with temporary registers */
175 for (i = 0; i < ts->num_out; i++) {
176 ts->out_tmp_index[i] = ts->num_tmp++;
178 ts->num_orig_out = ts->num_out;
180 /* Declare a tmp register for point scale */
181 ts->point_scale_tmp = ts->num_tmp++;
183 if (ts->point_size_out != INVALID_INDEX)
184 ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out];
186 ts->point_size_tmp = ts->num_tmp++;
188 assert(ts->point_pos_out != INVALID_INDEX);
189 ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out];
190 ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX;
192 /* Declare one more tmp register for point coord threshold distance
193 * if we are generating anti-aliased point.
196 ts->point_coord_k = ts->num_tmp++;
198 tgsi_transform_temps_decl(ctx, ts->point_size_tmp, ts->num_tmp-1);
200 /* Declare an extra output for the original point position for stream out */
201 if (ts->stream_out_point_pos) {
202 ts->point_pos_sout = ts->num_out++;
203 tgsi_transform_output_decl(ctx, ts->point_pos_sout,
204 TGSI_SEMANTIC_GENERIC, 0, 0);
207 /* point coord outputs to be declared */
208 point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
210 /* Declare outputs for those point coord that are enabled but are not
211 * already declared in this shader.
213 ts->point_coord_out = ts->num_out;
214 if (point_coord_enable) {
215 for (i = 0, en = point_coord_enable; en; en>>=1, i++) {
217 tgsi_transform_output_decl(ctx, ts->num_out++,
218 TGSI_SEMANTIC_GENERIC, i, 0);
219 ts->max_generic = MAX2(ts->max_generic, i);
224 /* add an extra generic output for aa point texcoord */
226 ts->point_coord_aa = ts->max_generic + 1;
227 assert((ts->point_coord_enable & (1 << ts->point_coord_aa)) == 0);
228 ts->point_coord_enable |= 1 << (ts->point_coord_aa);
229 tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC,
230 ts->point_coord_aa, 0);
233 /* Declare extra immediates */
234 ts->point_imm = ts->num_imm;
235 tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1);
237 /* Declare point constant -
238 * constant.xy -- inverseViewport
239 * constant.z -- current point size
240 * constant.w -- max point size
241 * The driver needs to add this constant to the constant buffer
243 ts->point_ivp = ts->num_const++;
244 tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp);
246 /* If this geometry shader does not specify point size,
247 * get the current point size from the point constant.
249 if (ts->point_size_out == INVALID_INDEX) {
250 struct tgsi_full_instruction inst;
252 inst = tgsi_default_full_instruction();
253 inst.Instruction.Opcode = TGSI_OPCODE_MOV;
254 inst.Instruction.NumDstRegs = 1;
255 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
256 ts->point_size_tmp, TGSI_WRITEMASK_XYZW);
257 inst.Instruction.NumSrcRegs = 1;
258 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT,
259 ts->point_ivp, TGSI_SWIZZLE_Z,
260 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
261 ctx->emit_instruction(ctx, &inst);
267 * Add the point sprite emulation instructions at the emit vertex instruction
270 psprite_emit_vertex_inst(struct tgsi_transform_context *ctx,
271 struct tgsi_full_instruction *vert_inst)
273 struct psprite_transform_context *ts = psprite_transform_context(ctx);
274 struct tgsi_full_instruction inst;
275 unsigned point_coord_enable, en;
278 /* new point coord outputs */
279 point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
281 /* OUTPUT[pos_sout] = TEMP[pos] */
282 if (ts->point_pos_sout != INVALID_INDEX) {
283 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
284 TGSI_FILE_OUTPUT, ts->point_pos_sout,
286 TGSI_FILE_TEMPORARY, ts->point_pos_tmp);
290 * Set up the point scale vector
291 * scale = pointSize * pos.w * inverseViewport
294 /* MUL point_scale.x, point_size.x, point_pos.w */
295 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
296 TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X,
297 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
298 TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W);
300 /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */
301 inst = tgsi_default_full_instruction();
302 inst.Instruction.Opcode = TGSI_OPCODE_MUL;
303 inst.Instruction.NumDstRegs = 1;
304 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
305 ts->point_scale_tmp, TGSI_WRITEMASK_XY);
306 inst.Instruction.NumSrcRegs = 2;
307 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
308 ts->point_scale_tmp, TGSI_SWIZZLE_X,
309 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
310 tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT,
311 ts->point_ivp, TGSI_SWIZZLE_X,
312 TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
313 ctx->emit_instruction(ctx, &inst);
316 * Set up the point coord threshold distance
317 * k = 0.5 - 1 / pointsize
320 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
321 TGSI_FILE_TEMPORARY, ts->point_coord_k,
323 TGSI_FILE_IMMEDIATE, ts->point_imm,
325 TGSI_FILE_TEMPORARY, ts->point_size_tmp,
328 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
329 TGSI_FILE_TEMPORARY, ts->point_coord_k,
331 TGSI_FILE_IMMEDIATE, ts->point_imm,
333 TGSI_FILE_TEMPORARY, ts->point_coord_k,
338 for (i = 0; i < 4; i++) {
339 unsigned point_dir_swz = ts->point_dir_swz[i];
340 unsigned point_coord_swz = ts->point_coord_swz[i];
342 /* All outputs need to be emitted for each vertex */
343 for (j = 0; j < ts->num_orig_out; j++) {
344 if (ts->out_tmp_index[j] != INVALID_INDEX) {
345 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
348 TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]);
352 /* pos = point_scale * point_dir + point_pos */
353 inst = tgsi_default_full_instruction();
354 inst.Instruction.Opcode = TGSI_OPCODE_MAD;
355 inst.Instruction.NumDstRegs = 1;
356 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out,
357 TGSI_WRITEMASK_XYZW);
358 inst.Instruction.NumSrcRegs = 3;
359 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp,
360 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
362 tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm,
363 get_swizzle(point_dir_swz, 0),
364 get_swizzle(point_dir_swz, 1),
365 get_swizzle(point_dir_swz, 2),
366 get_swizzle(point_dir_swz, 3));
367 tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp,
368 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
370 ctx->emit_instruction(ctx, &inst);
373 for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) {
377 dstReg = ts->point_coord_out + j;
379 inst = tgsi_default_full_instruction();
380 inst.Instruction.Opcode = TGSI_OPCODE_MOV;
381 inst.Instruction.NumDstRegs = 1;
382 tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT,
383 dstReg, TGSI_WRITEMASK_XYZW);
384 inst.Instruction.NumSrcRegs = 1;
385 tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm,
386 get_swizzle(point_coord_swz, 0),
387 get_swizzle(point_coord_swz, 1),
388 get_swizzle(point_coord_swz, 2),
389 get_swizzle(point_coord_swz, 3));
390 ctx->emit_instruction(ctx, &inst);
392 /* MOV point_coord.z point_coord_k.x */
393 if (s == ts->point_coord_aa) {
394 tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV,
395 TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z,
396 TGSI_FILE_TEMPORARY, ts->point_coord_k,
399 j++; /* the next point coord output offset */
403 /* Emit the EMIT instruction for each vertex of the quad */
404 ctx->emit_instruction(ctx, vert_inst);
407 /* Emit the ENDPRIM instruction for the quad */
408 inst = tgsi_default_full_instruction();
409 inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM;
410 inst.Instruction.NumDstRegs = 0;
411 inst.Instruction.NumSrcRegs = 1;
412 inst.Src[0] = vert_inst->Src[0];
413 ctx->emit_instruction(ctx, &inst);
418 * TGSI instruction transform callback.
421 psprite_inst(struct tgsi_transform_context *ctx,
422 struct tgsi_full_instruction *inst)
424 struct psprite_transform_context *ts = psprite_transform_context(ctx);
426 if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) {
427 psprite_emit_vertex_inst(ctx, inst);
429 else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
430 inst->Dst[0].Register.Index == ts->point_size_out) {
432 * Replace point size output reg with tmp reg.
433 * The tmp reg will be later used as a src reg for computing
434 * the point scale factor.
436 inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
437 inst->Dst[0].Register.Index = ts->point_size_tmp;
438 ctx->emit_instruction(ctx, inst);
440 /* Clamp the point size */
441 /* MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */
442 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX,
443 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
444 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
445 TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y);
447 /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */
448 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
449 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
450 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
451 TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W);
453 else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
454 inst->Dst[0].Register.Index == ts->point_pos_out) {
456 * Replace point pos output reg with tmp reg.
458 inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
459 inst->Dst[0].Register.Index = ts->point_pos_tmp;
460 ctx->emit_instruction(ctx, inst);
462 else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
464 * Replace output reg with tmp reg.
466 inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
467 inst->Dst[0].Register.Index = ts->out_tmp_index[inst->Dst[0].Register.Index];
468 ctx->emit_instruction(ctx, inst);
471 ctx->emit_instruction(ctx, inst);
477 * TGSI property instruction transform callback.
478 * Transforms a point into a 4-vertex triangle strip.
481 psprite_property(struct tgsi_transform_context *ctx,
482 struct tgsi_full_property *prop)
484 switch (prop->Property.PropertyName) {
485 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
486 prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP;
488 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
489 prop->u[0].Data *= 4;
494 ctx->emit_property(ctx, prop);
498 * TGSI utility to transform a geometry shader to support point sprite.
501 tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
502 const unsigned point_coord_enable,
503 const bool sprite_origin_lower_left,
504 const bool stream_out_point_pos,
505 int *aa_point_coord_index)
507 struct psprite_transform_context transform;
508 const uint num_new_tokens = 200; /* should be enough */
509 const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
510 struct tgsi_token *new_tokens;
512 /* setup transformation context */
513 memset(&transform, 0, sizeof(transform));
514 transform.base.transform_declaration = psprite_decl;
515 transform.base.transform_instruction = psprite_inst;
516 transform.base.transform_property = psprite_property;
517 transform.base.transform_immediate = psprite_immediate;
518 transform.base.prolog = psprite_prolog;
520 transform.point_size_in = INVALID_INDEX;
521 transform.point_size_out = INVALID_INDEX;
522 transform.point_size_tmp = INVALID_INDEX;
523 transform.point_pos_in = INVALID_INDEX;
524 transform.point_pos_out = INVALID_INDEX;
525 transform.point_pos_sout = INVALID_INDEX;
526 transform.point_pos_tmp = INVALID_INDEX;
527 transform.point_scale_tmp = INVALID_INDEX;
528 transform.point_imm = INVALID_INDEX;
529 transform.point_coord_aa = INVALID_INDEX;
530 transform.point_coord_k = INVALID_INDEX;
532 transform.stream_out_point_pos = stream_out_point_pos;
533 transform.point_coord_enable = point_coord_enable;
534 transform.aa_point = aa_point_coord_index != NULL;
535 transform.max_generic = -1;
537 /* point sprite directions based on the immediates (0, 1, 0.5, -1) */
539 transform.point_dir_swz[0] = set_swizzle(-1, -1, 0, 0);
541 transform.point_dir_swz[1] = set_swizzle(-1, 1, 0, 0);
543 transform.point_dir_swz[2] = set_swizzle(1, -1, 0, 0);
545 transform.point_dir_swz[3] = set_swizzle(1, 1, 0, 0);
547 /* point coord based on the immediates (0, 1, 0, -1) */
548 if (sprite_origin_lower_left) {
550 transform.point_coord_swz[0] = set_swizzle(0, 0, 0, 1);
552 transform.point_coord_swz[1] = set_swizzle(0, 1, 0, 1);
554 transform.point_coord_swz[2] = set_swizzle(1, 0, 0, 1);
556 transform.point_coord_swz[3] = set_swizzle(1, 1, 0, 1);
560 transform.point_coord_swz[0] = set_swizzle(0, 1, 0, 1);
562 transform.point_coord_swz[1] = set_swizzle(0, 0, 0, 1);
564 transform.point_coord_swz[2] = set_swizzle(1, 1, 0, 1);
566 transform.point_coord_swz[3] = set_swizzle(1, 0, 0, 1);
570 /* allocate new tokens buffer */
571 new_tokens = tgsi_alloc_tokens(new_len);
575 /* transform the shader */
576 tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
578 if (aa_point_coord_index)
579 *aa_point_coord_index = transform.point_coord_aa;