1 /**************************************************************************
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
33 #include "api_validate.h"
37 #include "brw_defines.h"
38 #include "brw_context.h"
39 #include "brw_state.h"
40 #include "brw_fallback.h"
42 #include "intel_ioctl.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_buffer_objects.h"
47 struct brw_array_state {
48 union header_union header;
61 struct buffer *buffer;
65 GLuint instance_data_step_rate;
71 static struct buffer *array_buffer( const struct gl_client_array *array )
73 return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj));
76 static GLuint double_types[5] = {
78 BRW_SURFACEFORMAT_R64_FLOAT,
79 BRW_SURFACEFORMAT_R64G64_FLOAT,
80 BRW_SURFACEFORMAT_R64G64B64_FLOAT,
81 BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
84 static GLuint float_types[5] = {
86 BRW_SURFACEFORMAT_R32_FLOAT,
87 BRW_SURFACEFORMAT_R32G32_FLOAT,
88 BRW_SURFACEFORMAT_R32G32B32_FLOAT,
89 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
92 static GLuint uint_types_norm[5] = {
94 BRW_SURFACEFORMAT_R32_UNORM,
95 BRW_SURFACEFORMAT_R32G32_UNORM,
96 BRW_SURFACEFORMAT_R32G32B32_UNORM,
97 BRW_SURFACEFORMAT_R32G32B32A32_UNORM
100 static GLuint uint_types_scale[5] = {
102 BRW_SURFACEFORMAT_R32_USCALED,
103 BRW_SURFACEFORMAT_R32G32_USCALED,
104 BRW_SURFACEFORMAT_R32G32B32_USCALED,
105 BRW_SURFACEFORMAT_R32G32B32A32_USCALED
108 static GLuint int_types_norm[5] = {
110 BRW_SURFACEFORMAT_R32_SNORM,
111 BRW_SURFACEFORMAT_R32G32_SNORM,
112 BRW_SURFACEFORMAT_R32G32B32_SNORM,
113 BRW_SURFACEFORMAT_R32G32B32A32_SNORM
116 static GLuint int_types_scale[5] = {
118 BRW_SURFACEFORMAT_R32_SSCALED,
119 BRW_SURFACEFORMAT_R32G32_SSCALED,
120 BRW_SURFACEFORMAT_R32G32B32_SSCALED,
121 BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
124 static GLuint ushort_types_norm[5] = {
126 BRW_SURFACEFORMAT_R16_UNORM,
127 BRW_SURFACEFORMAT_R16G16_UNORM,
128 BRW_SURFACEFORMAT_R16G16B16_UNORM,
129 BRW_SURFACEFORMAT_R16G16B16A16_UNORM
132 static GLuint ushort_types_scale[5] = {
134 BRW_SURFACEFORMAT_R16_USCALED,
135 BRW_SURFACEFORMAT_R16G16_USCALED,
136 BRW_SURFACEFORMAT_R16G16B16_USCALED,
137 BRW_SURFACEFORMAT_R16G16B16A16_USCALED
140 static GLuint short_types_norm[5] = {
142 BRW_SURFACEFORMAT_R16_SNORM,
143 BRW_SURFACEFORMAT_R16G16_SNORM,
144 BRW_SURFACEFORMAT_R16G16B16_SNORM,
145 BRW_SURFACEFORMAT_R16G16B16A16_SNORM
148 static GLuint short_types_scale[5] = {
150 BRW_SURFACEFORMAT_R16_SSCALED,
151 BRW_SURFACEFORMAT_R16G16_SSCALED,
152 BRW_SURFACEFORMAT_R16G16B16_SSCALED,
153 BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
156 static GLuint ubyte_types_norm[5] = {
158 BRW_SURFACEFORMAT_R8_UNORM,
159 BRW_SURFACEFORMAT_R8G8_UNORM,
160 BRW_SURFACEFORMAT_R8G8B8_UNORM,
161 BRW_SURFACEFORMAT_R8G8B8A8_UNORM
164 static GLuint ubyte_types_scale[5] = {
166 BRW_SURFACEFORMAT_R8_USCALED,
167 BRW_SURFACEFORMAT_R8G8_USCALED,
168 BRW_SURFACEFORMAT_R8G8B8_USCALED,
169 BRW_SURFACEFORMAT_R8G8B8A8_USCALED
172 static GLuint byte_types_norm[5] = {
174 BRW_SURFACEFORMAT_R8_SNORM,
175 BRW_SURFACEFORMAT_R8G8_SNORM,
176 BRW_SURFACEFORMAT_R8G8B8_SNORM,
177 BRW_SURFACEFORMAT_R8G8B8A8_SNORM
180 static GLuint byte_types_scale[5] = {
182 BRW_SURFACEFORMAT_R8_SSCALED,
183 BRW_SURFACEFORMAT_R8G8_SSCALED,
184 BRW_SURFACEFORMAT_R8G8B8_SSCALED,
185 BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
189 static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized )
191 if (INTEL_DEBUG & DEBUG_VERTS)
192 _mesa_printf("type %s size %d normalized %d\n",
193 _mesa_lookup_enum_by_nr(type), size, normalized);
197 case GL_DOUBLE: return double_types[size];
198 case GL_FLOAT: return float_types[size];
199 case GL_INT: return int_types_norm[size];
200 case GL_SHORT: return short_types_norm[size];
201 case GL_BYTE: return byte_types_norm[size];
202 case GL_UNSIGNED_INT: return uint_types_norm[size];
203 case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
204 case GL_UNSIGNED_BYTE: return ubyte_types_norm[size];
205 default: assert(0); return 0;
210 case GL_DOUBLE: return double_types[size];
211 case GL_FLOAT: return float_types[size];
212 case GL_INT: return int_types_scale[size];
213 case GL_SHORT: return short_types_scale[size];
214 case GL_BYTE: return byte_types_scale[size];
215 case GL_UNSIGNED_INT: return uint_types_scale[size];
216 case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
217 case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
218 default: assert(0); return 0;
224 static GLuint get_size( GLenum type )
227 case GL_DOUBLE: return sizeof(GLdouble);
228 case GL_FLOAT: return sizeof(GLfloat);
229 case GL_INT: return sizeof(GLint);
230 case GL_SHORT: return sizeof(GLshort);
231 case GL_BYTE: return sizeof(GLbyte);
232 case GL_UNSIGNED_INT: return sizeof(GLuint);
233 case GL_UNSIGNED_SHORT: return sizeof(GLushort);
234 case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
239 static GLuint get_index_type(GLenum type)
242 case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
243 case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
244 case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
245 default: assert(0); return 0;
249 static void copy_strided_array( GLubyte *dest,
256 do_memcpy(dest, src, count * size);
260 for (i = 0; i < count; i++) {
261 for (j = 0; j < size; j++)
263 src += (stride - size);
268 static void wrap_buffers( struct brw_context *brw,
271 GLcontext *ctx = &brw->intel.ctx;
273 if (size < BRW_UPLOAD_INIT_SIZE)
274 size = BRW_UPLOAD_INIT_SIZE;
276 brw->vb.upload.buf++;
277 brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS;
278 brw->vb.upload.offset = 0;
280 ctx->Driver.BufferData(ctx,
285 brw->vb.upload.vbo[brw->vb.upload.buf]);
288 static void get_space( struct brw_context *brw,
290 struct gl_buffer_object **vbo_return,
291 GLuint *offset_return )
293 size = ALIGN(size, 64);
295 if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE)
296 wrap_buffers(brw, size);
298 *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf];
299 *offset_return = brw->vb.upload.offset;
301 brw->vb.upload.offset += size;
306 static struct gl_client_array *
307 copy_array_to_vbo_array( struct brw_context *brw,
309 const struct gl_client_array *array,
313 GLcontext *ctx = &brw->intel.ctx;
314 struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
315 GLuint size = count * element_size;
316 struct gl_buffer_object *vbo;
320 get_space(brw, size, &vbo, &offset);
322 if (array->StrideB == 0) {
327 new_stride = element_size;
329 vbo_array->Size = array->Size;
330 vbo_array->Type = array->Type;
331 vbo_array->Stride = new_stride;
332 vbo_array->StrideB = new_stride;
333 vbo_array->Ptr = (const void *)offset;
334 vbo_array->Enabled = 1;
335 vbo_array->Normalized = array->Normalized;
336 vbo_array->_MaxElement = array->_MaxElement; /* ? */
337 vbo_array->BufferObj = vbo;
340 GLubyte *map = ctx->Driver.MapBuffer(ctx,
347 copy_strided_array( map,
353 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj);
361 static struct gl_client_array *
362 interleaved_vbo_array( struct brw_context *brw,
364 const struct gl_client_array *uploaded_array,
365 const struct gl_client_array *array,
368 struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
370 vbo_array->Size = array->Size;
371 vbo_array->Type = array->Type;
372 vbo_array->Stride = array->Stride;
373 vbo_array->StrideB = array->StrideB;
374 vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr +
375 ((const char *)array->Ptr - ptr));
376 vbo_array->Enabled = 1;
377 vbo_array->Normalized = array->Normalized;
378 vbo_array->_MaxElement = array->_MaxElement;
379 vbo_array->BufferObj = uploaded_array->BufferObj;
385 GLboolean brw_upload_vertices( struct brw_context *brw,
389 GLcontext *ctx = &brw->intel.ctx;
390 struct intel_context *intel = intel_context(ctx);
391 GLuint tmp = brw->vs.prog_data->inputs_read;
392 struct brw_vertex_element_packet vep;
393 struct brw_array_state vbp;
395 const void *ptr = NULL;
396 GLuint interleave = 0;
398 struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
399 GLuint nr_enabled = 0;
401 struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
402 GLuint nr_uploads = 0;
405 memset(&vbp, 0, sizeof(vbp));
406 memset(&vep, 0, sizeof(vep));
408 /* First build an array of pointers to ve's in vb.inputs_read
411 _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
414 GLuint i = _mesa_ffsll(tmp)-1;
415 struct brw_vertex_element *input = &brw->vb.inputs[i];
418 enabled[nr_enabled++] = input;
421 input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
422 input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
424 if (!input->glarray->BufferObj->Name) {
426 /* Position array not properly enabled:
428 if (input->glarray->StrideB == 0)
431 interleave = input->glarray->StrideB;
432 ptr = input->glarray->Ptr;
434 else if (interleave != input->glarray->StrideB ||
435 (const char *)input->glarray->Ptr - (const char *)ptr < 0 ||
436 (const char *)input->glarray->Ptr - (const char *)ptr > interleave) {
440 upload[nr_uploads++] = input;
442 /* We rebase drawing to start at element zero only when
443 * varyings are not in vbos, which means we can end up
444 * uploading non-varying arrays (stride != 0) when min_index
445 * is zero. This doesn't matter as the amount to upload is
446 * the same for these arrays whether the draw call is rebased
447 * or not - we just have to upload the one element.
449 assert(min_index == 0 || input->glarray->StrideB == 0);
453 /* Upload interleaved arrays if all uploads are interleaved
455 if (nr_uploads > 1 &&
458 struct brw_vertex_element *input0 = upload[0];
460 input0->glarray = copy_array_to_vbo_array(brw, 0,
465 for (i = 1; i < nr_uploads; i++) {
466 upload[i]->glarray = interleaved_vbo_array(brw,
474 for (i = 0; i < nr_uploads; i++) {
475 struct brw_vertex_element *input = upload[i];
477 input->glarray = copy_array_to_vbo_array(brw, i,
485 /* XXX: In the rare cases where this happens we fallback all
486 * the way to software rasterization, although a tnl fallback
487 * would be sufficient. I don't know of *any* real world
488 * cases with > 17 vertex attributes enabled, so it probably
489 * isn't an issue at this point.
491 if (nr_enabled >= BRW_VEP_MAX)
494 /* This still defines a hardware VB for each input, even if they
495 * are interleaved or from the same VBO. TBD if this makes a
496 * performance difference.
498 for (i = 0; i < nr_enabled; i++) {
499 struct brw_vertex_element *input = enabled[i];
501 input->vep = &vep.ve[i];
502 input->vep->ve0.src_format = get_surface_type(input->glarray->Type,
503 input->glarray->Size,
504 input->glarray->Normalized);
505 input->vep->ve0.valid = 1;
506 input->vep->ve1.dst_offset = (i) * 4;
507 input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
508 input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
509 input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
510 input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
512 switch (input->glarray->Size) {
513 case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
514 case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
515 case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
516 case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
520 input->vep->ve0.vertex_buffer_index = i;
521 input->vep->ve0.src_offset = 0;
523 vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB;
524 vbp.vb[i].vb0.bits.pad = 0;
525 vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
526 vbp.vb[i].vb0.bits.vb_index = i;
527 vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
528 vbp.vb[i].buffer = array_buffer(input->glarray);
529 vbp.vb[i].max_index = max_index;
534 /* Now emit VB and VEP state packets:
536 vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
537 vbp.header.bits.opcode = CMD_VERTEX_BUFFER;
539 BEGIN_BATCH(vbp.header.bits.length+2, 0);
540 OUT_BATCH( vbp.header.dword );
542 for (i = 0; i < nr_enabled; i++) {
543 OUT_BATCH( vbp.vb[i].vb0.dword );
544 OUT_RELOC( vbp.vb[i].buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
546 OUT_BATCH( vbp.vb[i].max_index );
547 OUT_BATCH( vbp.vb[i].instance_data_step_rate );
551 vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
552 vep.header.opcode = CMD_VERTEX_ELEMENT;
553 brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
559 static GLuint element_size( GLenum type )
562 case GL_UNSIGNED_INT: return 4;
563 case GL_UNSIGNED_SHORT: return 2;
564 case GL_UNSIGNED_BYTE: return 1;
565 default: assert(0); return 0;
/* brw_upload_indices: make an index (element) buffer available to the
 * hardware and emit the CMD_INDEX_BUFFER packet.  User-space index
 * arrays are copied into the driver's upload VBO; VBO-sourced indices
 * whose offset is not aligned to the index size are rebased into the
 * upload VBO via a map/copy.
 *
 * NOTE(review): this listing is truncated (the function continues past
 * the visible end) and carries line-number residue from a numbered
 * paste; the code lines below are preserved verbatim, comments only
 * added.
 */
571 void brw_upload_indices( struct brw_context *brw,
572 const struct _mesa_index_buffer *index_buffer )
574 GLcontext *ctx = &brw->intel.ctx;
575 struct intel_context *intel = &brw->intel;
/* Total byte size of the index data: per-index size times index count. */
576 GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
577 struct gl_buffer_object *bufferobj = index_buffer->obj;
/* For a user array, ptr is a client address; for a VBO it is an offset. */
578 GLuint offset = (GLuint)index_buffer->ptr;
580 /* Turn into a proper VBO:
582 if (!bufferobj->Name) {
584 /* Get new bufferobj, offset:
586 get_space(brw, ib_size, &bufferobj, &offset);
/* Straight upload of the client index array into the reserved space. */
590 ctx->Driver.BufferSubData( ctx,
591 GL_ELEMENT_ARRAY_BUFFER_ARB,
/* VBO path: get_index_type() doubles as log2(index size), so this masks
 * the low bits to detect a misaligned offset needing a rebase copy.
 */
597 if (((1 << get_index_type(index_buffer->type)) - 1) & offset) {
598 struct gl_buffer_object *vbo;
600 GLubyte *map = ctx->Driver.MapBuffer(ctx,
601 GL_ELEMENT_ARRAY_BUFFER_ARB,
/* Reserve aligned space in the upload VBO for the rebased copy. */
605 get_space(brw, ib_size, &vbo, &voffset);
607 ctx->Driver.BufferSubData(ctx,
608 GL_ELEMENT_ARRAY_BUFFER_ARB,
613 ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
620 /* Emit the indexbuffer packet:
623 struct brw_indexbuffer ib;
/* Resolve the GL buffer object to its backing DRM buffer for relocs. */
624 struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj));
626 memset(&ib, 0, sizeof(ib));
628 ib.header.bits.opcode = CMD_INDEX_BUFFER;
/* Packet length field is dwords-minus-two, per hardware convention. */
629 ib.header.bits.length = sizeof(ib)/4 - 2;
630 ib.header.bits.index_format = get_index_type(index_buffer->type);
631 ib.header.bits.cut_index_enable = 0;
635 OUT_BATCH( ib.header.dword );
/* Start address relocation ... */
636 OUT_RELOC( buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset);
/* ... and (presumably) the end-address relocation — continuation of this
 * call is beyond the visible end of the listing.
 */
637 OUT_RELOC( buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,