OSDN Git Service

i965_drv_video: add support for H264 on Clarkdale/Arrandale
author Xiang, Haihao <haihao.xiang@intel.com>
Fri, 26 Mar 2010 04:48:50 +0000 (12:48 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Fri, 26 Mar 2010 04:48:50 +0000 (12:48 +0800)
191 files changed:
configure.ac
i965_drv_video/Makefile.am
i965_drv_video/i965_avc_bsd.c [new file with mode: 0644]
i965_drv_video/i965_avc_bsd.h [new file with mode: 0644]
i965_drv_video/i965_avc_hw_scoreboard.c [new file with mode: 0644]
i965_drv_video/i965_avc_hw_scoreboard.h [new file with mode: 0644]
i965_drv_video/i965_defines.h
i965_drv_video/i965_drv_video.c
i965_drv_video/i965_drv_video.h
i965_drv_video/i965_media.c
i965_drv_video/i965_media.h
i965_drv_video/i965_media_h264.c [new file with mode: 0644]
i965_drv_video/i965_media_h264.h [new file with mode: 0644]
i965_drv_video/i965_media_mpeg2.c
i965_drv_video/i965_render.c
i965_drv_video/i965_render.h
i965_drv_video/i965_structs.h
i965_drv_video/intel_batchbuffer.c
i965_drv_video/intel_batchbuffer.h
i965_drv_video/intel_batchbuffer_dump.c [new file with mode: 0644]
i965_drv_video/intel_batchbuffer_dump.h [new file with mode: 0644]
i965_drv_video/intel_driver.h
i965_drv_video/object_heap.c
i965_drv_video/shaders/Makefile.am
i965_drv_video/shaders/h264/Makefile.am [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Field_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Field_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Chroma_Core.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Chroma_Core_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_CloseGateway.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Dep_Check.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_h.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_v.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_h.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_v.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_UV_h.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_UV_v.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Y_h.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Y_v.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_ForwardMsg.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_LumaThrdLimit.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Luma_Core.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Luma_Core_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_OpenGateway.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Field_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Field_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_Spawn.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_SpawnChild.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/AVC_ILDB_SpawnChromaRoot.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Child_Undefs.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/ILDB_header.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_16DW.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_22DW.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_64DW.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Makefile.am [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Root_Undefs.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/SetupVPKernel.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/TransposeNV12_16x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/TransposeNV12_4x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_2x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_8x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_Right_Most_2x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_16x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_4x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_Right_Most_4x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Left_UV_2x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/Transpose_Left_Y_4x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/loadNV12_16x16T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/loadNV12_16x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Cur_UV_8x8T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Cur_UV_8x8T_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Cur_UV_Right_Most_2x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Cur_Y_16x16T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Cur_Y_16x16T_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Cur_Y_Right_Most_4x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Left_UV_2x8T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Left_UV_2x8T_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Left_Y_4x16T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Left_Y_4x16T_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Top_UV_8x2.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Top_UV_8x2_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Top_Y_16x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/load_Top_Y_16x4_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/saveNV12_16x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/saveNV12_16x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/saveNV12_16x4T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Cur_UV_8x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Cur_UV_8x8_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Cur_Y_16x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Cur_Y_16x16_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Left_UV_8x2T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Left_UV_8x2T_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Left_Y_16x4T.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Left_Y_16x4T_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Top_UV_8x2.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Top_UV_8x2_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Top_Y_16x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/save_Top_Y_16x4_Mbaff.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/writeURB.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/writeURB_UV_Child.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/ildb/writeURB_Y_Child.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AVCMCInter.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AllAVC.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AllAVCField.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AllAVCFrame.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AllAVCMBAFF.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AllAVC_Build.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AllAVC_Export.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/AllIntra.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/BSDReset.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/DCResetDummy.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Decode_Chroma_Intra.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/EndIntraThread.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/HwmcOnlyHeader.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Intra_16x16.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Intra_4x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Intra_8x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Intra_PCM.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Intra_funcLib.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Makefile.am [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/Scoreboard_header.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/SetHWScoreboard.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/SetHWScoreboard_MBAFF.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/SetHWScoreboard_header.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/SetupForHWMC.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/add_Error_16x16_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/add_Error_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/avc_mc.g4b [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/avc_mc.g4b.gen5 [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/chromaMVAdjust.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/export.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/export.inc.gen5 [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/header.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/initialize_MBPara.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/inter_Header.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/interpolate_C_2x2.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/interpolate_C_4x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/interpolate_Y_4x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/interpolate_Y_8x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/intra_Header.inc [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/intra_Pred_4x4_Y_4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/intra_Pred_8x8_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/intra_Pred_Chroma.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/intra_pred_16x16_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/loadRef_C_10x5.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/loadRef_C_6x3.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/loadRef_Y_16x13.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/loadRef_Y_16x9.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/load_Intra_Ref_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/load_Intra_Ref_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/null.g4a [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/null.g4b [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/null.g4b.gen5 [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/recon_C_4x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/recon_Y_8x8.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/roundShift_C_4x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/save_16x16_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/save_4x4_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/save_8x8_UV.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/save_8x8_Y.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/save_I_PCM.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard_MBAFF.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard_restore_AS.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard_save_AS.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard_sip.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard_start_inter.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard_start_intra.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/scoreboard_update.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/set_SB_offset.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/weightedPred.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/writeRecon_C_8x4.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/writeRecon_YC.asm [new file with mode: 0644]
i965_drv_video/shaders/h264/mc/writeRecon_Y_16x8.asm [new file with mode: 0644]
i965_drv_video/shaders/render/exa_wm.g4i
i965_drv_video/shaders/render/exa_wm_src_sample_planar.g4a
i965_drv_video/shaders/render/exa_wm_src_sample_planar.g4b
i965_drv_video/shaders/render/exa_wm_src_sample_planar.g4b.gen5
i965_drv_video/shaders/render/exa_wm_yuv_rgb.g4a
i965_drv_video/shaders/render/exa_wm_yuv_rgb.g4b
i965_drv_video/shaders/render/exa_wm_yuv_rgb.g4b.gen5

index eedfe4c..b119003 100644 (file)
@@ -122,6 +122,8 @@ AC_OUTPUT([
        dummy_drv_video/Makefile
        i965_drv_video/Makefile
        i965_drv_video/shaders/Makefile
+       i965_drv_video/shaders/h264/Makefile
+       i965_drv_video/shaders/h264/mc/Makefile
        i965_drv_video/shaders/mpeg2/Makefile
        i965_drv_video/shaders/mpeg2/vld/Makefile
        i965_drv_video/shaders/render/Makefile
index 88b5590..bb160ae 100644 (file)
@@ -32,21 +32,29 @@ i965_drv_video_la_LIBADD = ../va/libva-x11.la -lpthread
 i965_drv_video_la_SOURCES =    \
        object_heap.c           \
        intel_batchbuffer.c     \
+       intel_batchbuffer_dump.c\
        intel_memman.c          \
        intel_driver.c          \
        i965_media.c            \
        i965_media_mpeg2.c      \
+       i965_media_h264.c       \
        i965_render.c           \
-       i965_drv_video.c
+       i965_drv_video.c        \
+       i965_avc_bsd.c          \
+       i965_avc_hw_scoreboard.c
 
 noinst_HEADERS =                \
        object_heap.h           \
        intel_batchbuffer.h     \
+       intel_batchbuffer_dump.h\
        intel_memman.h          \
        intel_driver.h          \
        i965_media.h            \
        i965_media_mpeg2.h      \
+       i965_media_h264.h       \
        i965_render.h           \
        i965_drv_video.h        \
        i965_defines.h          \
-       i965_structs.h
+       i965_structs.h          \
+       i965_avc_bsd.h          \
+       i965_avc_hw_scoreboard.h
diff --git a/i965_drv_video/i965_avc_bsd.c b/i965_drv_video/i965_avc_bsd.c
new file mode 100644 (file)
index 0000000..9770207
--- /dev/null
@@ -0,0 +1,801 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "va_backend.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_avc_bsd.h"
+#include "i965_media_h264.h"
+#include "i965_media.h"
+
+/*
+ * Emit the BSD_IND_OBJ_BASE_ADDR command into the BCS batch.
+ *
+ * The command is three dwords: the header (whose length field is the
+ * total dword count minus 2), a relocation to the buffer object that
+ * backs the slice data, and a trailing zero dword.
+ */
+static void
+i965_bsd_ind_obj_base_address(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    const int num_dwords = 3;
+    dri_bo *slice_data_bo = decode_state->slice_data->bo;
+
+    BEGIN_BCS_BATCH(ctx, num_dwords);
+    OUT_BCS_BATCH(ctx, CMD_BSD_IND_OBJ_BASE_ADDR | (num_dwords - 2));
+    /* read-only for the GPU: read domain INSTRUCTION, no write domain */
+    OUT_BCS_RELOC(ctx, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit the AVC_BSD_IMG_STATE command (6 dwords) describing the current
+ * picture to the BSD unit.
+ *
+ * The picture parameters are taken from decode_state->pic_param, which
+ * must be present.  Whether application supplied scaling matrices are
+ * used is derived from the presence of decode_state->iq_matrix.
+ *
+ * Several consistency checks on the picture parameters are enforced with
+ * assert() before anything is written to the batch buffer.
+ */
+static void
+i965_avc_bsd_img_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    int qm_present_flag;
+    int img_struct;
+    int mbaff_frame_flag;
+    unsigned int avc_it_command_header;
+    unsigned int width_in_mbs, height_in_mbs;
+    VAPictureParameterBufferH264 *pic_param;
+
+    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
+        qm_present_flag = 1;
+    else
+        qm_present_flag = 0; /* built-in QM matrices */
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
+
+    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
+    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
+        img_struct = 1;
+    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
+        img_struct = 3;
+    else
+        img_struct = 0;
+
+    /* bit 0 of img_struct is set exactly for field pictures */
+    if ((img_struct & 0x1) == 0x1) {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
+    } else {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
+    }
+
+    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
+        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
+    } else {
+        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
+    }
+
+    /* MBAFF applies only to frame pictures of an MBAFF-enabled sequence */
+    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+                        !pic_param->pic_fields.bits.field_pic_flag);
+
+    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
+    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
+
+    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */
+
+    /* BSD unit doesn't support 4:2:2 and 4:4:4 picture */
+    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
+           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
+    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
+
+    avc_it_command_header = (CMD_MEDIA_OBJECT_EX | (12 - 2));
+
+    BEGIN_BCS_BATCH(ctx, 6);
+    OUT_BCS_BATCH(ctx, CMD_AVC_BSD_IMG_STATE | (6 - 2));
+    OUT_BCS_BATCH(ctx,
+                  ((width_in_mbs * height_in_mbs) & 0x7fff));
+    OUT_BCS_BATCH(ctx,
+                  (height_in_mbs << 16) |
+                  (width_in_mbs << 0));
+    /*
+     * The chroma QP index offsets are signed.  They must be masked to
+     * their field width before being shifted into place: a negative value
+     * would otherwise be sign-extended, overwrite every bit above its own
+     * field when ORed into the dword, and be a left shift of a negative
+     * int (undefined behaviour in C).
+     * NOTE(review): the 5-bit width assumes the H.264 range of -12..+12
+     * and a 5-bit hardware field -- confirm against the hardware docs.
+     */
+    OUT_BCS_BATCH(ctx,
+                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
+                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
+                  (SCAN_RASTER_ORDER << 15) | /* AVC ILDB Data */
+                  (SCAN_SPECIAL_ORDER << 14) | /* AVC IT Command */
+                  (SCAN_RASTER_ORDER << 13) | /* AVC IT Data */
+                  (1 << 12) | /* always 1, hardware requirement */
+                  (qm_present_flag << 10) |
+                  (img_struct << 8) |
+                  (16 << 0)); /* FIXME: always support 16 reference frames ??? */
+    OUT_BCS_BATCH(ctx,
+                  (RESIDUAL_DATA_OFFSET << 24) | /* residual data offset */
+                  (0 << 17) | /* don't overwrite SRT */
+                  (0 << 16) | /* Un-SRT (Unsynchronized Root Thread) */
+                  (0 << 12) | /* FIXME: no 16MV ??? */
+                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
+                  (1 << 8)  | /* Enable ILDB writing output */
+                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
+                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
+                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
+                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
+                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
+                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
+                  (mbaff_frame_flag << 1) |
+                  (pic_param->pic_fields.bits.field_pic_flag << 0));
+    /* header that gets prepended to each generated AVC IT command */
+    OUT_BCS_BATCH(ctx, avc_it_command_header);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit the AVC_BSD_QM_STATE command carrying the application supplied
+ * scaling matrices.
+ *
+ * Nothing is emitted when no IQ matrix buffer is attached to
+ * decode_state.  The six 4x4 lists are always sent; the two 8x8 lists
+ * are appended only when transform_8x8_mode_flag is set in the picture
+ * parameters.
+ */
+static void
+i965_avc_bsd_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAIQMatrixBufferH264 *iq_matrix;
+    VAPictureParameterBufferH264 *pic_param;
+    int with_8x8;
+    int matrix_mask = 0x3f;    /* six 4x4 scaling matrices */
+    int cmd_len = 2 + 6 * 4;   /* header + flags + six 4x4 scaling matrices */
+
+    if (!(decode_state->iq_matrix && decode_state->iq_matrix->buffer))
+        return;
+
+    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    with_8x8 = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
+
+    if (with_8x8) {
+        matrix_mask = 0xff;    /* six 4x4 and two 8x8 scaling matrices */
+        cmd_len += 2 * 16;     /* room for the two 8x8 scaling matrices */
+    }
+
+    BEGIN_BCS_BATCH(ctx, cmd_len);
+    OUT_BCS_BATCH(ctx, CMD_AVC_BSD_QM_STATE | (cmd_len - 2));
+    OUT_BCS_BATCH(ctx,
+                  (0x0 << 8) |          /* don't use default built-in matrices */
+                  (matrix_mask << 0));  /* which matrices follow */
+
+    /* payload sizes are in bytes: 6 lists * 4 dwords * 4 bytes */
+    intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
+
+    if (with_8x8) {
+        /* 2 lists * 16 dwords * 4 bytes */
+        intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
+    }
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit the AVC_BSD_SLICE_STATE command for one slice.
+ *
+ * Nothing is emitted for I slices.  For P slices reference list 0 is
+ * sent, for every other slice type both reference lists are sent.  The
+ * explicit weight/offset tables for list 0 / list 1 are appended when the
+ * corresponding luma or chroma weight flag is set in slice_param.
+ *
+ * As a side effect the weight-128 bookkeeping fields in the H.264
+ * context (weight128_luma_l0/l1, weight128_chroma_l0/l1,
+ * weight128_offset0_flag, weight128_offset0) are reset and then
+ * recomputed from this slice.
+ *
+ * pic_param:   picture parameters of the current picture
+ * slice_param: parameters of the slice being submitted
+ */
+static void
+i965_avc_bsd_slice_state(VADriverContextP ctx, 
+                         VAPictureParameterBufferH264 *pic_param, 
+                         VASliceParameterBufferH264 *slice_param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context;
+    int present_flag, cmd_len, list, j;
+    /* one entry per reference; NOTE(review): presumably packs into a single byte -- confirm */
+    struct {
+        unsigned char bottom_idc:1;
+        unsigned char frame_store_index:4;
+        unsigned char field_picture:1;
+        unsigned char long_term:1;
+        unsigned char non_exist:1;
+    } refs[32];
+    char weightoffsets[32 * 6];
+
+    /* don't issue SLICE_STATE for intra-prediction decoding */
+    if (slice_param->slice_type == SLICE_TYPE_I)
+        return;
+
+    /* cmd_len is counted in dwords: header + present flags ... */
+    cmd_len = 2;
+
+    /* ... plus 8 dwords (32 one-byte entries) per reference list ... */
+    if (slice_param->slice_type == SLICE_TYPE_P) {
+        present_flag = PRESENT_REF_LIST0;
+        cmd_len += 8;
+    } else { 
+        present_flag = PRESENT_REF_LIST0 | PRESENT_REF_LIST1;
+        cmd_len += 16;
+    }
+
+    /* ... plus 48 dwords (32 * 6 bytes) per weight/offset table */
+    if (slice_param->luma_weight_l0_flag | slice_param->chroma_weight_l0_flag) {
+        present_flag |= PRESENT_WEIGHT_OFFSET_L0;
+        cmd_len += 48;
+        assert((pic_param->pic_fields.bits.weighted_pred_flag == 1) || /* P slice */
+               (pic_param->pic_fields.bits.weighted_bipred_idc == 1)); /* B slice */
+    }
+
+    if (slice_param->luma_weight_l1_flag | slice_param->chroma_weight_l1_flag) {
+        present_flag |= PRESENT_WEIGHT_OFFSET_L1;
+        cmd_len += 48;
+        assert(slice_param->slice_type == SLICE_TYPE_B);
+        assert(pic_param->pic_fields.bits.weighted_bipred_idc == 1);
+    }
+
+    BEGIN_BCS_BATCH(ctx, cmd_len);
+    OUT_BCS_BATCH(ctx, CMD_AVC_BSD_SLICE_STATE | (cmd_len - 2));
+    OUT_BCS_BATCH(ctx, present_flag);
+
+    /* reference lists: refs[] is rebuilt and emitted once per present list */
+    for (list = 0; list < 2; list++) {
+        int flag;
+        VAPictureH264 *va_pic;
+
+        if (list == 0) {
+            flag = PRESENT_REF_LIST0;
+            va_pic = slice_param->RefPicList0;
+        } else {
+            flag = PRESENT_REF_LIST1;
+            va_pic = slice_param->RefPicList1;
+        }
+
+        if (!(present_flag & flag))
+            continue;
+
+        for (j = 0; j < 32; j++) {
+            if (va_pic->flags & VA_PICTURE_H264_INVALID) {
+                /* unused slot: every bit of the entry is set */
+                refs[j].non_exist = 1;
+                refs[j].long_term = 1;
+                refs[j].field_picture = 1;
+                refs[j].frame_store_index = 0xf;
+                refs[j].bottom_idc = 1;
+            } else {
+                int frame_idx;
+                
+                /* the frame store index is the position of the picture in ReferenceFrames[] */
+                for (frame_idx = 0; frame_idx < 16; frame_idx++) {
+                    VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[frame_idx];
+                    
+                    if (!(ref_pic->flags & VA_PICTURE_H264_INVALID)) {
+                        if (ref_pic->picture_id == va_pic->picture_id)
+                            break;
+                    }       
+                }
+                
+                /* every valid list entry must refer to a picture in ReferenceFrames[] */
+                assert(frame_idx < 16);
+                
+                refs[j].non_exist = 0;
+                refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
+                refs[j].field_picture = !!(va_pic->flags & 
+                                           (VA_PICTURE_H264_TOP_FIELD | 
+                                            VA_PICTURE_H264_BOTTOM_FIELD));
+                refs[j].frame_store_index = frame_idx;
+                refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
+            }
+
+            va_pic++;
+        }
+        
+        intel_batchbuffer_data_bcs(ctx, refs, sizeof(refs));
+    }
+
+    /* reset the weight-128 bookkeeping before scanning this slice */
+    i965_h264_context->weight128_luma_l0 = 0;
+    i965_h264_context->weight128_luma_l1 = 0;
+    i965_h264_context->weight128_chroma_l0 = 0;
+    i965_h264_context->weight128_chroma_l1 = 0;
+
+    i965_h264_context->weight128_offset0_flag = 0;
+    i965_h264_context->weight128_offset0 = 0;
+
+    /*
+     * Each table entry is 6 bytes: luma offset, luma weight, then
+     * offset/weight for each of the two chroma components.
+     * NOTE(review): the values are stored in a char array, so a weight of
+     * 128 presumably cannot be represented there, which is why such
+     * weights are tracked separately below -- confirm.
+     */
+    if (present_flag & PRESENT_WEIGHT_OFFSET_L0) {
+        for (j = 0; j < 32; j++) {
+            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l0[j];
+            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l0[j];
+            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l0[j][0];
+            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l0[j][0];
+            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l0[j][1];
+            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l0[j][1];
+
+            if (pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
+                if (i965_h264_context->use_hw_w128) {
+                    /* 1u: j reaches 31, and shifting a signed 1 into the sign bit is undefined */
+                    if (slice_param->luma_weight_l0[j] == 128)
+                        i965_h264_context->weight128_luma_l0 |= (1u << j);
+
+                    if (slice_param->chroma_weight_l0[j][0] == 128 ||
+                        slice_param->chroma_weight_l0[j][1] == 128)
+                        i965_h264_context->weight128_chroma_l0 |= (1u << j);
+                } else {
+                    /* FIXME: workaround for weight 128 */
+                    if (slice_param->luma_weight_l0[j] == 128 ||
+                        slice_param->chroma_weight_l0[j][0] == 128 ||
+                        slice_param->chroma_weight_l0[j][1] == 128)
+                        i965_h264_context->weight128_offset0_flag = 1;
+                }
+            }
+        }
+
+        intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets));
+    }
+
+    if (present_flag & PRESENT_WEIGHT_OFFSET_L1) {
+        for (j = 0; j < 32; j++) {
+            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l1[j];
+            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l1[j];
+            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l1[j][0];
+            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l1[j][0];
+            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l1[j][1];
+            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l1[j][1];
+
+            if (pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
+                if (i965_h264_context->use_hw_w128) {
+                    if (slice_param->luma_weight_l1[j] == 128)
+                        i965_h264_context->weight128_luma_l1 |= (1u << j);
+
+                    if (slice_param->chroma_weight_l1[j][0] == 128 ||
+                        slice_param->chroma_weight_l1[j][1] == 128)
+                        i965_h264_context->weight128_chroma_l1 |= (1u << j);
+                } else {
+                    /*
+                     * FIXME: workaround for weight 128.  This loop handles
+                     * list 1, so the list 1 weights are the ones that have
+                     * to be inspected here.
+                     */
+                    if (slice_param->luma_weight_l1[j] == 128 ||
+                        slice_param->chroma_weight_l1[j][0] == 128 ||
+                        slice_param->chroma_weight_l1[j][1] == 128)
+                        i965_h264_context->weight128_offset0_flag = 1;
+                }
+            }
+        }
+
+        intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets));
+    }
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit the AVC_BSD_BUF_BASE_STATE command (74 dwords) into the BCS batch.
+ *
+ * Dword layout as emitted below:
+ *    1   command header
+ *    5   relocations: BSD raw store, MPR row store, IT command/MB info,
+ *        IT data, ILDB data
+ *   32   two direct-MV buffer relocations for each of the 16 entries of
+ *        pic_param->ReferenceFrames (zeros for invalid entries)
+ *    2   direct-MV buffer relocations for the current picture
+ *   32   top/bottom field order count for each reference entry
+ *        (zeros for invalid entries)
+ *    2   top/bottom field order count of the current picture
+ *
+ * ctx:          driver context the batch belongs to.
+ * decode_state: must carry a valid H.264 picture parameter buffer.
+ */
+static void
+i965_avc_bsd_buf_base_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *i965_h264_context;
+    struct i965_avc_bsd_context *i965_avc_bsd_context;
+    int i;
+    VAPictureParameterBufferH264 *pic_param;
+    VAPictureH264 *va_pic;
+    struct object_surface *obj_surface;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    assert(media_state->private_context);
+    i965_h264_context = (struct i965_h264_context *)media_state->private_context;
+    i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
+
+    BEGIN_BCS_BATCH(ctx, 74);
+    OUT_BCS_BATCH(ctx, CMD_AVC_BSD_BUF_BASE_STATE | (74 - 2));
+    OUT_BCS_RELOC(ctx, i965_avc_bsd_context->bsd_raw_store.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_RELOC(ctx, i965_avc_bsd_context->mpr_row_store.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    /* The delta is zero when use_avc_hw_scoreboard is 0, otherwise mbs * MB_CMD_IN_BYTES. */
+    OUT_BCS_RELOC(ctx, i965_h264_context->avc_it_command_mb_info.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
+    OUT_BCS_RELOC(ctx, i965_h264_context->avc_it_data.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  (i965_h264_context->avc_it_data.write_offset << 6));
+    OUT_BCS_RELOC(ctx, i965_avc_bsd_context->ildb_data.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* Direct-MV buffers of the reference frames (no write domain requested). */
+    for (i = 0; i < 16; i++) {
+        va_pic = &pic_param->ReferenceFrames[i];
+
+        if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) {
+            obj_surface = SURFACE(va_pic->picture_id);
+            assert(obj_surface);
+            OUT_BCS_RELOC(ctx, obj_surface->direct_mv_wr_top_bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+
+            /*
+             * The second entry points at the separate bottom buffer only for
+             * field pictures without direct_8x8_inference; otherwise the top
+             * buffer is repeated.
+             */
+            if (pic_param->pic_fields.bits.field_pic_flag && 
+                !pic_param->seq_fields.bits.direct_8x8_inference_flag)
+                OUT_BCS_RELOC(ctx, obj_surface->direct_mv_wr_bottom_bo,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0,
+                              0);
+            else 
+                OUT_BCS_RELOC(ctx, obj_surface->direct_mv_wr_top_bo,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0,
+                              0);
+        } else {
+            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(ctx, 0);
+        }
+    }
+
+    /* Direct-MV buffers of the current picture (requested with a write domain). */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface);
+    OUT_BCS_RELOC(ctx, obj_surface->direct_mv_wr_top_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    if (pic_param->pic_fields.bits.field_pic_flag && 
+        !pic_param->seq_fields.bits.direct_8x8_inference_flag)
+        OUT_BCS_RELOC(ctx, obj_surface->direct_mv_wr_bottom_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_RELOC(ctx, obj_surface->direct_mv_wr_top_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+
+    /* POC List */
+    for (i = 0; i < 16; i++) {
+        va_pic = &pic_param->ReferenceFrames[i];
+        if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) {
+            OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
+            OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+        } else {
+            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(ctx, 0);
+        }
+    }
+
+    /*
+     * The loop above leaves va_pic pointing at ReferenceFrames[15]; reselect
+     * CurrPic so the final POC pair describes the current picture, matching
+     * the direct-MV entries emitted for it earlier.
+     */
+    va_pic = &pic_param->CurrPic;
+    OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit one AVC_BSD_OBJECT command using the 8-dword layout (9 dwords if the
+ * currently hard-wired-off "encrypted" path were enabled).
+ *
+ * slice_param == NULL requests the phantom slice: an all-zero command except
+ * for dword 6, which carries the macroblock count of the picture (halved for
+ * field pictures).
+ */
+static void
+g4x_avc_bsd_object(VADriverContextP ctx,
+                   struct decode_state *decode_state,
+                   VAPictureParameterBufferH264 *pic_param,
+                   VASliceParameterBufferH264 *slice_param)
+{
+    const int mbs_per_row = pic_param->picture_width_in_mbs_minus1 + 1;
+    const int mb_rows = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+    const int encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
+    const int counter_value = 0; /* FIXME: ??? (only emitted when encrypted) */
+    const int cmd_len = encrypted ? 9 : 8;
+    int field_or_mbaff;
+    int bit_offset, byte_offset;
+    int l0_refs, l1_refs;
+    int wp_idc = 0;
+    int mb_x, mb_y;
+    int slice_qp;
+    int dw;
+
+    if (slice_param == NULL) {
+        BEGIN_BCS_BATCH(ctx, 8);
+        OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (8 - 2));
+
+        /* indirect data length and start address for phantom slice are 0,
+         * followed by three more zero dwords */
+        for (dw = 0; dw < 5; dw++) {
+            OUT_BCS_BATCH(ctx, 0);
+        }
+
+        OUT_BCS_BATCH(ctx, mbs_per_row * mb_rows / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
+        OUT_BCS_BATCH(ctx, 0);
+        ADVANCE_BCS_BATCH(ctx);
+        return;
+    }
+
+    field_or_mbaff = (pic_param->pic_fields.bits.field_pic_flag ||
+                      pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+
+    /* CABAC slice data is addressed from the next byte boundary. */
+    bit_offset = slice_param->slice_data_bit_offset;
+
+    if (pic_param->pic_fields.bits.entropy_coding_mode_flag == ENTROPY_CABAC)
+        bit_offset = ALIGN(bit_offset, 0x8);
+
+    byte_offset = bit_offset >> 3;
+
+    /* Active reference counts and weighted-prediction mode per slice type. */
+    if (slice_param->slice_type == SLICE_TYPE_I) {
+        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        l0_refs = 0;
+        l1_refs = 0;
+    } else if (slice_param->slice_type == SLICE_TYPE_P) {
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        l0_refs = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        l1_refs = 0;
+        wp_idc = pic_param->pic_fields.bits.weighted_pred_flag;
+    } else {
+        l0_refs = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        l1_refs = slice_param->num_ref_idx_l1_active_minus1 + 1;
+
+        if (slice_param->slice_type == SLICE_TYPE_B)
+            wp_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
+    }
+
+    mb_x = slice_param->first_mb_in_slice % mbs_per_row;
+    mb_y = (slice_param->first_mb_in_slice / mbs_per_row) << (1 + field_or_mbaff); /* FIXME: right ??? */
+
+    slice_qp = pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta;
+
+    BEGIN_BCS_BATCH(ctx, cmd_len);
+    OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (cmd_len - 2));
+    OUT_BCS_BATCH(ctx,
+                  (encrypted << 31) |
+                  ((slice_param->slice_data_size - byte_offset) << 0));
+    OUT_BCS_BATCH(ctx,
+                  (slice_param->slice_data_offset + byte_offset));
+    OUT_BCS_BATCH(ctx,
+                  (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
+                  (0 << 14) | /* ignore BSDPrematureComplete Error handling */
+                  (0 << 13) | /* FIXME: ??? */
+                  (0 << 12) | /* ignore MPR Error handling */
+                  (0 << 10) | /* ignore Entropy Error handling */
+                  (0 << 8)  | /* ignore MB Header Error handling */
+                  (slice_param->slice_type << 0));
+    OUT_BCS_BATCH(ctx,
+                  (l1_refs << 24) |
+                  (l0_refs << 16) |
+                  (slice_param->chroma_log2_weight_denom << 8) |
+                  (slice_param->luma_log2_weight_denom << 0));
+    OUT_BCS_BATCH(ctx,
+                  (wp_idc << 30) |
+                  (slice_param->direct_spatial_mv_pred_flag << 29) |
+                  (slice_param->disable_deblocking_filter_idc << 27) |
+                  (slice_param->cabac_init_idc << 24) |
+                  (slice_qp << 16) |
+                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+    OUT_BCS_BATCH(ctx,
+                  (mb_y << 24) |
+                  (mb_x << 16) |
+                  (slice_param->first_mb_in_slice << 0));
+    OUT_BCS_BATCH(ctx,
+                  (0 << 7) | /* FIXME: ??? */
+                  ((0x7 - (bit_offset & 0x7)) << 0));
+
+    if (encrypted) {
+        OUT_BCS_BATCH(ctx, counter_value);
+    }
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit one AVC_BSD_OBJECT command using the 16-dword layout.
+ *
+ * For a real slice the first eight dwords mirror the 8-dword variant; they
+ * are followed by the counter value, three zero dwords (dw9-dw11, still
+ * FIXME) and the four weight128 masks taken from the H.264 context.
+ *
+ * slice_param == NULL requests the phantom slice: an all-zero command except
+ * for dword 6, which carries the macroblock count of the picture (halved for
+ * field pictures).
+ */
+static void
+ironlake_avc_bsd_object(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        VAPictureParameterBufferH264 *pic_param,
+                        VASliceParameterBufferH264 *slice_param)
+{
+    const int mbs_per_row = pic_param->picture_width_in_mbs_minus1 + 1;
+    const int mb_rows = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+    const int encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
+    const int counter_value = 0; /* FIXME: ??? (zero whether encrypted or not) */
+    struct i965_h264_context *h264;
+    int field_or_mbaff;
+    int bit_offset, byte_offset;
+    int l0_refs, l1_refs;
+    int wp_idc = 0;
+    int mb_x, mb_y;
+    int slice_qp;
+    int dw;
+
+    if (slice_param == NULL) {
+        BEGIN_BCS_BATCH(ctx, 16);
+        OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (16 - 2));
+
+        /* indirect data length and start address for phantom slice are 0,
+         * followed by three more zero dwords */
+        for (dw = 0; dw < 5; dw++) {
+            OUT_BCS_BATCH(ctx, 0);
+        }
+
+        OUT_BCS_BATCH(ctx, mbs_per_row * mb_rows / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
+
+        /* remaining nine dwords are zero */
+        for (dw = 0; dw < 9; dw++) {
+            OUT_BCS_BATCH(ctx, 0);
+        }
+
+        ADVANCE_BCS_BATCH(ctx);
+        return;
+    }
+
+    h264 = (struct i965_h264_context *)i965_driver_data(ctx)->media_state.private_context;
+
+    field_or_mbaff = (pic_param->pic_fields.bits.field_pic_flag ||
+                      pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+
+    /* CABAC slice data is addressed from the next byte boundary. */
+    bit_offset = slice_param->slice_data_bit_offset;
+
+    if (pic_param->pic_fields.bits.entropy_coding_mode_flag == ENTROPY_CABAC)
+        bit_offset = ALIGN(bit_offset, 0x8);
+
+    byte_offset = bit_offset >> 3;
+
+    /* Active reference counts and weighted-prediction mode per slice type. */
+    if (slice_param->slice_type == SLICE_TYPE_I) {
+        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        l0_refs = 0;
+        l1_refs = 0;
+    } else if (slice_param->slice_type == SLICE_TYPE_P) {
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        l0_refs = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        l1_refs = 0;
+        wp_idc = pic_param->pic_fields.bits.weighted_pred_flag;
+    } else {
+        l0_refs = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        l1_refs = slice_param->num_ref_idx_l1_active_minus1 + 1;
+
+        if (slice_param->slice_type == SLICE_TYPE_B)
+            wp_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
+    }
+
+    mb_x = slice_param->first_mb_in_slice % mbs_per_row;
+    mb_y = (slice_param->first_mb_in_slice / mbs_per_row) << (1 + field_or_mbaff); /* FIXME: right ??? */
+
+    slice_qp = pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta;
+
+    BEGIN_BCS_BATCH(ctx, 16);
+    OUT_BCS_BATCH(ctx, CMD_AVC_BSD_OBJECT | (16 - 2));
+    OUT_BCS_BATCH(ctx,
+                  (encrypted << 31) |
+                  (0 << 30) | /* FIXME: packet based bit stream */
+                  (0 << 29) | /* FIXME: packet format */
+                  ((slice_param->slice_data_size - byte_offset) << 0));
+    OUT_BCS_BATCH(ctx,
+                  (slice_param->slice_data_offset + byte_offset));
+    OUT_BCS_BATCH(ctx,
+                  (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
+                  (0 << 14) | /* ignore BSDPrematureComplete Error handling */
+                  (0 << 13) | /* FIXME: ??? */
+                  (0 << 12) | /* ignore MPR Error handling */
+                  (0 << 10) | /* ignore Entropy Error handling */
+                  (0 << 8)  | /* ignore MB Header Error handling */
+                  (slice_param->slice_type << 0));
+    OUT_BCS_BATCH(ctx,
+                  (l1_refs << 24) |
+                  (l0_refs << 16) |
+                  (slice_param->chroma_log2_weight_denom << 8) |
+                  (slice_param->luma_log2_weight_denom << 0));
+    OUT_BCS_BATCH(ctx,
+                  (wp_idc << 30) |
+                  (slice_param->direct_spatial_mv_pred_flag << 29) |
+                  (slice_param->disable_deblocking_filter_idc << 27) |
+                  (slice_param->cabac_init_idc << 24) |
+                  (slice_qp << 16) |
+                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+    OUT_BCS_BATCH(ctx,
+                  (mb_y << 24) |
+                  (mb_x << 16) |
+                  (slice_param->first_mb_in_slice << 0));
+    OUT_BCS_BATCH(ctx,
+                  (0 << 7) | /* FIXME: ??? */
+                  ((0x7 - (bit_offset & 0x7)) << 0));
+    OUT_BCS_BATCH(ctx, counter_value);
+
+    /* FIXME: dw9-dw11 */
+    for (dw = 0; dw < 3; dw++) {
+        OUT_BCS_BATCH(ctx, 0);
+    }
+
+    OUT_BCS_BATCH(ctx, h264->weight128_luma_l0);
+    OUT_BCS_BATCH(ctx, h264->weight128_luma_l1);
+    OUT_BCS_BATCH(ctx, h264->weight128_chroma_l0);
+    OUT_BCS_BATCH(ctx, h264->weight128_chroma_l1);
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit an AVC_BSD_OBJECT command, picking the command layout from the device
+ * id: the Ironlake variant when IS_IRONLAKE() matches, the G4x variant
+ * otherwise.  A NULL slice_param is forwarded unchanged.
+ */
+static void
+i965_avc_bsd_object(VADriverContextP ctx,
+                    struct decode_state *decode_state,
+                    VAPictureParameterBufferH264 *pic_param,
+                    VASliceParameterBufferH264 *slice_param)
+{
+    struct i965_driver_data *drv = i965_driver_data(ctx);
+    const int on_ironlake = IS_IRONLAKE(drv->intel.device_id);
+
+    if (!on_ironlake) {
+        g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param);
+        return;
+    }
+
+    ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param);
+}
+
+/*
+ * Emit the phantom slice: an AVC_BSD_OBJECT command issued without slice
+ * parameters.
+ */
+static void
+i965_avc_bsd_phantom_slice(VADriverContextP ctx,
+                           struct decode_state *decode_state,
+                           VAPictureParameterBufferH264 *pic_param)
+{
+    VASliceParameterBufferH264 *no_slice = NULL; /* NULL selects the phantom path */
+
+    i965_avc_bsd_object(ctx, decode_state, pic_param, no_slice);
+}
+
+/*
+ * Build and flush the BCS batch that decodes the picture in decode_state.
+ *
+ * Sequence: indirect object base address, then per slice the slice state,
+ * buffer base state and BSD object (image and QM state are emitted once,
+ * ahead of the first slice), then the phantom slice and an MI flush.
+ * Only a single slice per picture is accepted for now (see the FIXME assert).
+ */
+void
+i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slices;
+    int n;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    assert(decode_state->slice_param && decode_state->slice_param->buffer);
+    slices = (VASliceParameterBufferH264 *)decode_state->slice_param->buffer;
+
+    intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+    i965_bsd_ind_obj_base_address(ctx, decode_state);
+
+    assert(decode_state->num_slices == 1); /* FIXME: */
+    for (n = 0; n < decode_state->num_slices; n++) {
+        VASliceParameterBufferH264 *cur = &slices[n];
+
+        assert(cur->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+        assert((cur->slice_type == SLICE_TYPE_I) ||
+               (cur->slice_type == SLICE_TYPE_P) ||
+               (cur->slice_type == SLICE_TYPE_B)); /* hardware requirement */
+
+        /* Picture-level state goes out once, ahead of the first slice. */
+        if (n == 0) {
+            i965_avc_bsd_img_state(ctx, decode_state);
+            i965_avc_bsd_qm_state(ctx, decode_state);
+        }
+
+        i965_avc_bsd_slice_state(ctx, pic_param, cur);
+        i965_avc_bsd_buf_base_state(ctx, decode_state);
+        i965_avc_bsd_object(ctx, decode_state, pic_param, cur);
+    }
+
+    i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param);
+    intel_batchbuffer_emit_mi_flush_bcs(ctx);
+    intel_batchbuffer_end_atomic_bcs(ctx);
+    intel_batchbuffer_flush_bcs(ctx);
+}
+
+/*
+ * (Re)allocate the three scratch buffers owned by the AVC BSD context:
+ * "bsd raw store", "mpr row store" and "ildb data".  Any buffer held from a
+ * previous call is unreferenced before its replacement is allocated.
+ * Requires media_state.private_context to hold the H.264 context.
+ */
+void
+i965_avc_bsd_decode_init(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_h264_context *h264 =
+        (struct i965_h264_context *)i965->media_state.private_context;
+    struct i965_avc_bsd_context *bsd;
+
+    assert(h264);
+    bsd = &h264->i965_avc_bsd_context;
+
+    dri_bo_unreference(bsd->bsd_raw_store.bo);
+    bsd->bsd_raw_store.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                         "bsd raw store",
+                                         0x4000, /* at least 11520 bytes to support 120 MBs per row */
+                                         64);
+    assert(bsd->bsd_raw_store.bo);
+
+    dri_bo_unreference(bsd->mpr_row_store.bo);
+    bsd->mpr_row_store.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                         "mpr row store",
+                                         0x2000, /* at least 7680 bytes to support 120 MBs per row */
+                                         64);
+    assert(bsd->mpr_row_store.bo);
+
+    dri_bo_unreference(bsd->ildb_data.bo);
+    bsd->ildb_data.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                     "ildb data",
+                                     0x100000, /* at least 1044480 bytes */
+                                     64);
+    assert(bsd->ildb_data.bo);
+}
+
+/*
+ * Drop the references on the three buffers that i965_avc_bsd_decode_init()
+ * allocates (bsd raw store, mpr row store, ildb data).  Always returns True.
+ * The pointers themselves are not cleared here.
+ * NOTE(review): "ternimate" is a misspelling of "terminate", but it is the
+ * exported name and is kept for the callers.
+ */
+Bool 
+i965_avc_bsd_ternimate(struct i965_avc_bsd_context *i965_avc_bsd_context)
+{
+    dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
+    dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);
+    dri_bo_unreference(i965_avc_bsd_context->ildb_data.bo);
+
+    return True;
+}
diff --git a/i965_drv_video/i965_avc_bsd.h b/i965_drv_video/i965_avc_bsd.h
new file mode 100644 (file)
index 0000000..7c373f3
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+
+#ifndef __I965_AVC_BSD_H__
+#define __I965_AVC_BSD_H__
+
+/*
+ * Buffer objects used by the AVC bit-stream decoder (BSD) stage.
+ *
+ * bsd_raw_store, mpr_row_store and ildb_data are allocated by
+ * i965_avc_bsd_decode_init() and released by i965_avc_bsd_ternimate().
+ */
+struct i965_avc_bsd_context
+{
+    /* Scratch buffer named "bsd raw store" (0x4000 bytes at init). */
+    struct {
+        dri_bo *bo;
+    } bsd_raw_store;
+
+    /* Scratch buffer named "mpr row store" (0x2000 bytes at init). */
+    struct {
+        dri_bo *bo;
+    } mpr_row_store;
+
+    /*
+     * NOTE(review): the BSD code in i965_avc_bsd.c reads the identically
+     * named member of struct i965_h264_context rather than this one --
+     * confirm whether this copy is still needed.
+     */
+    struct {
+        dri_bo *bo;
+    } avc_it_command_mb_info;
+
+    /* NOTE(review): same remark as for avc_it_command_mb_info above. */
+    struct {
+        dri_bo *bo;
+        long write_offset;
+    } avc_it_data;
+
+    /* Buffer named "ildb data" (0x100000 bytes at init). */
+    struct {
+        dri_bo *bo;
+    } ildb_data;
+};
+
+/* Build and flush the BCS batch that decodes the picture in decode_state. */
+void i965_avc_bsd_pipeline(VADriverContextP, struct decode_state *);
+/* (Re)allocate the BSD scratch buffers of the current H.264 context. */
+void i965_avc_bsd_decode_init(VADriverContextP);
+/* Release the BSD scratch buffers; always returns True. */
+Bool i965_avc_bsd_ternimate(struct i965_avc_bsd_context *);
+
+#endif /* __I965_AVC_BSD_H__ */
+
diff --git a/i965_drv_video/i965_avc_hw_scoreboard.c b/i965_drv_video/i965_avc_hw_scoreboard.c
new file mode 100644 (file)
index 0000000..9d6d973
--- /dev/null
@@ -0,0 +1,420 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "va_backend.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_avc_hw_scoreboard.h"
+#include "i965_media_h264.h"
+#include "i965_media.h"
+
+extern struct media_kernel *h264_avc_kernels;
+
+/* On Ironlake */
+#include "shaders/h264/mc/export.inc.gen5"
+
+/* Indices into avc_hw_scoreboard_kernel_offset[]. */
+enum {
+    AVC_HW_SCOREBOARD = 0,      /* used when picture.mbaff_frame_flag is clear */
+    AVC_HW_SCOREBOARD_MBAFF     /* used when picture.mbaff_frame_flag is set */
+};
+
+/*
+ * Offsets of the scoreboard kernels, indexed by the AVC_HW_SCOREBOARD* enum;
+ * the selected value is stored in hw_kernel.offset and added to the kernel
+ * buffer's address.
+ * NOTE(review): the *_IP_GEN5 symbols presumably come from the
+ * export.inc.gen5 include above -- confirm.
+ */
+static unsigned long avc_hw_scoreboard_kernel_offset[] = {
+    SETHWSCOREBOARD_IP_GEN5,
+    SETHWSCOREBOARD_MBAFF_IP_GEN5
+};
+
+/*
+ * Eight dwords copied verbatim into the CURBE buffer by
+ * i965_avc_hw_scoreboard_upload_constants().
+ * NOTE(review): the meaning of the individual values is defined by the
+ * scoreboard kernel and is not visible here.
+ */
+static unsigned int avc_hw_scoreboard_constants[] = {
+    0x08040201,
+    0x00000010,
+    0x08000210,
+    0x00000000,
+    0x08040201,
+    0x08040210,
+    0x01000010,
+    0x08040200
+};
+
+/*
+ * Fill the surface state in surface.ss_bo so that it describes surface.s_bo
+ * as a buffer surface of total_mbs * MB_CMD_IN_OWS entries.  The value
+ * (entries - 1) is split over the width (bits 0-6), height (bits 7-19) and
+ * depth (bits 20-26) fields, and a relocation is recorded for the base
+ * address.
+ */
+static void
+i965_avc_hw_scoreboard_surface_state(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    dri_bo *state_bo = avc_hw_scoreboard_context->surface.ss_bo;
+    dri_bo *target_bo = avc_hw_scoreboard_context->surface.s_bo;
+    struct i965_surface_state *ss;
+    unsigned int last_entry;
+
+    dri_bo_map(state_bo, 1);
+    assert(state_bo->virtual);
+    ss = state_bo->virtual;
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_type = I965_SURFACE_BUFFER;
+    ss->ss1.base_addr = target_bo->offset;
+
+    /* Only bits 0-26 of this value are consumed below. */
+    last_entry = avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1;
+    ss->ss2.width = (last_entry & 0x7f);
+    ss->ss2.height = ((last_entry >> 7) & 0x1fff);
+    ss->ss3.depth = ((last_entry >> 20) & 0x7f);
+
+    dri_bo_emit_reloc(state_bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      0,
+                      offsetof(struct i965_surface_state, ss1),
+                      target_bo);
+    dri_bo_unmap(state_bo);
+}
+
+/*
+ * Write the single interface descriptor in idrt.bo: kernel start pointer
+ * (kernel buffer address plus hw_kernel.offset), one CURBE read unit, and the
+ * binding table pointer.  Both address fields get a relocation whose delta
+ * re-encodes the non-address bits that share the same dword.
+ */
+static void
+i965_avc_hw_scoreboard_interface_descriptor_table(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    dri_bo *idrt_bo = avc_hw_scoreboard_context->idrt.bo;
+    dri_bo *kernel_bo = avc_hw_scoreboard_context->hw_kernel.bo;
+    dri_bo *table_bo = avc_hw_scoreboard_context->binding_table.bo;
+    struct i965_interface_descriptor *desc;
+
+    dri_bo_map(idrt_bo, 1);
+    assert(idrt_bo->virtual);
+    desc = idrt_bo->virtual;
+    memset(desc, 0, sizeof(*desc));
+
+    /* desc0: kernel entry point */
+    desc->desc0.grf_reg_blocks = 7;
+    desc->desc0.kernel_start_pointer =
+        (kernel_bo->offset + avc_hw_scoreboard_context->hw_kernel.offset) >> 6; /* reloc */
+
+    /* desc1: constant URB entry read window */
+    desc->desc1.const_urb_entry_read_offset = 0;
+    desc->desc1.const_urb_entry_read_len = 1;
+
+    /* desc3: binding table */
+    desc->desc3.binding_table_entry_count = 0;
+    desc->desc3.binding_table_pointer = table_bo->offset >> 5; /*reloc */
+
+    dri_bo_emit_reloc(idrt_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      desc->desc0.grf_reg_blocks + avc_hw_scoreboard_context->hw_kernel.offset,
+                      offsetof(struct i965_interface_descriptor, desc0),
+                      kernel_bo);
+
+    dri_bo_emit_reloc(idrt_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      desc->desc3.binding_table_entry_count,
+                      offsetof(struct i965_interface_descriptor, desc3),
+                      table_bo);
+
+    dri_bo_unmap(idrt_bo);
+}
+
+/*
+ * Clear the whole binding table buffer and point entry 0 at the surface
+ * state buffer (surface.ss_bo), recording a relocation for that entry.
+ */
+static void
+i965_avc_hw_scoreboard_binding_table(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    dri_bo *table_bo = avc_hw_scoreboard_context->binding_table.bo;
+    dri_bo *surface_state_bo;
+    unsigned int *entries;
+
+    dri_bo_map(table_bo, 1);
+    assert(table_bo->virtual);
+    entries = table_bo->virtual;
+    memset(entries, 0, table_bo->size);
+
+    surface_state_bo = avc_hw_scoreboard_context->surface.ss_bo;
+    entries[0] = surface_state_bo->offset;
+    dri_bo_emit_reloc(table_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0,
+                      0,
+                      surface_state_bo);
+    dri_bo_unmap(table_bo);
+}
+
+/*
+ * Fill the VFE state in vfe_state.bo from the URB configuration of the
+ * context (generic mode, no child threads) and point it at the interface
+ * descriptor table, recording a relocation for that pointer.
+ */
+static void
+i965_avc_hw_scoreboard_vfe_state(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    dri_bo *vfe_bo = avc_hw_scoreboard_context->vfe_state.bo;
+    dri_bo *idrt_bo = avc_hw_scoreboard_context->idrt.bo;
+    struct i965_vfe_state *vfe;
+
+    dri_bo_map(vfe_bo, 1);
+    assert(vfe_bo->virtual);
+    vfe = vfe_bo->virtual;
+    memset(vfe, 0, sizeof(*vfe));
+
+    vfe->vfe1.max_threads = avc_hw_scoreboard_context->urb.num_vfe_entries - 1;
+    vfe->vfe1.urb_entry_alloc_size = avc_hw_scoreboard_context->urb.size_vfe_entry - 1;
+    vfe->vfe1.num_urb_entries = avc_hw_scoreboard_context->urb.num_vfe_entries;
+    vfe->vfe1.vfe_mode = VFE_GENERIC_MODE;
+    vfe->vfe1.children_present = 0;
+    vfe->vfe2.interface_descriptor_base = idrt_bo->offset >> 4; /* reloc */
+
+    dri_bo_emit_reloc(vfe_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0,
+                      offsetof(struct i965_vfe_state, vfe2),
+                      idrt_bo);
+    dri_bo_unmap(vfe_bo);
+}
+
+/*
+ * Copy the avc_hw_scoreboard_constants table to the start of the CURBE
+ * buffer (curbe.bo).
+ */
+static void
+i965_avc_hw_scoreboard_upload_constants(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    dri_bo *curbe_bo = avc_hw_scoreboard_context->curbe.bo;
+    unsigned char *dst;
+
+    dri_bo_map(curbe_bo, 1);
+    assert(curbe_bo->virtual);
+    dst = curbe_bo->virtual;
+    memcpy(dst, avc_hw_scoreboard_constants, sizeof(avc_hw_scoreboard_constants));
+    dri_bo_unmap(curbe_bo);
+}
+
+/*
+ * Fill every indirect state buffer used by the scoreboard kernel: surface
+ * state, binding table, interface descriptor table, VFE state and the
+ * constant (CURBE) buffer, in that order.
+ */
+static void
+i965_avc_hw_scoreboard_states_setup(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    i965_avc_hw_scoreboard_surface_state(avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_binding_table(avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_interface_descriptor_table(avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_vfe_state(avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_upload_constants(avc_hw_scoreboard_context);
+}
+
+/* Emit a one-dword PIPELINE_SELECT command that selects the media pipeline. */
+static void
+i965_avc_hw_scoreboard_pipeline_select(VADriverContextP ctx)
+{
+    BEGIN_BATCH(ctx, 1);
+    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+    ADVANCE_BATCH(ctx);
+}
+
+/*
+ * Emit a three-dword URB_FENCE command that reallocates the VFE and CS
+ * sections: the VFE fence is placed at urb.cs_start and the CS fence at the
+ * device's URB_SIZE.
+ */
+static void
+i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    const unsigned int vfe_end = avc_hw_scoreboard_context->urb.cs_start;
+    const unsigned int cs_end = URB_SIZE((&i965->intel));
+    const unsigned int fence_dword =
+        (vfe_end << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
+        (cs_end << UF2_CS_FENCE_SHIFT);         /* CS_SIZE */
+
+    BEGIN_BATCH(ctx, 3);
+    OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, fence_dword);
+    ADVANCE_BATCH(ctx);
+}
+
+/*
+ * Emit an eight-dword STATE_BASE_ADDRESS command: the header followed by
+ * seven dwords that each carry a zero address with BASE_ADDRESS_MODIFY set.
+ */
+static void
+i965_avc_hw_scoreboard_state_base_address(VADriverContextP ctx)
+{
+    int dw;
+
+    BEGIN_BATCH(ctx, 8);
+    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
+
+    for (dw = 0; dw < 7; dw++) {
+        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+    }
+
+    ADVANCE_BATCH(ctx);
+}
+
+/*
+ * Emit a three-dword MEDIA_STATE_POINTERS command: a zero dword followed by
+ * a relocated pointer to the VFE state buffer.
+ */
+static void
+i965_avc_hw_scoreboard_state_pointers(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    BEGIN_BATCH(ctx, 3);
+    OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
+    OUT_BATCH(ctx, 0);
+    OUT_RELOC(ctx, avc_hw_scoreboard_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    ADVANCE_BATCH(ctx);
+}
+
+/*
+ * Emit a two-dword CS_URB_STATE command carrying the CS URB entry
+ * allocation size (size_cs_entry - 1, from bit 4) and the number of CS URB
+ * entries (from bit 0).
+ */
+static void
+i965_avc_hw_scoreboard_cs_urb_layout(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    const unsigned int cs_urb_dword =
+        ((avc_hw_scoreboard_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
+        (avc_hw_scoreboard_context->urb.num_cs_entries << 0);           /* Number of URB Entries */
+
+    BEGIN_BATCH(ctx, 2);
+    OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
+    OUT_BATCH(ctx, cs_urb_dword);
+    ADVANCE_BATCH(ctx);
+}
+
+/*
+ * Emit a two-dword CONSTANT_BUFFER command (bit 8 set in the header) whose
+ * second dword is a relocation to the CURBE buffer with
+ * (size_cs_entry - 1) folded into the relocation delta.
+ * NOTE(review): bit 8 is presumably the "valid" flag and the delta the buffer
+ * length field -- confirm against the hardware documentation.
+ */
+static void
+i965_avc_hw_scoreboard_constant_buffer(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    BEGIN_BATCH(ctx, 2);
+    OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
+    OUT_RELOC(ctx, avc_hw_scoreboard_context->curbe.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              avc_hw_scoreboard_context->urb.size_cs_entry - 1);
+    ADVANCE_BATCH(ctx);    
+}
+
+/*
+ * Emit a six-dword MEDIA_OBJECT command that uses interface descriptor 0 and
+ * no indirect data.  The two trailing dwords carry the inline data:
+ * num_mb_cmds (from bit 16) combined with starting_mb_number (from bit 0),
+ * then pic_width_in_mbs.
+ */
+static void
+i965_avc_hw_scoreboard_objects(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    BEGIN_BATCH(ctx, 6);
+    OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 4);
+    OUT_BATCH(ctx, 0); /* interface descriptor offset: 0 */
+    OUT_BATCH(ctx, 0); /* no indirect data */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, ((avc_hw_scoreboard_context->inline_data.num_mb_cmds << 16) |
+                    (avc_hw_scoreboard_context->inline_data.starting_mb_number << 0)));
+    OUT_BATCH(ctx, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs);
+    ADVANCE_BATCH(ctx);
+}
+
+/*
+ * Emit the complete scoreboard command sequence inside one atomic batch
+ * section (0x1000 passed as the size argument): MI flush, pipeline select,
+ * state base address, media state pointers, URB fence, CS URB state,
+ * constant buffer and finally the media object.
+ */
+static void
+i965_avc_hw_scoreboard_pipeline_setup(VADriverContextP ctx, struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    intel_batchbuffer_start_atomic(ctx, 0x1000);
+    intel_batchbuffer_emit_mi_flush(ctx);
+    i965_avc_hw_scoreboard_pipeline_select(ctx);
+    i965_avc_hw_scoreboard_state_base_address(ctx);
+    i965_avc_hw_scoreboard_state_pointers(ctx, avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_urb_layout(ctx, avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_cs_urb_layout(ctx, avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_constant_buffer(ctx, avc_hw_scoreboard_context);
+    i965_avc_hw_scoreboard_objects(ctx, avc_hw_scoreboard_context);
+    intel_batchbuffer_end_atomic(ctx);
+}
+
+/*
+ * Run the AVC hardware-scoreboard kernel for the current picture.
+ *
+ * Does nothing unless the H.264 private context has use_avc_hw_scoreboard
+ * set.  Otherwise it refreshes the per-picture inline data, takes a
+ * reference on the combined AVC kernel buffer, selects the MBAFF or
+ * non-MBAFF kernel offset, then builds the state and emits the pipeline
+ * commands.
+ *
+ * ctx:          driver context; its media_state.private_context must hold
+ *               a valid struct i965_h264_context.
+ * decode_state: not used by this function.
+ */
+void
+i965_avc_hw_scoreboard(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context;
+    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context;
+
+    /* Validate the private context before the first dereference below. */
+    assert(i965_h264_context != NULL);
+
+    if (i965_h264_context->use_avc_hw_scoreboard) {
+        avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
+
+        /* Per-picture values later emitted as MEDIA_OBJECT inline data. */
+        avc_hw_scoreboard_context->inline_data.num_mb_cmds = i965_h264_context->avc_it_command_mb_info.mbs;
+        avc_hw_scoreboard_context->inline_data.starting_mb_number = i965_h264_context->avc_it_command_mb_info.mbs;
+        avc_hw_scoreboard_context->inline_data.pic_width_in_mbs = i965_h264_context->picture.width_in_mbs;
+        avc_hw_scoreboard_context->surface.total_mbs = i965_h264_context->avc_it_command_mb_info.mbs * 2;
+        
+        /* Drop the previous kernel reference and share the combined AVC kernel bo. */
+        dri_bo_unreference(avc_hw_scoreboard_context->hw_kernel.bo);
+        avc_hw_scoreboard_context->hw_kernel.bo = h264_avc_kernels[H264_AVC_COMBINED].bo;
+        assert(avc_hw_scoreboard_context->hw_kernel.bo != NULL);
+        dri_bo_reference(avc_hw_scoreboard_context->hw_kernel.bo);
+
+        /* Entry point inside the combined kernel depends on MBAFF. */
+        if (i965_h264_context->picture.mbaff_frame_flag)
+            avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD_MBAFF];
+        else
+            avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD];
+
+        i965_avc_hw_scoreboard_states_setup(avc_hw_scoreboard_context);
+        i965_avc_hw_scoreboard_pipeline_setup(ctx, avc_hw_scoreboard_context);
+    }
+}
+
+/*
+ * Prepare the scoreboard context for decoding.
+ *
+ * Does nothing unless the H.264 private context has use_avc_hw_scoreboard
+ * set.  Otherwise every state buffer (constant buffer, surface state,
+ * binding table, interface descriptors, VFE state) is released and
+ * re-allocated, the scoreboard surface is pointed at the shared
+ * avc_it_command_mb_info buffer, and the URB partitioning is recorded.
+ *
+ * ctx: driver context; its media_state.private_context must hold a valid
+ *      struct i965_h264_context.
+ */
+void
+i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_state->private_context;
+    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context;
+    dri_bo *bo;
+
+    /* Validate the private context before the first dereference below. */
+    assert(i965_h264_context != NULL);
+
+    if (i965_h264_context->use_avc_hw_scoreboard) {
+        avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
+
+        /* Constant (CURBE) buffer. */
+        dri_bo_unreference(avc_hw_scoreboard_context->curbe.bo);
+        bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "constant buffer",
+                          4096, 64);
+        assert(bo);
+        avc_hw_scoreboard_context->curbe.bo = bo;
+
+        /* The surface data is not allocated here: share the MB-info buffer. */
+        dri_bo_unreference(avc_hw_scoreboard_context->surface.s_bo);
+        avc_hw_scoreboard_context->surface.s_bo = i965_h264_context->avc_it_command_mb_info.bo;
+        assert(avc_hw_scoreboard_context->surface.s_bo != NULL);
+        dri_bo_reference(avc_hw_scoreboard_context->surface.s_bo);
+
+        /* Surface state describing s_bo. */
+        dri_bo_unreference(avc_hw_scoreboard_context->surface.ss_bo);
+        bo = dri_bo_alloc(i965->intel.bufmgr, 
+                          "surface state", 
+                          sizeof(struct i965_surface_state), 32);
+        assert(bo);
+        avc_hw_scoreboard_context->surface.ss_bo = bo;
+
+        /* Binding table: one unsigned int slot per media surface. */
+        dri_bo_unreference(avc_hw_scoreboard_context->binding_table.bo);
+        bo = dri_bo_alloc(i965->intel.bufmgr, 
+                          "binding table",
+                          MAX_MEDIA_SURFACES * sizeof(unsigned int), 32);
+        assert(bo);
+        avc_hw_scoreboard_context->binding_table.bo = bo;
+
+        /* Interface descriptor table. */
+        dri_bo_unreference(avc_hw_scoreboard_context->idrt.bo);
+        bo = dri_bo_alloc(i965->intel.bufmgr, 
+                          "interface discriptor", 
+                          MAX_INTERFACE_DESC * sizeof(struct i965_interface_descriptor), 16);
+        assert(bo);
+        avc_hw_scoreboard_context->idrt.bo = bo;
+
+        /* VFE state. */
+        dri_bo_unreference(avc_hw_scoreboard_context->vfe_state.bo);
+        bo = dri_bo_alloc(i965->intel.bufmgr, 
+                          "vfe state", 
+                          sizeof(struct i965_vfe_state), 32);
+        assert(bo);
+        avc_hw_scoreboard_context->vfe_state.bo = bo;
+
+        /* URB partitioning: VFE entries first, CS entries right after them. */
+        avc_hw_scoreboard_context->urb.num_vfe_entries = 1;
+        avc_hw_scoreboard_context->urb.size_vfe_entry = 2;
+        avc_hw_scoreboard_context->urb.num_cs_entries = 1;
+        avc_hw_scoreboard_context->urb.size_cs_entry = 1;
+        avc_hw_scoreboard_context->urb.vfe_start = 0;
+        avc_hw_scoreboard_context->urb.cs_start = avc_hw_scoreboard_context->urb.vfe_start + 
+            avc_hw_scoreboard_context->urb.num_vfe_entries * avc_hw_scoreboard_context->urb.size_vfe_entry;
+        /* Both partitions together must fit in the device's URB. */
+        assert(avc_hw_scoreboard_context->urb.cs_start + 
+               avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+    }
+}
+
+/*
+ * Drop the scoreboard context's reference on every buffer object it holds
+ * and reset each pointer to NULL.  Always returns True.
+ */
+Bool
+i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
+{
+    /* Buffer-object slots, listed in the order they are released. */
+    dri_bo **held[] = {
+        &avc_hw_scoreboard_context->curbe.bo,
+        &avc_hw_scoreboard_context->surface.ss_bo,
+        &avc_hw_scoreboard_context->surface.s_bo,
+        &avc_hw_scoreboard_context->binding_table.bo,
+        &avc_hw_scoreboard_context->idrt.bo,
+        &avc_hw_scoreboard_context->vfe_state.bo,
+        &avc_hw_scoreboard_context->hw_kernel.bo,
+    };
+    unsigned int n;
+
+    for (n = 0; n < sizeof(held) / sizeof(held[0]); n++) {
+        dri_bo_unreference(*held[n]);
+        *held[n] = NULL;
+    }
+
+    return True;
+}
diff --git a/i965_drv_video/i965_avc_hw_scoreboard.h b/i965_drv_video/i965_avc_hw_scoreboard.h
new file mode 100644 (file)
index 0000000..66ee9fb
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+
+#ifndef __I965_AVC_HW_SCOREBOARD_H__
+#define __I965_AVC_HW_SCOREBOARD_H__
+
+struct i965_avc_hw_scoreboard_context /* state for the AVC hardware-scoreboard media pipeline */
+{
+    struct { /* emitted as the two inline dwords of the MEDIA_OBJECT command */
+        unsigned int num_mb_cmds; /* packed into bits 16 and up of inline dword 0 */
+        unsigned int starting_mb_number; /* packed from bit 0 of inline dword 0 */
+        unsigned int pic_width_in_mbs; /* inline dword 1 */
+    } inline_data;
+
+    struct {
+        dri_bo *ss_bo; /* surface-state buffer, allocated with sizeof(struct i965_surface_state) */
+        dri_bo *s_bo; /* surface data; decode_init makes this a reference to avc_it_command_mb_info.bo */
+        unsigned int total_mbs; /* set per picture to twice avc_it_command_mb_info.mbs */
+    } surface;
+
+    struct {
+        dri_bo *bo; /* allocated as MAX_MEDIA_SURFACES unsigned int entries */
+    } binding_table;
+
+    struct {
+        dri_bo *bo; /* interface descriptor table, MAX_INTERFACE_DESC entries */
+    } idrt;
+
+    struct {
+        dri_bo *bo; /* allocated with sizeof(struct i965_vfe_state) */
+    } vfe_state;
+
+    struct {
+        dri_bo *bo; /* constant buffer, allocated as 4096 bytes */
+    } curbe;
+
+    struct {
+        dri_bo *bo; /* reference to h264_avc_kernels[H264_AVC_COMBINED].bo */
+        unsigned long offset; /* kernel entry offset, chosen per picture from the MBAFF flag */
+    } hw_kernel;
+
+    struct { /* URB partitioning, filled in by i965_avc_hw_scoreboard_decode_init() */
+        unsigned int vfe_start;
+        unsigned int cs_start; /* vfe_start + num_vfe_entries * size_vfe_entry */
+
+        unsigned int num_vfe_entries;
+        unsigned int num_cs_entries;
+
+        unsigned int size_vfe_entry;
+        unsigned int size_cs_entry;
+    } urb;
+};
+
+void i965_avc_hw_scoreboard(VADriverContextP, struct decode_state *); /* per-picture: set up state and emit the scoreboard pipeline */
+void i965_avc_hw_scoreboard_decode_init(VADriverContextP); /* (re)allocate the state buffers and record the URB layout */
+Bool i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *); /* release all buffers; "ternimate" (sic) is the exported name */
+
+#endif /* __I965_AVC_HW_SCOREBOARD_H__ */
+
index 260d820..e50b41b 100644 (file)
 #define CMD_MEDIA_OBJECT                        CMD(2, 1, 0)
 #define CMD_MEDIA_OBJECT_EX                     CMD(2, 1, 1)
 
+#define CMD_AVC_BSD_IMG_STATE                   CMD(2, 4, 0)
+#define CMD_AVC_BSD_QM_STATE                    CMD(2, 4, 1)
+#define CMD_AVC_BSD_SLICE_STATE                 CMD(2, 4, 2)
+#define CMD_AVC_BSD_BUF_BASE_STATE              CMD(2, 4, 3)
+#define CMD_BSD_IND_OBJ_BASE_ADDR               CMD(2, 4, 4)
+#define CMD_AVC_BSD_OBJECT                      CMD(2, 4, 8)
+
 #define CMD_PIPELINED_POINTERS                  CMD(3, 0, 0)
 #define CMD_BINDING_TABLE_POINTERS              CMD(3, 0, 1)
 #define CMD_VERTEX_BUFFERS                      CMD(3, 0, 8)
 #define I965_TILEWALK_XMAJOR                 0
 #define I965_TILEWALK_YMAJOR                 1
 
-#define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \
+#define SCAN_RASTER_ORDER       0
+#define SCAN_SPECIAL_ORDER      1
+
+#define ENTROPY_CAVLD           0
+#define ENTROPY_CABAC           1
+
+#define SLICE_TYPE_P            0
+#define SLICE_TYPE_B            1
+#define SLICE_TYPE_I            2
+#define SLICE_TYPE_SP           3
+#define SLICE_TYPE_SI           4
+
+#define PRESENT_REF_LIST0               (1 << 0)
+#define PRESENT_REF_LIST1               (1 << 1)
+#define PRESENT_WEIGHT_OFFSET_L0        (1 << 2)
+#define PRESENT_WEIGHT_OFFSET_L1        (1 << 3)
+
+#define RESIDUAL_DATA_OFFSET    48
+
+#define PRESENT_NOMV            0
+#define PRESENT_NOWO            1
+#define PRESENT_MV_WO           3
+
+#define SCOREBOARD_STALLING     0
+#define SCOREBOARD_NON_STALLING 1
+
+#define URB_SIZE(intel)         (IS_IRONLAKE(intel->device_id) ? 1024 : \
                                  IS_G4X(intel->device_id) ? 384 : 256)
 #endif /* _I965_DEFINES_H_ */
index 6a60ecc..5033fe8 100644 (file)
@@ -99,6 +99,9 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
 
     profile_list[i++] = VAProfileMPEG2Simple;
     profile_list[i++] = VAProfileMPEG2Main;
+    profile_list[i++] = VAProfileH264Baseline;
+    profile_list[i++] = VAProfileH264Main;
+    profile_list[i++] = VAProfileH264High;
 
     /* If the assert fails then I965_MAX_PROFILES needs to be bigger */
     assert(i <= I965_MAX_PROFILES);
@@ -122,6 +125,13 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx,
         entrypoint_list[0] = VAEntrypointVLD;
         break;
 
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        *num_entrypoints = 1;
+        entrypoint_list[0] = VAEntrypointVLD;
+        break;
+
     default:
         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
         *num_entrypoints = 0;
@@ -217,6 +227,17 @@ i965_CreateConfig(VADriverContextP ctx,
         }
         break;
 
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        if (VAEntrypointVLD == entrypoint) {
+            vaStatus = VA_STATUS_SUCCESS;
+        } else {
+            vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+        }
+
+        break;
+
     default:
         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
         break;
@@ -305,6 +326,10 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
 
     dri_bo_unreference(obj_surface->bo);
     obj_surface->bo = NULL;
+    dri_bo_unreference(obj_surface->direct_mv_wr_top_bo);
+    obj_surface->direct_mv_wr_top_bo = NULL;
+    dri_bo_unreference(obj_surface->direct_mv_wr_bottom_bo);
+    obj_surface->direct_mv_wr_bottom_bo = NULL;
     object_heap_free(heap, obj);
 }
 
@@ -344,12 +369,17 @@ i965_CreateSurfaces(VADriverContextP ctx,
                                        "vaapi surface",
                                        obj_surface->size,
                                        64);
-
         assert(obj_surface->bo);
-        if (NULL == obj_surface->bo) {
-            vaStatus = VA_STATUS_ERROR_UNKNOWN;
-            break;
-        }
+        obj_surface->direct_mv_wr_top_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                        "direct mv wr top",
+                                                        0x90000,
+                                                        64);
+        assert(obj_surface->direct_mv_wr_top_bo);
+        obj_surface->direct_mv_wr_bottom_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                           "direct mv wr bottom",
+                                                           0x90000,
+                                                           64);
+        assert(obj_surface->direct_mv_wr_bottom_bo);
     }
 
     /* Error recovery */
@@ -881,6 +911,12 @@ i965_BeginPicture(VADriverContextP ctx,
         vaStatus = VA_STATUS_SUCCESS;
         break;
 
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        vaStatus = VA_STATUS_SUCCESS;
+        break;
+
     default:
         assert(0);
         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
@@ -1014,6 +1050,7 @@ VAStatus
 i965_EndPicture(VADriverContextP ctx, VAContextID context)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx); 
+    struct i965_render_state *render_state = &i965->render_state;
     struct object_context *obj_context = CONTEXT(context);
     struct object_config *obj_config;
     VAContextID config;
@@ -1026,6 +1063,18 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context)
     config = obj_context->config_id;
     obj_config = CONFIG(config);
     assert(obj_config);
+
+    switch (obj_config->profile) {
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        render_state->interleaved_uv = 1;
+        break;
+
+    default:
+        render_state->interleaved_uv = 0;
+    }
+
     i965_media_decode_picture(ctx, obj_config->profile, &obj_context->decode_state);
     obj_context->decode_state.current_render_target = -1;
     obj_context->decode_state.num_slices = 0;
@@ -1131,7 +1180,7 @@ i965_Init(VADriverContextP ctx)
         return VA_STATUS_ERROR_UNKNOWN;
 
     if (!IS_G4X(i965->intel.device_id) &&
-        !IS_IGDNG(i965->intel.device_id))
+        !IS_IRONLAKE(i965->intel.device_id))
         return VA_STATUS_ERROR_UNKNOWN;
 
     if (i965_media_init(ctx) == False)
index fa09869..605b386 100644 (file)
@@ -97,6 +97,9 @@ struct object_surface
     int height;
     int size;
     dri_bo *bo;
+    /* FIXME: only for H.264 */
+    dri_bo *direct_mv_wr_top_bo;
+    dri_bo *direct_mv_wr_bottom_bo;
 };
 
 struct object_buffer 
index 82c5652..6a2971c 100644 (file)
@@ -38,6 +38,7 @@
 
 #include "i965_defines.h"
 #include "i965_media_mpeg2.h"
+#include "i965_media_h264.h"
 #include "i965_media.h"
 #include "i965_drv_video.h"
 
@@ -72,13 +73,21 @@ static void
 i965_media_state_base_address(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx); 
+    struct i965_media_state *media_state = &i965->media_state;
 
-    if (IS_IGDNG(i965->intel.device_id)) {
+    if (IS_IRONLAKE(i965->intel.device_id)) {
         BEGIN_BATCH(ctx, 8);
         OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
-        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+        
+        if (media_state->indirect_object.bo) {
+            OUT_RELOC(ctx, media_state->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 
+                      media_state->indirect_object.offset | BASE_ADDRESS_MODIFY);
+        } else {
+            OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+        }
+
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
@@ -89,7 +98,14 @@ i965_media_state_base_address(VADriverContextP ctx)
         OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
-        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+
+        if (media_state->indirect_object.bo) {
+            OUT_RELOC(ctx, media_state->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 
+                      media_state->indirect_object.offset | BASE_ADDRESS_MODIFY);
+        } else {
+            OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+        }
+
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         ADVANCE_BATCH(ctx);
@@ -221,6 +237,12 @@ i965_media_decode_init(VADriverContextP ctx, VAProfile profile)
     case VAProfileMPEG2Main:
         i965_media_mpeg2_decode_init(ctx);
         break;
+        
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        i965_media_h264_decode_init(ctx);
+        break;
 
     default:
         assert(0);
@@ -237,8 +259,8 @@ i965_media_decode_picture(VADriverContextP ctx,
     struct i965_media_state *media_state = &i965->media_state;
 
     i965_media_decode_init(ctx, profile);
-    assert(media_state->states_setup);
-    media_state->states_setup(ctx, decode_state);
+    assert(media_state->media_states_setup);
+    media_state->media_states_setup(ctx, decode_state);
     i965_media_pipeline_setup(ctx, decode_state);
     intel_batchbuffer_flush(ctx);
 }
@@ -247,6 +269,7 @@ Bool
 i965_media_init(VADriverContextP ctx)
 {
     i965_media_mpeg2_init(ctx);
+    i965_media_h264_init(ctx);
     return True;
 }
 
@@ -277,7 +300,11 @@ i965_media_terminate(VADriverContextP ctx)
     dri_bo_unreference(media_state->curbe.bo);
     media_state->curbe.bo = NULL;
 
+    dri_bo_unreference(media_state->indirect_object.bo);
+    media_state->indirect_object.bo = NULL;
+
     i965_media_mpeg2_ternimate(ctx);
+    i965_media_h264_ternimate(ctx);
     return True;
 }
 
index e207f4e..b745777 100644 (file)
@@ -38,7 +38,7 @@
 #include "i965_structs.h"
 
 #define MAX_INTERFACE_DESC      16
-#define MAX_MEDIA_SURFACES      32
+#define MAX_MEDIA_SURFACES      34
 
 #define MPEG_TOP_FIELD         1
 #define MPEG_BOTTOM_FIELD      2
@@ -83,6 +83,11 @@ struct i965_media_state
     } curbe;
 
     struct {
+        dri_bo *bo;
+        unsigned long offset;
+    } indirect_object;
+
+    struct {
         unsigned int vfe_start;
         unsigned int cs_start;
 
@@ -93,7 +98,8 @@ struct i965_media_state
         unsigned int size_cs_entry;
     } urb;
 
-    void (*states_setup)(VADriverContextP ctx, struct decode_state *decode_state);
+    void *private_context;
+    void (*media_states_setup)(VADriverContextP ctx, struct decode_state *decode_state);
     void (*media_objects)(VADriverContextP ctx, struct decode_state *decode_state);
 };
 
diff --git a/i965_drv_video/i965_media_h264.c b/i965_drv_video/i965_media_h264.c
new file mode 100644 (file)
index 0000000..c1b5626
--- /dev/null
@@ -0,0 +1,882 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "va_backend.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_media.h"
+#include "i965_media_h264.h"
+
+enum { /* kernel indices; the order matches the entries of avc_mc_kernel_offset_gen4/gen5 (presumably used to index them - confirm at the use sites) */
+    INTRA_16X16 = 0,
+    INTRA_8X8,
+    INTRA_4X4,
+    INTRA_PCM,
+    FRAMEMB_MOTION,
+    FIELDMB_MOTION,
+    MBAFF_MOTION,
+};
+
+struct intra_kernel_header /* constants for the intra-prediction kernels; field order is relied on by the positional initializers intra_kernel_header_gen4/gen5 */
+{
+    /* R1.0 */ /* NOTE(review): the R<n>.<m> markers presumably name the GRF register and dword each group occupies - confirm against the shader sources */
+    unsigned char intra_4x4_luma_mode_0_offset; /* the *_offset fields are initialized as (mode label - group base label) / INST_UNIT_* */
+    unsigned char intra_4x4_luma_mode_1_offset;
+    unsigned char intra_4x4_luma_mode_2_offset;
+    unsigned char intra_4x4_luma_mode_3_offset;
+    /* R1.1 */
+    unsigned char intra_4x4_luma_mode_4_offset;
+    unsigned char intra_4x4_luma_mode_5_offset;
+    unsigned char intra_4x4_luma_mode_6_offset;
+    unsigned char intra_4x4_luma_mode_7_offset;
+    /* R1.2 */
+    unsigned char intra_4x4_luma_mode_8_offset;
+    unsigned char pad0; /* initialized to 0 */
+    unsigned short top_reference_offset; /* initialized to 0xFFFC */
+    /* R1.3 */
+    unsigned char intra_8x8_luma_mode_0_offset;
+    unsigned char intra_8x8_luma_mode_1_offset;
+    unsigned char intra_8x8_luma_mode_2_offset;
+    unsigned char intra_8x8_luma_mode_3_offset;
+    /* R1.4 */
+    unsigned char intra_8x8_luma_mode_4_offset;
+    unsigned char intra_8x8_luma_mode_5_offset;
+    unsigned char intra_8x8_luma_mode_6_offset;
+    unsigned char intra_8x8_luma_mode_7_offset;
+    /* R1.5 */
+    unsigned char intra_8x8_luma_mode_8_offset;
+    unsigned char pad1; /* initialized to 0 */
+    unsigned short const_reverse_data_transfer_intra_8x8; /* initialized to 0x0001 */
+    /* R1.6 */
+    unsigned char intra_16x16_luma_mode_0_offset;
+    unsigned char intra_16x16_luma_mode_1_offset;
+    unsigned char intra_16x16_luma_mode_2_offset;
+    unsigned char intra_16x16_luma_mode_3_offset;
+    /* R1.7 */
+    unsigned char intra_chroma_mode_0_offset; /* chroma offsets are relative to the chroma DC label */
+    unsigned char intra_chroma_mode_1_offset;
+    unsigned char intra_chroma_mode_2_offset;
+    unsigned char intra_chroma_mode_3_offset;
+    /* R2.0 */
+    unsigned int const_intra_16x16_plane_0;
+    /* R2.1 */
+    unsigned int const_intra_16x16_chroma_plane_0;
+    /* R2.2 */
+    unsigned int const_intra_16x16_chroma_plane_1;
+    /* R2.3 */
+    unsigned int const_intra_16x16_plane_1;
+    /* R2.4 */
+    unsigned int left_shift_count_reverse_dw_ordering;
+    /* R2.5 */
+    unsigned int const_reverse_data_transfer_intra_4x4;
+    /* R2.6 */
+    unsigned int intra_4x4_pred_mode_offset; /* four byte-wide offsets packed into one dword by the initializers */
+};
+
+struct inter_kernel_header /* 4-byte header for the inter (motion) kernels; no initializer for it is visible in this part of the file */
+{
+    unsigned short weight_offset;
+    unsigned char weight_offset_flag;
+    unsigned char pad0; /* pads the struct to 4 bytes */
+};
+
+#include "shaders/h264/mc/export.inc"
+static unsigned long avc_mc_kernel_offset_gen4[] = { /* gen4 kernel entry offsets (*_IP symbols from shaders/h264/mc/export.inc), in kernel-index enum order */
+    INTRA_16x16_IP,
+    INTRA_8x8_IP,
+    INTRA_4x4_IP,
+    INTRA_PCM_IP,
+    FRAME_MB_IP,
+    FIELD_MB_IP,
+    MBAFF_MB_IP
+};
+
+#define INST_UNIT_GEN4  16 /* divisor turning *_IP label differences into header offset units (presumably bytes per gen4 instruction - confirm) */
+struct intra_kernel_header intra_kernel_header_gen4 = { /* positional initializer: entries must stay in struct intra_kernel_header field order */
+    0 / INST_UNIT_GEN4, /* intra_4x4_luma_mode_0..3_offset, relative to INTRA_4X4_VERTICAL_IP */
+    (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
+
+    (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4, /* intra_4x4_luma_mode_4..7_offset */
+    (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
+
+    (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4, /* intra_4x4_luma_mode_8_offset */
+    0, /* pad0 */
+    0xFFFC, /* top_reference_offset */
+
+    0 / INST_UNIT_GEN4, /* intra_8x8_luma_mode_0..3_offset, relative to INTRA_8X8_VERTICAL_IP */
+    (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
+
+    (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4, /* intra_8x8_luma_mode_4..7_offset */
+    (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
+
+    (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4, /* intra_8x8_luma_mode_8_offset */
+    0, /* pad1 */
+    0x0001, /* const_reverse_data_transfer_intra_8x8 */
+
+    0 / INST_UNIT_GEN4, /* intra_16x16_luma_mode_0..3_offset, relative to INTRA_16x16_VERTICAL_IP */
+    (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
+    (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
+
+    0 / INST_UNIT_GEN4, /* intra_chroma_mode_0..3_offset, relative to INTRA_CHROMA_DC_IP */
+    (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
+    (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
+    (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
+
+    0xFCFBFAF9, /* const_intra_16x16_plane_0 */
+
+    0x00FFFEFD, /* const_intra_16x16_chroma_plane_0 */
+
+    0x04030201, /* const_intra_16x16_chroma_plane_1 */
+
+    0x08070605, /* const_intra_16x16_plane_1 */
+
+    0x18100800, /* left_shift_count_reverse_dw_ordering */
+
+    0x00020406, /* const_reverse_data_transfer_intra_4x4 */
+
+    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) / INST_UNIT_GEN4 * 0x1000000 + 
+    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) / INST_UNIT_GEN4 * 0x10000 + 
+    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) / INST_UNIT_GEN4 * 0x100 + 
+    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP) / INST_UNIT_GEN4 /* intra_4x4_pred_mode_offset: SB3..SB0 distances packed from the top byte down to the low byte */
+};
+
+static uint32_t h264_avc_combined_gen4[][4] = { /* gen4 combined AVC kernel binary, four dwords per row */
+#include "shaders/h264/mc/avc_mc.g4b"
+};
+
+static uint32_t h264_avc_null_gen4[][4] = { /* gen4 null kernel binary */
+#include "shaders/h264/mc/null.g4b"
+};
+
+static struct media_kernel h264_avc_kernels_gen4[] = { /* gen4 kernel table; entries are positional: name, id, binary, size, then NULL */
+    {
+        "AVC combined kernel",
+        H264_AVC_COMBINED,
+        h264_avc_combined_gen4,
+        sizeof(h264_avc_combined_gen4),
+        NULL /* presumably the kernel's bo, filled in later - struct media_kernel is declared elsewhere, confirm */
+    },
+
+    {
+        "NULL kernel",
+        H264_AVC_NULL,
+        h264_avc_null_gen4,
+        sizeof(h264_avc_null_gen4),
+        NULL
+    }
+};
+
+/* On Ironlake */
+#include "shaders/h264/mc/export.inc.gen5"
+static unsigned long avc_mc_kernel_offset_gen5[] = { /* Ironlake counterpart of avc_mc_kernel_offset_gen4, same entry order */
+    INTRA_16x16_IP_GEN5,
+    INTRA_8x8_IP_GEN5,
+    INTRA_4x4_IP_GEN5,
+    INTRA_PCM_IP_GEN5,
+    FRAME_MB_IP_GEN5,
+    FIELD_MB_IP_GEN5,
+    MBAFF_MB_IP_GEN5
+};
+
+#define INST_UNIT_GEN5  8 /* divisor turning *_IP_GEN5 label differences into header offset units (half of INST_UNIT_GEN4; unit meaning to be confirmed) */
+
+struct intra_kernel_header intra_kernel_header_gen5 = { /* positional initializer: entries must stay in struct intra_kernel_header field order */
+    0 / INST_UNIT_GEN5, /* intra_4x4_luma_mode_0..3_offset, relative to INTRA_4X4_VERTICAL_IP_GEN5 */
+    (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+
+    (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5, /* intra_4x4_luma_mode_4..7_offset */
+    (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+
+    (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5, /* intra_4x4_luma_mode_8_offset */
+    0, /* pad0 */
+    0xFFFC, /* top_reference_offset */
+
+    0 / INST_UNIT_GEN5, /* intra_8x8_luma_mode_0..3_offset, relative to INTRA_8X8_VERTICAL_IP_GEN5 */
+    (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+
+    (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5, /* intra_8x8_luma_mode_4..7_offset */
+    (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+
+    (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5, /* intra_8x8_luma_mode_8_offset */
+    0, /* pad1 */
+    0x0001, /* const_reverse_data_transfer_intra_8x8 */
+
+    0 / INST_UNIT_GEN5, /* intra_16x16_luma_mode_0..3_offset, relative to INTRA_16x16_VERTICAL_IP_GEN5 */
+    (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
+
+    0 / INST_UNIT_GEN5, /* intra_chroma_mode_0..3_offset, relative to INTRA_CHROMA_DC_IP_GEN5 */
+    (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
+    (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
+
+    0xFCFBFAF9, /* const_intra_16x16_plane_0 */
+
+    0x00FFFEFD, /* const_intra_16x16_chroma_plane_0 */
+
+    0x04030201, /* const_intra_16x16_chroma_plane_1 */
+
+    0x08070605, /* const_intra_16x16_plane_1 */
+
+    0x18100800, /* left_shift_count_reverse_dw_ordering */
+
+    0x00020406, /* const_reverse_data_transfer_intra_4x4 */
+
+    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) / INST_UNIT_GEN5 * 0x1000000 + 
+    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) / INST_UNIT_GEN5 * 0x10000 + 
+    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) / INST_UNIT_GEN5 * 0x100 + 
+    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5) / INST_UNIT_GEN5 /* intra_4x4_pred_mode_offset: SB3..SB0 distances packed from the top byte down to the low byte */
+};
+
+static uint32_t h264_avc_combined_gen5[][4] = { /* Ironlake combined AVC kernel binary, four dwords per row */
+#include "shaders/h264/mc/avc_mc.g4b.gen5"
+};
+
+static uint32_t h264_avc_null_gen5[][4] = { /* Ironlake null kernel binary */
+#include "shaders/h264/mc/null.g4b.gen5"
+};
+
+static struct media_kernel h264_avc_kernels_gen5[] = { /* Ironlake kernel table; same positional layout as h264_avc_kernels_gen4 */
+    {
+        "AVC combined kernel",
+        H264_AVC_COMBINED,
+        h264_avc_combined_gen5,
+        sizeof(h264_avc_combined_gen5),
+        NULL /* presumably the kernel's bo, filled in later - struct media_kernel is declared elsewhere, confirm */
+    },
+
+    {
+        "NULL kernel",
+        H264_AVC_NULL,
+        h264_avc_null_gen5,
+        sizeof(h264_avc_null_gen5),
+        NULL
+    }
+};
+
+#define NUM_H264_AVC_KERNELS (sizeof(h264_avc_kernels_gen4) / sizeof(h264_avc_kernels_gen4[0])) /* computed from the gen4 table; the gen5 table has the same number of entries */
+struct media_kernel *h264_avc_kernels = NULL; /* non-static: the scoreboard code reads h264_avc_kernels[H264_AVC_COMBINED].bo; presumably pointed at the gen4 or gen5 table during init (not visible here) */
+
+#define NUM_AVC_MC_INTERFACES (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0])) /* computed from the gen4 offset table */
+static unsigned long *avc_mc_kernel_offset = NULL; /* presumably set to the gen4 or gen5 offset table during init - confirm */
+
+static struct intra_kernel_header *intra_kernel_header = NULL; /* presumably set to intra_kernel_header_gen4 or _gen5 during init - confirm */
+
+/*
+ * Allocate and fill a 2D surface-state object for one plane of
+ * obj_surface, then store it in media_state->surface_state[index].
+ *
+ * index:                slot in media_state->surface_state[]; asserted to
+ *                       be below MAX_MEDIA_SURFACES
+ * obj_surface:          surface whose buffer object backs the plane
+ * offset:               byte offset of the plane inside obj_surface->bo
+ * w, h, pitch:          dimensions; each is stored minus one
+ * is_dst:               non-zero selects the render domain for both read
+ *                       and write, zero selects sampler read with no write
+ * vert_line_stride,
+ * vert_line_stride_ofs: copied verbatim into ss0
+ * format:               surface format, copied into ss0
+ */
+static void
+i965_media_h264_surface_state(VADriverContextP ctx,
+                              int index,
+                              struct object_surface *obj_surface,
+                              unsigned long offset,
+                              int w, int h, int pitch,
+                              Bool is_dst,
+                              int vert_line_stride,
+                              int vert_line_stride_ofs,
+                              int format)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_surface_state *state;
+    dri_bo *ss_bo;
+    uint32_t rd_domain = is_dst ? I915_GEM_DOMAIN_RENDER : I915_GEM_DOMAIN_SAMPLER;
+    uint32_t wr_domain = is_dst ? I915_GEM_DOMAIN_RENDER : 0;
+
+    ss_bo = dri_bo_alloc(i965->intel.bufmgr,
+                         "surface state",
+                         sizeof(struct i965_surface_state), 32);
+    assert(ss_bo);
+
+    dri_bo_map(ss_bo, 1);
+    assert(ss_bo->virtual);
+    state = ss_bo->virtual;
+    memset(state, 0, sizeof(*state));
+
+    state->ss0.surface_type = I965_SURFACE_2D;
+    state->ss0.surface_format = format;
+    state->ss0.vert_line_stride = vert_line_stride;
+    state->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
+    state->ss1.base_addr = obj_surface->bo->offset + offset;
+    state->ss2.width = w - 1;
+    state->ss2.height = h - 1;
+    state->ss3.pitch = pitch - 1;
+
+    /* Record a relocation for the ss1 base address against the surface's bo. */
+    dri_bo_emit_reloc(ss_bo,
+                      rd_domain, wr_domain,
+                      offset,
+                      offsetof(struct i965_surface_state, ss1),
+                      obj_surface->bo);
+    dri_bo_unmap(ss_bo);
+
+    assert(index < MAX_MEDIA_SURFACES);
+    media_state->surface_state[index].bo = ss_bo;
+}
+
+/*
+ * Create the surface states for the current picture and for every valid
+ * reference frame listed in the H.264 picture parameters.
+ *
+ * Slot layout: 0 = target Y, 1 = target interleaved U/V,
+ * 2 + i = reference i Y, 18 + i = reference i interleaved U/V.
+ * For field pictures the height is halved, and the bottom-field flag is
+ * passed as the vertical line stride offset.
+ */
+static void
+i965_media_h264_surfaces_setup(VADriverContextP ctx,
+                               struct decode_state *decode_state)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *surface;
+    VAPictureParameterBufferH264 *params;
+    VAPictureH264 *pic;
+    int ref, width, height;
+    int is_field, is_bottom;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    params = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    /* Target picture: bound as a destination. */
+    pic = &params->CurrPic;
+    assert(!(pic->flags & VA_PICTURE_H264_INVALID));
+    surface = SURFACE(pic->picture_id);
+    assert(surface);
+    width = surface->width;
+    height = surface->height;
+    is_field = !!(pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
+    is_bottom = !!(pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
+
+    i965_media_h264_surface_state(ctx, 0, surface,
+                                  0, width / 4, height / (1 + is_field), width,
+                                  1,
+                                  is_field,
+                                  is_bottom,
+                                  I965_SURFACEFORMAT_R8_SINT); /* Y */
+    i965_media_h264_surface_state(ctx, 1, surface,
+                                  width * height, width / 4, height / 2 / (1 + is_field), width,
+                                  1,
+                                  is_field,
+                                  is_bottom,
+                                  I965_SURFACEFORMAT_R8G8_SINT); /* INTERLEAVED U/V */
+
+    /* Reference pictures: bound as sources; invalid entries are skipped. */
+    for (ref = 0; ref < 16; ref++) {
+        pic = &params->ReferenceFrames[ref];
+
+        if (pic->flags & VA_PICTURE_H264_INVALID)
+            continue;
+
+        surface = SURFACE(pic->picture_id);
+        assert(surface);
+        width = surface->width;
+        height = surface->height;
+        is_field = !!(pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
+        is_bottom = !!(pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
+
+        i965_media_h264_surface_state(ctx, 2 + ref, surface,
+                                      0, width / 4, height / (1 + is_field), width,
+                                      0,
+                                      is_field,
+                                      is_bottom,
+                                      I965_SURFACEFORMAT_R8_SINT); /* Y */
+        i965_media_h264_surface_state(ctx, 18 + ref, surface,
+                                      width * height, width / 4, height / 2 / (1 + is_field), width,
+                                      0,
+                                      is_field,
+                                      is_bottom,
+                                      I965_SURFACEFORMAT_R8G8_SINT); /* INTERLEAVED U/V */
+    }
+}
+
+/*
+ * Fill the binding table bo: the table is cleared, then each slot whose
+ * surface state bo exists receives that bo's current offset together with
+ * a relocation so the entry can be patched if the bo moves.
+ */
+static void
+i965_media_h264_binding_table(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    dri_bo *table_bo = media_state->binding_table.bo;
+    unsigned int *entries;
+    int slot;
+
+    dri_bo_map(table_bo, 1);
+    assert(table_bo->virtual);
+    entries = table_bo->virtual;
+    memset(entries, 0, table_bo->size);
+
+    for (slot = 0; slot < MAX_MEDIA_SURFACES; slot++) {
+        dri_bo *surface_bo = media_state->surface_state[slot].bo;
+
+        /* Unused slots stay zero. */
+        if (!surface_bo)
+            continue;
+
+        entries[slot] = surface_bo->offset;
+        dri_bo_emit_reloc(table_bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0,
+                          slot * sizeof(*entries),
+                          surface_bo);
+    }
+
+    dri_bo_unmap(table_bo);
+}
+
+/*
+ * Write NUM_AVC_MC_INTERFACES interface descriptors into the idrt bo.
+ * Descriptor n points at offset avc_mc_kernel_offset[n] inside the
+ * H264_AVC_COMBINED kernel bo and at the shared binding table bo; both
+ * pointers are accompanied by relocations.
+ */
+static void
+i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    dri_bo *idrt_bo = media_state->idrt.bo;
+    struct i965_interface_descriptor *table;
+    int n;
+
+    dri_bo_map(idrt_bo, 1);
+    assert(idrt_bo->virtual);
+    table = idrt_bo->virtual;
+
+    for (n = 0; n < NUM_AVC_MC_INTERFACES; n++) {
+        struct i965_interface_descriptor *desc = &table[n];
+        dri_bo *kernel_bo = h264_avc_kernels[H264_AVC_COMBINED].bo;
+        dri_bo *binding_bo = media_state->binding_table.bo;
+        int kernel_offset = avc_mc_kernel_offset[n];
+
+        memset(desc, 0, sizeof(*desc));
+        desc->desc0.grf_reg_blocks = 7;
+        desc->desc0.kernel_start_pointer = (kernel_bo->offset + kernel_offset) >> 6; /* reloc */
+        desc->desc1.const_urb_entry_read_offset = 0;
+        desc->desc1.const_urb_entry_read_len = 2;
+        desc->desc3.binding_table_entry_count = 0;
+        desc->desc3.binding_table_pointer = binding_bo->offset >> 5; /* reloc */
+
+        /* The reloc delta carries the low bits that share the dword with
+         * the pointer field (grf_reg_blocks here, entry count below). */
+        dri_bo_emit_reloc(idrt_bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          desc->desc0.grf_reg_blocks + kernel_offset,
+                          n * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
+                          kernel_bo);
+
+        dri_bo_emit_reloc(idrt_bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          desc->desc3.binding_table_entry_count,
+                          n * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
+                          binding_bo);
+    }
+
+    dri_bo_unmap(idrt_bo);
+}
+
+/*
+ * Program the VFE state bo for AVC IT mode: thread and URB entry counts come
+ * from media_state->urb, the extended VFE state is flagged as present, and
+ * the interface descriptor base points (with a relocation) at the idrt bo.
+ */
+static void
+i965_media_h264_vfe_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    dri_bo *vfe_bo = media_state->vfe_state.bo;
+    dri_bo *idrt_bo = media_state->idrt.bo;
+    struct i965_vfe_state *vfe;
+
+    dri_bo_map(vfe_bo, 1);
+    assert(vfe_bo->virtual);
+    vfe = vfe_bo->virtual;
+    memset(vfe, 0, sizeof(*vfe));
+
+    vfe->vfe0.extend_vfe_state_present = 1;
+
+    vfe->vfe1.max_threads = media_state->urb.num_vfe_entries - 1;
+    vfe->vfe1.urb_entry_alloc_size = media_state->urb.size_vfe_entry - 1;
+    vfe->vfe1.num_urb_entries = media_state->urb.num_vfe_entries;
+    vfe->vfe1.vfe_mode = VFE_AVC_IT_MODE;
+    vfe->vfe1.children_present = 0;
+
+    vfe->vfe2.interface_descriptor_base = idrt_bo->offset >> 4; /* reloc */
+    dri_bo_emit_reloc(vfe_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0,
+                      offsetof(struct i965_vfe_state, vfe2),
+                      idrt_bo);
+
+    dri_bo_unmap(vfe_bo);
+}
+
+/*
+ * Fill the extended VFE state for AVC IT mode.
+ *
+ * The function programs, in order:
+ *   - the residual data offset and which sub-fields (MV, weight/offset) are
+ *     present, chosen from the first slice's slice_type;
+ *   - the 16-entry interface remap table: entries 0-3 select the intra
+ *     kernels, entries 4-15 all select one motion kernel picked from the
+ *     picture structure (field, MBAFF frame, or plain frame);
+ *   - the hardware scoreboard dependency deltas, only when the context has
+ *     use_avc_hw_scoreboard set.
+ */
+static void
+i965_media_h264_vfe_state_extension(VADriverContextP ctx,
+                                    struct decode_state *decode_state)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *h264_ctx;
+    struct i965_vfe_state_ex *ex;
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param;
+    int mbaff_frame_flag;
+    int motion_kernel;
+
+    assert(media_state->private_context);
+    h264_ctx = (struct i965_h264_context *)media_state->private_context;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    assert(decode_state->slice_param && decode_state->slice_param->buffer);
+    slice_param = (VASliceParameterBufferH264 *)decode_state->slice_param->buffer;
+
+    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+                        !pic_param->pic_fields.bits.field_pic_flag);
+
+    /* One motion kernel serves remap entries 4..15. */
+    if (pic_param->pic_fields.bits.field_pic_flag)
+        motion_kernel = FIELDMB_MOTION;
+    else if (mbaff_frame_flag)
+        motion_kernel = MBAFF_MOTION;
+    else
+        motion_kernel = FRAMEMB_MOTION;
+
+    assert(media_state->extended_state.bo);
+    dri_bo_map(media_state->extended_state.bo, 1);
+    assert(media_state->extended_state.bo->virtual);
+    ex = media_state->extended_state.bo->virtual;
+    memset(ex, 0, sizeof(*ex));
+
+    /*
+     * Indirect data buffer:
+     * --------------------------------------------------------
+     * | Motion Vectors | Weight/Offset data | Residual data |
+     * --------------------------------------------------------
+     * R4-R7: Motion Vectors
+     * R8-R9: Weight/Offset
+     * R10-R33: Residual data
+     */
+    ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
+    ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
+
+    if (slice_param->slice_type == SLICE_TYPE_I)
+        ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
+    else
+        ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
+
+    /* GRF offsets are only non-zero when some sub-field is present. */
+    if (ex->vfex1.avc.sub_field_present_flag == 0) {
+        ex->vfex1.avc.weight_grf_offset = 0;
+        ex->vfex1.avc.residual_grf_offset = 0;
+    } else {
+        ex->vfex1.avc.weight_grf_offset = 4;
+        ex->vfex1.avc.residual_grf_offset = 6;
+    }
+
+    /* Intra kernels */
+    ex->remap_table0.remap_index_0 = INTRA_16X16;
+    ex->remap_table0.remap_index_1 = INTRA_8X8;
+    ex->remap_table0.remap_index_2 = INTRA_4X4;
+    ex->remap_table0.remap_index_3 = INTRA_PCM;
+
+    /* Motion kernel for every remaining entry */
+    ex->remap_table0.remap_index_4 = motion_kernel;
+    ex->remap_table0.remap_index_5 = motion_kernel;
+    ex->remap_table0.remap_index_6 = motion_kernel;
+    ex->remap_table0.remap_index_7 = motion_kernel;
+
+    ex->remap_table1.remap_index_8 = motion_kernel;
+    ex->remap_table1.remap_index_9 = motion_kernel;
+    ex->remap_table1.remap_index_10 = motion_kernel;
+    ex->remap_table1.remap_index_11 = motion_kernel;
+    ex->remap_table1.remap_index_12 = motion_kernel;
+    ex->remap_table1.remap_index_13 = motion_kernel;
+    ex->remap_table1.remap_index_14 = motion_kernel;
+    ex->remap_table1.remap_index_15 = motion_kernel;
+
+    if (h264_ctx->use_avc_hw_scoreboard) {
+        ex->scoreboard0.enable = 1;
+        ex->scoreboard0.type = SCOREBOARD_STALLING;
+        ex->scoreboard0.mask = 0xff;
+
+        /* Dependency deltas 0..3 */
+        ex->scoreboard1.delta_x0 = -1;
+        ex->scoreboard1.delta_y0 = 0;
+        ex->scoreboard1.delta_x1 = 0;
+        ex->scoreboard1.delta_y1 = -1;
+        ex->scoreboard1.delta_x2 = 1;
+        ex->scoreboard1.delta_y2 = -1;
+        ex->scoreboard1.delta_x3 = -1;
+        ex->scoreboard1.delta_y3 = -1;
+
+        /* Dependency deltas 4..7 */
+        ex->scoreboard2.delta_x4 = -1;
+        ex->scoreboard2.delta_y4 = 1;
+        ex->scoreboard2.delta_x5 = 0;
+        ex->scoreboard2.delta_y5 = -2;
+        ex->scoreboard2.delta_x6 = 1;
+        ex->scoreboard2.delta_y6 = -2;
+        ex->scoreboard2.delta_x7 = -1;
+        ex->scoreboard2.delta_y7 = -2;
+    }
+
+    dri_bo_unmap(media_state->extended_state.bo);
+}
+
+/*
+ * Write the CURBE contents for the H.264 media kernels.
+ *
+ * When the hardware handles W=128 itself (use_hw_w128), or the first slice
+ * is an I slice, the intra kernel header is copied to the start of the
+ * buffer.  Otherwise the first four bytes are written as: 16-bit
+ * weight128_offset0, 8-bit weight128_offset0_flag, one zero byte.
+ */
+static void
+i965_media_h264_upload_constants(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *h264_ctx;
+    VASliceParameterBufferH264 *slice_param;
+    unsigned char *curbe;
+
+    assert(media_state->private_context);
+    h264_ctx = (struct i965_h264_context *)media_state->private_context;
+
+    assert(decode_state->slice_param && decode_state->slice_param->buffer);
+    slice_param = (VASliceParameterBufferH264 *)decode_state->slice_param->buffer;
+
+    dri_bo_map(media_state->curbe.bo, 1);
+    assert(media_state->curbe.bo->virtual);
+    curbe = media_state->curbe.bo->virtual;
+
+    /* HW solution for W=128, or an intra slice: only the header is needed */
+    if (h264_ctx->use_hw_w128 || slice_param->slice_type == SLICE_TYPE_I) {
+        memcpy(curbe, intra_kernel_header, sizeof(*intra_kernel_header));
+    } else {
+        /* FIXME: Need to upload CURBE data to inter kernel interface
+         * to support weighted prediction work-around
+         */
+        *(short *)curbe = h264_ctx->weight128_offset0;
+        *(char *)(curbe + 2) = h264_ctx->weight128_offset0_flag;
+        curbe[3] = 0;
+    }
+
+    dri_bo_unmap(media_state->curbe.bo);
+}
+
+/*
+ * Per-picture state setup hook for H.264 decoding.
+ *
+ * Steps, in order:
+ *   1. derive the picture size in macroblocks and the MBAFF flag from the
+ *      picture parameters and store them in the H.264 context;
+ *   2. run the BSD pipeline;
+ *   3. zero the leading (scoreboard-sized) part of the per-MB command buffer
+ *      and terminate the buffer with MI_BATCH_BUFFER_END;
+ *   4. run the HW scoreboard step;
+ *   5. build surface states, binding table, interface descriptors, VFE
+ *      states and constants for the media pipeline.
+ */
+static void
+i965_media_h264_states_setup(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *h264_ctx;
+    VAPictureParameterBufferH264 *pic_param;
+    unsigned int *mb_commands;
+    unsigned int num_mbs;
+    int is_field, scoreboard;
+    dri_bo *command_bo;
+
+    assert(media_state->private_context);
+    h264_ctx = (struct i965_h264_context *)media_state->private_context;
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    is_field = !!pic_param->pic_fields.bits.field_pic_flag;
+
+    h264_ctx->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
+    /* picture height: a field picture covers half the frame rows */
+    h264_ctx->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) /
+        (1 + is_field);
+    h264_ctx->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+                                          !is_field);
+
+    h264_ctx->avc_it_command_mb_info.mbs = (h264_ctx->picture.width_in_mbs *
+                                            h264_ctx->picture.height_in_mbs);
+
+    i965_avc_bsd_pipeline(ctx, decode_state);
+
+    /* Read the context only after the BSD pipeline has run. */
+    command_bo = h264_ctx->avc_it_command_mb_info.bo;
+    num_mbs = h264_ctx->avc_it_command_mb_info.mbs;
+    scoreboard = h264_ctx->use_avc_hw_scoreboard;
+
+    dri_bo_map(command_bo, True);
+    assert(command_bo->virtual);
+    mb_commands = command_bo->virtual;
+    /* Clears nothing when the HW scoreboard is not in use (scoreboard == 0). */
+    memset(mb_commands, 0, num_mbs * scoreboard * MB_CMD_IN_BYTES);
+    mb_commands[num_mbs * (1 + scoreboard) * MB_CMD_IN_DWS] = MI_BATCH_BUFFER_END;
+    dri_bo_unmap(command_bo);
+
+    i965_avc_hw_scoreboard(ctx, decode_state);
+
+    i965_media_h264_surfaces_setup(ctx, decode_state);
+    i965_media_h264_binding_table(ctx);
+    i965_media_h264_interface_descriptor_remap_table(ctx);
+    i965_media_h264_vfe_state_extension(ctx, decode_state);
+    i965_media_h264_vfe_state(ctx);
+    i965_media_h264_upload_constants(ctx, decode_state);
+}
+
+/*
+ * Media-objects hook: instead of emitting objects inline, chain to the
+ * per-macroblock command buffer with MI_BATCH_BUFFER_START.
+ * decode_state is not used here.
+ */
+static void
+i965_media_h264_objects(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *h264_ctx;
+    dri_bo *command_bo;
+
+    assert(media_state->private_context);
+    h264_ctx = (struct i965_h264_context *)media_state->private_context;
+    command_bo = h264_ctx->avc_it_command_mb_info.bo;
+
+    BEGIN_BATCH(ctx, 2);
+    OUT_BATCH(ctx, MI_BATCH_BUFFER_START | (2 << 6));
+    /* Start address: offset 0 of the command buffer */
+    OUT_RELOC(ctx, command_bo,
+              I915_GEM_DOMAIN_COMMAND, 0,
+              0);
+    ADVANCE_BATCH(ctx);
+}
+
+/*
+ * Prepare the media state for decoding one H.264 picture.
+ *
+ * Replaces the per-MB command buffer and the IT data buffer with freshly
+ * allocated bos (the IT data bo doubles as the indirect object buffer),
+ * initialises the BSD and HW scoreboard sub-pipelines, allocates the
+ * extended VFE state, lays out the URB and installs the H.264 hooks.
+ * Requires i965_media_h264_init() to have set media_state->private_context.
+ */
+void
+i965_media_h264_decode_init(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *h264_ctx;
+    dri_bo *command_bo, *data_bo, *ext_bo;
+
+    assert(media_state->private_context);
+    h264_ctx = media_state->private_context;
+
+    /* Per-MB command buffer; doubled when the HW scoreboard is in use. */
+    dri_bo_unreference(h264_ctx->avc_it_command_mb_info.bo);
+    command_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "avc it command mb info",
+                              0x80000 * (1 + h264_ctx->use_avc_hw_scoreboard),  /* at least 522240 bytes */
+                              0x1000);
+    assert(command_bo);
+    h264_ctx->avc_it_command_mb_info.bo = command_bo;
+
+    /* IT data buffer */
+    dri_bo_unreference(h264_ctx->avc_it_data.bo);
+    data_bo = dri_bo_alloc(i965->intel.bufmgr,
+                           "avc it data",
+                           0x1000000, /* at least 16711680 bytes */
+                           4096);
+    assert(data_bo);
+    h264_ctx->avc_it_data.bo = data_bo;
+    h264_ctx->avc_it_data.write_offset = 0;
+
+    /* The same bo is the indirect object buffer; it holds its own reference. */
+    dri_bo_unreference(media_state->indirect_object.bo);
+    media_state->indirect_object.bo = data_bo;
+    dri_bo_reference(media_state->indirect_object.bo);
+    media_state->indirect_object.offset = h264_ctx->avc_it_data.write_offset;
+
+    /* bsd pipeline */
+    i965_avc_bsd_decode_init(ctx);
+
+    /* HW scoreboard */
+    i965_avc_hw_scoreboard_decode_init(ctx);
+
+    /* for Media pipeline */
+    media_state->extended_state.enabled = 1;
+    dri_bo_unreference(media_state->extended_state.bo);
+    ext_bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "extened vfe state",
+                          sizeof(struct i965_vfe_state_ex), 32);
+    assert(ext_bo);
+    media_state->extended_state.bo = ext_bo;
+
+    /* URB */
+    media_state->urb.num_vfe_entries = IS_IRONLAKE(i965->intel.device_id) ? 63 : 23;
+    media_state->urb.size_vfe_entry = 16;
+
+    media_state->urb.num_cs_entries = 1;
+    media_state->urb.size_cs_entry = 1;
+
+    /* VFE entries first, CS entries right behind them. */
+    media_state->urb.vfe_start = 0;
+    media_state->urb.cs_start = media_state->urb.vfe_start +
+        media_state->urb.num_vfe_entries * media_state->urb.size_vfe_entry;
+    assert(media_state->urb.cs_start +
+           media_state->urb.num_cs_entries * media_state->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+
+    /* hook functions */
+    media_state->media_states_setup = i965_media_h264_states_setup;
+    media_state->media_objects = i965_media_h264_objects;
+}
+
+/*
+ * One-time H.264 initialisation for the media pipeline.
+ *
+ * Allocates the zero-initialised H.264 private context, selects the gen5
+ * (Ironlake) or gen4 kernel set, offsets and intra header, uploads every
+ * kernel binary into its own bo and finally publishes the context through
+ * media_state->private_context.
+ *
+ * Returns True on success.  Returns False, without touching any other
+ * state, when the private context cannot be allocated.
+ */
+Bool
+i965_media_h264_init(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *i965_h264_context;
+    int i;
+
+    i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
+
+    /* calloc() may fail; the context is dereferenced just below. */
+    if (!i965_h264_context)
+        return False;
+
+    /* kernel */
+    assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) /
+                                    sizeof(h264_avc_kernels_gen5[0])));
+    assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
+                                     sizeof(avc_mc_kernel_offset_gen5[0])));
+
+    if (IS_IRONLAKE(i965->intel.device_id)) {
+        h264_avc_kernels = h264_avc_kernels_gen5;
+        avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
+        intra_kernel_header = &intra_kernel_header_gen5;
+        i965_h264_context->use_avc_hw_scoreboard = 1;
+        i965_h264_context->use_hw_w128 = 1;
+    } else {
+        h264_avc_kernels = h264_avc_kernels_gen4;
+        avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
+        intra_kernel_header = &intra_kernel_header_gen4;
+        i965_h264_context->use_avc_hw_scoreboard = 0;
+        i965_h264_context->use_hw_w128 = 0;
+    }
+
+    /* Upload each kernel binary into a 64-byte aligned bo. */
+    for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
+        struct media_kernel *kernel = &h264_avc_kernels[i];
+        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
+                                  kernel->name,
+                                  kernel->size, 64);
+        assert(kernel->bo);
+        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
+    }
+
+    media_state->private_context = i965_h264_context;
+    return True;
+}
+
+/*
+ * Tear down the H.264 media state.
+ *
+ * Releases the private context (sub-contexts, command and data bos) when
+ * one exists, then drops the kernel bos.  Both parts are skipped when there
+ * is nothing to release, so the call is safe even if i965_media_h264_init()
+ * never ran.  Always returns True.
+ */
+Bool
+i965_media_h264_ternimate(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct i965_h264_context *i965_h264_context;
+    int i;
+
+    if (media_state->private_context) {
+        i965_h264_context = (struct i965_h264_context *)media_state->private_context;
+        i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
+        i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
+        dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
+        dri_bo_unreference(i965_h264_context->avc_it_data.bo);
+        free(i965_h264_context);
+        media_state->private_context = NULL;
+    }
+
+    /* The kernel table pointer is only assigned by i965_media_h264_init()
+     * in the visible code; do not index it while it is still NULL. */
+    if (h264_avc_kernels) {
+        for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
+            struct media_kernel *kernel = &h264_avc_kernels[i];
+
+            dri_bo_unreference(kernel->bo);
+            kernel->bo = NULL;
+        }
+    }
+
+    return True;
+}
diff --git a/i965_drv_video/i965_media_h264.h b/i965_drv_video/i965_media_h264.h
new file mode 100644 (file)
index 0000000..3001b2e
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef _I965_MEDIA_H264_H_
+#define _I965_MEDIA_H264_H_
+
+#include "i965_avc_bsd.h"
+#include "i965_avc_hw_scoreboard.h"
+
+/*
+ * Size of one per-macroblock command entry in the "avc it command mb info"
+ * buffer, given in bytes and in dwords (4 bytes each).
+ * NOTE(review): OWS presumably means owords (16 bytes each) - confirm.
+ */
+#define MB_CMD_IN_BYTES         64
+#define MB_CMD_IN_DWS           16
+#define MB_CMD_IN_OWS           4
+
+/*
+ * Indices into the h264_avc_kernels table.  H264_AVC_COMBINED is the kernel
+ * bo that every interface descriptor points into.
+ * NOTE(review): H264_AVC_NULL looks like a second kernel slot - confirm use.
+ */
+enum {
+    H264_AVC_COMBINED = 0,
+    H264_AVC_NULL
+};
+
+/*
+ * Private H.264 decoding context, stored in media_state->private_context by
+ * i965_media_h264_init() and freed by i965_media_h264_ternimate().
+ */
+struct i965_h264_context
+{
+    /* Per-macroblock command buffer, re-allocated for every picture and
+     * executed through MI_BATCH_BUFFER_START. */
+    struct {
+        dri_bo *bo;
+        unsigned int mbs;           /* width_in_mbs * height_in_mbs */
+    } avc_it_command_mb_info;
+
+    /* "avc it data" buffer; the same bo is installed as the media
+     * pipeline's indirect object buffer. */
+    struct {
+        dri_bo *bo;
+        long write_offset;          /* reset to 0 for every picture */
+    } avc_it_data;
+
+    /* Geometry of the picture being decoded, in macroblocks. */
+    struct {
+        unsigned int width_in_mbs;
+        unsigned int height_in_mbs; /* halved for field pictures */
+        int mbaff_frame_flag;       /* MBAFF enabled and not a field picture */
+    } picture;
+
+    /* 1 on Ironlake, 0 otherwise (set in i965_media_h264_init()). */
+    int use_avc_hw_scoreboard;
+
+    /* 1 on Ironlake, 0 otherwise; when 0 the constants upload may write the
+     * weight128_offset0 work-around values instead of the intra header. */
+    int use_hw_w128;
+    /* NOTE(review): the four weight128_* fields below are not written in the
+     * media code shown here; presumably filled by the BSD code - confirm. */
+    unsigned int weight128_luma_l0;
+    unsigned int weight128_luma_l1;
+    unsigned int weight128_chroma_l0;
+    unsigned int weight128_chroma_l1;
+    char weight128_offset0_flag;
+    short weight128_offset0;
+
+    /* Sub-pipeline contexts, torn down in i965_media_h264_ternimate(). */
+    struct i965_avc_bsd_context i965_avc_bsd_context;
+    struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context;
+};
+
+/* Allocate the H.264 private context and upload the kernels. */
+Bool i965_media_h264_init(VADriverContextP ctx);
+/* Release the private context and the kernel bos; always returns True. */
+Bool i965_media_h264_ternimate(VADriverContextP ctx);
+/* Per-picture setup: (re)allocate buffers, lay out the URB, install hooks. */
+void i965_media_h264_decode_init(VADriverContextP ctx);
+
+#endif /* _I965_MEDIA_H264_H_ */
index 83ee505..eaed1e9 100644 (file)
@@ -280,7 +280,7 @@ static struct media_kernel  mpeg2_vld_kernels_gen4[] = {
     }
 };
 
-/* On IGDNG */
+/* On IRONLAKE */
 static uint32_t frame_intra_kernel_gen5[][4] = {
    #include "shaders/mpeg2/vld/frame_intra.g4b.gen5"
 };
@@ -886,6 +886,7 @@ i965_media_mpeg2_decode_init(VADriverContextP ctx)
     dri_bo *bo;
 
     media_state->extended_state.enabled = 1;
+    media_state->indirect_object.bo = NULL;
     dri_bo_unreference(media_state->extended_state.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr, 
                       "vld state", 
@@ -907,7 +908,7 @@ i965_media_mpeg2_decode_init(VADriverContextP ctx)
            media_state->urb.num_cs_entries * media_state->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
 
     /* hook functions */
-    media_state->states_setup = i965_media_mpeg2_states_setup;
+    media_state->media_states_setup = i965_media_mpeg2_states_setup;
     media_state->media_objects = i965_media_mpeg2_objects;
 
 }
@@ -923,7 +924,7 @@ i965_media_mpeg2_init(VADriverContextP ctx)
                                      sizeof(mpeg2_vld_kernels_gen5[0])));
     assert(NUM_MPEG2_VLD_KERNELS <= MAX_INTERFACE_DESC);
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         mpeg2_vld_kernels = mpeg2_vld_kernels_gen5;
     else
         mpeg2_vld_kernels = mpeg2_vld_kernels_gen4;
index 4a4f041..08fa582 100644 (file)
@@ -75,7 +75,7 @@ static const unsigned int ps_subpic_kernel_static[][4] =
 #include "shaders/render/exa_wm_write.g4b"
 };
 
-/* On IGDNG */
+/* On IRONLAKE */
 static const unsigned int sf_kernel_static_gen5[][4] = 
 {
 #include "shaders/render/exa_sf.g4b.gen5"
@@ -183,8 +183,8 @@ static struct render_kernel *render_kernels = NULL;
 #define URB_SF_ENTRIES       1
 #define URB_SF_ENTRY_SIZE     2
 
-#define URB_CS_ENTRIES       0
-#define URB_CS_ENTRY_SIZE     0
+#define URB_CS_ENTRIES       1
+#define URB_CS_ENTRY_SIZE     1
 
 static void
 i965_render_vs_unit(VADriverContextP ctx)
@@ -198,7 +198,7 @@ i965_render_vs_unit(VADriverContextP ctx)
     vs_state = render_state->vs.state->virtual;
     memset(vs_state, 0, sizeof(*vs_state));
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
     else
         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
@@ -312,7 +312,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx)
 
     wm_state->thread1.single_program_flow = 1; /* XXX */
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
     else
         wm_state->thread1.binding_table_entry_count = 7;
@@ -329,7 +329,7 @@ i965_subpic_render_wm_unit(VADriverContextP ctx)
     wm_state->wm4.stats_enable = 0;
     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
     else
         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
@@ -375,7 +375,7 @@ i965_render_wm_unit(VADriverContextP ctx)
 
     wm_state->thread1.single_program_flow = 1; /* XXX */
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
     else
         wm_state->thread1.binding_table_entry_count = 7;
@@ -383,8 +383,8 @@ i965_render_wm_unit(VADriverContextP ctx)
     wm_state->thread2.scratch_space_base_pointer = 0;
     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
 
-    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
-    wm_state->thread3.const_urb_entry_read_length = 0;
+    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
+    wm_state->thread3.const_urb_entry_read_length = 1;
     wm_state->thread3.const_urb_entry_read_offset = 0;
     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
@@ -392,7 +392,7 @@ i965_render_wm_unit(VADriverContextP ctx)
     wm_state->wm4.stats_enable = 0;
     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
     else 
         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
@@ -532,7 +532,8 @@ i965_render_src_surface_state(VADriverContextP ctx,
                               int index,
                               dri_bo *region,
                               unsigned long offset,
-                              int w, int h)
+                              int w, int h,
+                              int pitch, int format)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
     struct i965_render_state *render_state = &i965->render_state;
@@ -548,7 +549,7 @@ i965_render_src_surface_state(VADriverContextP ctx,
     ss = ss_bo->virtual;
     memset(ss, 0, sizeof(*ss));
     ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+    ss->ss0.surface_format = format;
     ss->ss0.writedisable_alpha = 0;
     ss->ss0.writedisable_red = 0;
     ss->ss0.writedisable_green = 0;
@@ -566,7 +567,7 @@ i965_render_src_surface_state(VADriverContextP ctx,
     ss->ss2.mip_count = 0;
     ss->ss2.render_target_rotation = 0;
 
-    ss->ss3.pitch = w - 1;
+    ss->ss3.pitch = pitch - 1;
 
     dri_bo_emit_reloc(ss_bo,
                       I915_GEM_DOMAIN_SAMPLER, 0,
@@ -642,6 +643,7 @@ i965_render_src_surfaces_state(VADriverContextP ctx,
                               VASurfaceID surface)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
+    struct i965_render_state *render_state = &i965->render_state;
     struct object_surface *obj_surface;
     int w, h;
     dri_bo *region;
@@ -653,12 +655,18 @@ i965_render_src_surfaces_state(VADriverContextP ctx,
     h = obj_surface->height;
     region = obj_surface->bo;
 
-    i965_render_src_surface_state(ctx, 1, region, 0, w, h);     /* Y */
-    i965_render_src_surface_state(ctx, 2, region, 0, w, h);
-    i965_render_src_surface_state(ctx, 3, region, w * h + w * h / 4, w / 2, h / 2);     /* V */
-    i965_render_src_surface_state(ctx, 4, region, w * h + w * h / 4, w / 2, h / 2);
-    i965_render_src_surface_state(ctx, 5, region, w * h, w / 2, h / 2); /* U */
-    i965_render_src_surface_state(ctx, 6, region, w * h, w / 2, h / 2);
+    i965_render_src_surface_state(ctx, 1, region, 0, w, h, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
+    i965_render_src_surface_state(ctx, 2, region, 0, w, h, w, I965_SURFACEFORMAT_R8_UNORM);
+
+    if (render_state->interleaved_uv) {
+        i965_render_src_surface_state(ctx, 3, region, w * h, w / 2, h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
+        i965_render_src_surface_state(ctx, 4, region, w * h, w / 2, h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
+    } else {
+        i965_render_src_surface_state(ctx, 3, region, w * h, w / 2, h / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
+        i965_render_src_surface_state(ctx, 4, region, w * h, w / 2, h / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
+        i965_render_src_surface_state(ctx, 5, region, w * h + w * h / 4, w / 2, h / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
+        i965_render_src_surface_state(ctx, 6, region, w * h + w * h / 4, w / 2, h / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
+    }
 }
 
 static void
@@ -903,6 +911,25 @@ i965_render_upload_vertex(VADriverContextP ctx,
 }
 
+/*
+ * Publish the chroma layout to the render kernels: the first 16-bit word of
+ * the constant (CURBE) buffer is set to 1 when the source surface uses
+ * interleaved UV and to 0 otherwise.
+ */
 static void
+i965_render_upload_constants(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    dri_bo *curbe_bo = render_state->curbe.bo;
+    unsigned short *layout_flag;
+
+    /* second argument 1: presumably maps the bo writable -- TODO confirm */
+    dri_bo_map(curbe_bo, 1);
+    assert(curbe_bo->virtual);
+
+    layout_flag = curbe_bo->virtual;
+    *layout_flag = render_state->interleaved_uv ? 1 : 0;
+
+    dri_bo_unmap(curbe_bo);
+}
+
+static void
 i965_surface_render_state_setup(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
@@ -926,6 +953,7 @@ i965_surface_render_state_setup(VADriverContextP ctx,
     i965_render_upload_vertex(ctx, surface,
                               srcx, srcy, srcw, srch,
                               destx, desty, destw, desth);
+    i965_render_upload_constants(ctx);
 }
 static void
 i965_subpic_render_state_setup(VADriverContextP ctx,
@@ -980,7 +1008,7 @@ i965_render_state_base_address(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
 
-    if (IS_IGDNG(i965->intel.device_id)) {
+    if (IS_IRONLAKE(i965->intel.device_id)) {
         BEGIN_BATCH(ctx, 8);
         OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
@@ -1099,6 +1127,20 @@ i965_render_cs_urb_layout(VADriverContextP ctx)
 }
 
+/*
+ * Emit a two-dword CMD_CONSTANT_BUFFER packet that points the hardware at
+ * render_state->curbe.bo (the buffer filled by i965_render_upload_constants).
+ */
 static void
+i965_render_constant_buffer(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(ctx, 2);
+    /* NOTE(review): (1 << 8) is presumably the "buffer valid" bit; (2 - 2) is the length field -- confirm against the PRM */
+    OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
+    /* NOTE(review): the delta URB_CS_ENTRY_SIZE - 1 presumably encodes the buffer length in the low address bits -- confirm */
+    OUT_RELOC(ctx, render_state->curbe.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              URB_CS_ENTRY_SIZE - 1);
+    ADVANCE_BATCH(ctx);    
+}
+
+static void
 i965_render_drawing_rectangle(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
@@ -1118,7 +1160,7 @@ i965_render_vertex_elements(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
 
-    if (IS_IGDNG(i965->intel.device_id)) {
+    if (IS_IRONLAKE(i965->intel.device_id)) {
         BEGIN_BATCH(ctx, 5);
         OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
@@ -1206,7 +1248,7 @@ i965_render_startup(VADriverContextP ctx)
               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
     OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
     else
         OUT_BATCH(ctx, 3);
@@ -1282,6 +1324,7 @@ i965_surface_render_pipeline_setup(VADriverContextP ctx)
     i965_render_pipelined_pointers(ctx);
     i965_render_urb_layout(ctx);
     i965_render_cs_urb_layout(ctx);
+    i965_render_constant_buffer(ctx);
     i965_render_drawing_rectangle(ctx);
     i965_render_vertex_elements(ctx);
     i965_render_startup(ctx);
@@ -1445,13 +1488,14 @@ Bool
 i965_render_init(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
     int i;
 
     /* kernel */
     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
                                  sizeof(render_kernels_gen5[0])));
 
-    if (IS_IGDNG(i965->intel.device_id))
+    if (IS_IRONLAKE(i965->intel.device_id))
         render_kernels = render_kernels_gen5;
     else
         render_kernels = render_kernels_gen4;
@@ -1465,6 +1509,12 @@ i965_render_init(VADriverContextP ctx)
         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
     }
 
+    /* constant buffer */
+    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "constant buffer",
+                      4096, 64);
+    assert(render_state->curbe.bo);
+
     return True;
 }
 
@@ -1475,6 +1525,9 @@ i965_render_terminate(VADriverContextP ctx)
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_render_state *render_state = &i965->render_state;
 
+    dri_bo_unreference(render_state->curbe.bo);
+    render_state->curbe.bo = NULL;
+
     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
         struct render_kernel *kernel = &render_kernels[i];
         
index e3dce02..2643285 100644 (file)
@@ -60,6 +60,11 @@ struct i965_render_state
         dri_bo *viewport;
     } cc;
 
+    struct {
+        dri_bo *bo;
+    } curbe;
+
+    int interleaved_uv;
     struct intel_region *draw_region;
 };
 
index 93fa2f0..d133446 100644 (file)
@@ -34,16 +34,20 @@ struct i965_vfe_state_ex
        unsigned int obj_id:24;
     } vfex0;
 
-    struct {
-       unsigned int residual_grf_offset:5;
-       unsigned int pad0:3;
-       unsigned int weight_grf_offset:5;
-       unsigned int pad1:3;
-       unsigned int residual_data_offset:8;
-       unsigned int sub_field_present_flag:2;
-       unsigned int residual_data_fix_offset:1;
-       unsigned int pad2:5;
-    }vfex1;
+    union {
+        struct {
+            unsigned int residual_grf_offset:5;
+            unsigned int pad0:3;
+            unsigned int weight_grf_offset:5;
+            unsigned int pad1:3;
+            unsigned int residual_data_offset:8;
+            unsigned int sub_field_present_flag:2;
+            unsigned int residual_data_fix_offset_flag:1;
+            unsigned int pad2:5;
+        } avc;
+        
+        unsigned int vc1;
+    } vfex1;
 
     struct {
        unsigned int remap_index_0:4;
@@ -68,18 +72,32 @@ struct i965_vfe_state_ex
     } remap_table1;
 
     struct {
-       unsigned int scoreboard_mask:8;
+       unsigned int mask:8;
        unsigned int pad:22;
        unsigned int type:1;
        unsigned int enable:1;
     } scoreboard0;
 
     struct {
-       unsigned int ignore;
+        int delta_x0:4;
+        int delta_y0:4;
+        int delta_x1:4;
+        int delta_y1:4;
+        int delta_x2:4;
+        int delta_y2:4;
+        int delta_x3:4;
+        int delta_y3:4;
     } scoreboard1;
 
     struct {
-       unsigned int ignore;
+        int delta_x4:4;
+        int delta_y4:4;
+        int delta_x5:4;
+        int delta_y5:4;
+        int delta_x6:4;
+        int delta_y6:4;
+        int delta_x7:4;
+        int delta_y7:4;
     } scoreboard2;
 
     unsigned int pad;
@@ -177,8 +195,9 @@ struct i965_surface_state
        unsigned int cube_neg_y:1;
        unsigned int cube_pos_x:1;
        unsigned int cube_neg_x:1;
-       unsigned int pad:3;
+       unsigned int pad:2;
        unsigned int render_cache_read_mode:1;
+       unsigned int cube_map_corner_mode:1;
        unsigned int mipmap_layout_mode:1;
        unsigned int vert_line_stride_ofs:1;
        unsigned int vert_line_stride:1;
index 9fabf6a..4c622d6 100644 (file)
@@ -38,16 +38,16 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
 {
     struct intel_driver_data *intel = batch->intel; 
 
-    if (batch->buffer != NULL) {
-        dri_bo_unreference(batch->buffer);
-        batch->buffer = NULL;
-    }
+    assert(batch->flag == ON_RENDER_RING ||
+           batch->flag == ON_BSD_RING);
 
-    batch->buffer = dri_bo_alloc(intel->bufmgr, "batch buffer", 
+    dri_bo_unreference(batch->buffer);
+    batch->buffer = dri_bo_alloc(intel->bufmgr, 
+                                 batch->flag == ON_RENDER_RING ? "render batch buffer" : "bsd batch buffer", 
                                  BATCH_SIZE, 0x1000);
-
     assert(batch->buffer);
     dri_bo_map(batch->buffer, 1);
+    assert(batch->buffer->virtual);
     batch->map = batch->buffer->virtual;
     batch->size = BATCH_SIZE;
     batch->ptr = batch->map;
@@ -58,11 +58,19 @@ Bool
 intel_batchbuffer_init(struct intel_driver_data *intel)
 {
     intel->batch = calloc(1, sizeof(*(intel->batch)));
-
     assert(intel->batch);
     intel->batch->intel = intel;
+    intel->batch->flag = ON_RENDER_RING;
+    intel->batch->run = drm_intel_bo_mrb_exec;
     intel_batchbuffer_reset(intel->batch);
 
+    intel->batch_bcs = calloc(1, sizeof(*(intel->batch_bcs)));
+    assert(intel->batch_bcs);
+    intel->batch_bcs->intel = intel;
+    intel->batch_bcs->flag = ON_BSD_RING;
+    intel->batch_bcs->run = drm_intel_bo_mrb_exec;
+    intel_batchbuffer_reset(intel->batch_bcs);
+
     return True;
 }
 
@@ -80,14 +88,25 @@ intel_batchbuffer_terminate(struct intel_driver_data *intel)
         intel->batch = NULL;
     }
 
+    if (intel->batch_bcs) {
+        if (intel->batch_bcs->map) {
+            dri_bo_unmap(intel->batch_bcs->buffer);
+            intel->batch_bcs->map = NULL;
+        }
+
+        dri_bo_unreference(intel->batch_bcs->buffer);
+        free(intel->batch_bcs);
+        intel->batch_bcs = NULL;
+    }
+
     return True;
 }
 
-Bool 
-intel_batchbuffer_flush(VADriverContextP ctx)
+static Bool
+intel_batchbuffer_flush_helper(VADriverContextP ctx,
+                               struct intel_batchbuffer *batch)
 {
-    struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct intel_batchbuffer *batch = intel->batch;
+    struct intel_driver_data *intel = batch->intel;
     unsigned int used = batch->ptr - batch->map;
     int is_locked = intel->locked;
 
@@ -108,98 +127,228 @@ intel_batchbuffer_flush(VADriverContextP ctx)
     if (!is_locked)
         intel_lock_hardware(ctx);
 
-    dri_bo_exec(batch->buffer, used, 0, 0, 0);
+    batch->run(batch->buffer, used, 0, 0, 0, batch->flag);
 
     if (!is_locked)
         intel_unlock_hardware(ctx);
 
-    intel_batchbuffer_reset(intel->batch);
+    intel_batchbuffer_reset(batch);
 
     return True;
 }
 
+Bool 
+intel_batchbuffer_flush(VADriverContextP ctx)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    
+    return intel_batchbuffer_flush_helper(ctx, intel->batch);
+}
+
+Bool 
+intel_batchbuffer_flush_bcs(VADriverContextP ctx)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    
+    return intel_batchbuffer_flush_helper(ctx, intel->batch_bcs);
+}
+
 static unsigned int
-intel_batchbuffer_space(struct intel_batchbuffer *batch)
+intel_batchbuffer_space_helper(struct intel_batchbuffer *batch)
 {
     return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
 }
 
+/*
+ * Append one 32-bit dword to `batch` and advance its write pointer by 4.
+ * The caller must already have made room: the function only asserts that at
+ * least 4 bytes remain (as reported by intel_batchbuffer_space_helper) and
+ * never flushes.
+ */
+static void
+intel_batchbuffer_emit_dword_helper(struct intel_batchbuffer *batch, 
+                                    unsigned int x)
+{
+    assert(intel_batchbuffer_space_helper(batch) >= 4);
+    *(unsigned int *)batch->ptr = x;
+    batch->ptr += 4;
+}
+
 void 
 intel_batchbuffer_emit_dword(VADriverContextP ctx, unsigned int x)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct intel_batchbuffer *batch = intel->batch;
 
-    assert(intel_batchbuffer_space(batch) >= 4);
-    *(unsigned int*)batch->ptr = x;
-    batch->ptr += 4;
+    intel_batchbuffer_emit_dword_helper(intel->batch, x);
 }
 
 void 
-intel_batchbuffer_emit_reloc(VADriverContextP ctx, dri_bo *bo, 
-                             uint32_t read_domains, uint32_t write_domains, 
-                             uint32_t delta)
+intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct intel_batchbuffer *batch = intel->batch;
 
+    intel_batchbuffer_emit_dword_helper(intel->batch_bcs, x);
+}
+
+static void 
+intel_batchbuffer_emit_reloc_helper(VADriverContextP ctx, 
+                                    struct intel_batchbuffer *batch,
+                                    dri_bo *bo, 
+                                    uint32_t read_domains, uint32_t write_domains, 
+                                    uint32_t delta)
+{
     assert(batch->ptr - batch->map < batch->size);
     dri_bo_emit_reloc(batch->buffer, read_domains, write_domains,
                       delta, batch->ptr - batch->map, bo);
-    intel_batchbuffer_emit_dword(ctx, bo->offset + delta);
+    intel_batchbuffer_emit_dword_helper(batch, bo->offset + delta);
 }
 
 void 
-intel_batchbuffer_require_space(VADriverContextP ctx, unsigned int size)
+intel_batchbuffer_emit_reloc(VADriverContextP ctx, dri_bo *bo, 
+                             uint32_t read_domains, uint32_t write_domains, 
+                             uint32_t delta)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct intel_batchbuffer *batch = intel->batch;
 
+    intel_batchbuffer_emit_reloc_helper(ctx, intel->batch,
+                                        bo, read_domains, write_domains,
+                                        delta);
+}
+
+void 
+intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo, 
+                                 uint32_t read_domains, uint32_t write_domains, 
+                                 uint32_t delta)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_emit_reloc_helper(ctx, intel->batch_bcs,
+                                        bo, read_domains, write_domains,
+                                        delta);
+}
+
+static void 
+intel_batchbuffer_require_space_helper(VADriverContextP ctx, 
+                                struct intel_batchbuffer *batch,
+                                unsigned int size)
+{
     assert(size < batch->size - 8);
 
-    if (intel_batchbuffer_space(batch) < size) {
-        intel_batchbuffer_flush(ctx);
+    if (intel_batchbuffer_space_helper(batch) < size) {
+        intel_batchbuffer_flush_helper(ctx, batch);
     }
 }
 
 void 
-intel_batchbuffer_data(VADriverContextP ctx, void *data, unsigned int size)
+intel_batchbuffer_require_space(VADriverContextP ctx, unsigned int size)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct intel_batchbuffer *batch = intel->batch;
 
+    intel_batchbuffer_require_space_helper(ctx, intel->batch, size);
+}
+
+void 
+intel_batchbuffer_require_space_bcs(VADriverContextP ctx, unsigned int size)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_require_space_helper(ctx, intel->batch_bcs, size);
+}
+
+static void 
+intel_batchbuffer_data_helper(VADriverContextP ctx, 
+                              struct intel_batchbuffer *batch,
+                              void *data,
+                              unsigned int size)
+{
     assert((size & 3) == 0);
-    intel_batchbuffer_require_space(ctx, size);
+    intel_batchbuffer_require_space_helper(ctx, batch, size);
 
     assert(batch->ptr);
     memcpy(batch->ptr, data, size);
     batch->ptr += size;
 }
 
+void 
+intel_batchbuffer_data(VADriverContextP ctx, void *data, unsigned int size)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_data_helper(ctx, intel->batch, data, size);
+}
+
+void 
+intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_data_helper(ctx, intel->batch_bcs, data, size);
+}
+
+static void
+intel_batchbuffer_emit_mi_flush_helper(VADriverContextP ctx,
+                                       struct intel_batchbuffer *batch)
+{
+    intel_batchbuffer_require_space_helper(ctx, batch, 4);
+    intel_batchbuffer_emit_dword_helper(batch, 
+                                        MI_FLUSH | STATE_INSTRUCTION_CACHE_INVALIDATE);
+}
+
 void
 intel_batchbuffer_emit_mi_flush(VADriverContextP ctx)
 {
-    intel_batchbuffer_require_space(ctx, 4);
-    intel_batchbuffer_emit_dword(ctx, MI_FLUSH | STATE_INSTRUCTION_CACHE_INVALIDATE);
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch);
 }
 
 void
-intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size)
+intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct intel_batchbuffer *batch = intel->batch;
 
+    intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch_bcs);
+}
+
+/*
+ * Begin an atomic section on `batch`: make sure `size` bytes are available
+ * (flushing first if they are not) and then set batch->atomic.  Asserts that
+ * no atomic section is already open.
+ *
+ * File-local like the other *_helper routines; callers use the public
+ * intel_batchbuffer_start_atomic() / intel_batchbuffer_start_atomic_bcs()
+ * wrappers.
+ */
+static void
+intel_batchbuffer_start_atomic_helper(VADriverContextP ctx, 
+                                      struct intel_batchbuffer *batch,
+                                      unsigned int size)
+{
     assert(!batch->atomic);
-    intel_batchbuffer_require_space(ctx, size);
+    intel_batchbuffer_require_space_helper(ctx, batch, size);
     batch->atomic = 1;
 }
 
 void
-intel_batchbuffer_end_atomic(VADriverContextP ctx)
+intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_start_atomic_helper(ctx, intel->batch, size);
+}
+
+void
+intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct intel_batchbuffer *batch = intel->batch;
+    intel_batchbuffer_start_atomic_helper(ctx, intel->batch_bcs, size);
+}
 
+/*
+ * End the atomic section on `batch` by clearing batch->atomic.  Asserts that
+ * a section is currently open.
+ *
+ * File-local like the other *_helper routines; callers use the public
+ * intel_batchbuffer_end_atomic() / intel_batchbuffer_end_atomic_bcs()
+ * wrappers.
+ */
+static void
+intel_batchbuffer_end_atomic_helper(struct intel_batchbuffer *batch)
+{
     assert(batch->atomic);
     batch->atomic = 0;
 }
+
+void
+intel_batchbuffer_end_atomic(VADriverContextP ctx)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_end_atomic_helper(intel->batch);
+}
+
+void
+intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    intel_batchbuffer_end_atomic_helper(intel->batch_bcs);
+}
+
index c3a2457..99ab08d 100644 (file)
@@ -16,10 +16,16 @@ struct intel_batchbuffer
     unsigned char *map;
     unsigned char *ptr;
     int atomic;
+    int flag;
+
+    int (*run)(drm_intel_bo *bo, int used,
+               drm_clip_rect_t *cliprects, int num_cliprects,
+               int DR4, int ring_flag);
 };
 
 Bool intel_batchbuffer_init(struct intel_driver_data *intel);
 Bool intel_batchbuffer_terminate(struct intel_driver_data *intel);
+
 void intel_batchbuffer_emit_dword(VADriverContextP ctx, unsigned int x);
 void intel_batchbuffer_emit_reloc(VADriverContextP ctx, dri_bo *bo, 
                                   uint32_t read_domains, uint32_t write_domains, 
@@ -31,6 +37,17 @@ void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size);
 void intel_batchbuffer_end_atomic(VADriverContextP ctx);
 Bool intel_batchbuffer_flush(VADriverContextP ctx);
 
+void intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x);
+void intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo, 
+                                      uint32_t read_domains, uint32_t write_domains, 
+                                      uint32_t delta);
+void intel_batchbuffer_require_space_bcs(VADriverContextP ctx, unsigned int size);
+void intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size);
+void intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx);
+void intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size);
+void intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx);
+Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
+
 #define BEGIN_BATCH(ctx, n) do {                                \
    intel_batchbuffer_require_space(ctx, (n) * 4);               \
 } while (0)
@@ -48,4 +65,21 @@ Bool intel_batchbuffer_flush(VADriverContextP ctx);
 #define ADVANCE_BATCH(ctx) do {                                         \
 } while (0)
 
+#define BEGIN_BCS_BATCH(ctx, n) do {                                    \
+   intel_batchbuffer_require_space_bcs(ctx, (n) * 4);                   \
+} while (0)
+
+#define OUT_BCS_BATCH(ctx, d) do {                                      \
+   intel_batchbuffer_emit_dword_bcs(ctx, d);                            \
+} while (0)
+
+#define OUT_BCS_RELOC(ctx, bo, read_domains, write_domain, delta) do {  \
+   assert((delta) >= 0);                                                \
+   intel_batchbuffer_emit_reloc_bcs(ctx, bo,                            \
+                                    read_domains, write_domain, delta); \
+} while (0)
+
+#define ADVANCE_BCS_BATCH(ctx) do {                                     \
+} while (0)
+
 #endif /* _INTEL_BATCHBUFFER_H_ */
diff --git a/i965_drv_video/intel_batchbuffer_dump.c b/i965_drv_video/intel_batchbuffer_dump.c
new file mode 100644 (file)
index 0000000..0732f0f
--- /dev/null
@@ -0,0 +1,409 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "intel_driver.h"
+#include "intel_batchbuffer_dump.h"
+
+/*
+ * Report a command that runs past the end of the buffer and bail out of the
+ * enclosing decoder.
+ *
+ * Not hygienic: besides its arguments, the macro uses the enclosing
+ * function's `failures` (int *) and `count` (int) variables and the file
+ * level `gout` stream, and it executes `return count;`, i.e. the caller is
+ * told that all remaining dwords were consumed.
+ */
+#define BUFFER_FAIL(_count, _len, _name) do {                  \
+    fprintf(gout, "Buffer size too small in %s (%d < %d)\n",   \
+           (_name), (_count), (_len));                         \
+    (*failures)++;                                             \
+    return count;                                              \
+} while (0)
+
+static FILE *gout;
+
+/*
+ * Print one annotated dword to the dump file.
+ *
+ * Output is "<address>: <value>:" followed by the printf-style message, where
+ * address is offset + index * 4 and value is data[index].  Dwords other than
+ * the first of a command (index != 0) get two extra spaces of indentation.
+ */
+static void
+instr_out(unsigned int *data, unsigned int offset, unsigned int index, char *fmt, ...)
+{
+    const char *indent = (index == 0) ? "" : "  ";
+    va_list args;
+
+    fprintf(gout, "0x%08x: 0x%08x:%s ", offset + index * 4, data[index], indent);
+
+    va_start(args, fmt);
+    vfprintf(gout, fmt, args);
+    va_end(args);
+}
+
+
+/*
+ * Decode one MI command at data[0].
+ *
+ * Returns the number of dwords consumed: the command length for a known
+ * opcode, 1 for an unknown opcode (which also bumps *failures), or `count`
+ * via BUFFER_FAIL when the command would run past the buffer.  Every entry
+ * in the table below is a single-dword command, so the variable-length path
+ * is only exercised if longer commands are added.  `device` is not used.
+ */
+static int
+dump_mi(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    static const struct {
+        unsigned int opcode;
+        int mask_length;
+        int min_len;
+        int max_len;
+        char *name;
+    } mi_commands[] = {
+        { 0x00, 0, 1, 1, "MI_NOOP" },
+        { 0x04, 0, 1, 1, "MI_FLUSH" },
+        { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+    };
+    const unsigned int num_commands = sizeof(mi_commands) / sizeof(mi_commands[0]);
+    const unsigned int opcode = (data[0] & MASK_MI_OPCODE) >> SHIFT_MI_OPCODE;
+    unsigned int n;
+
+    for (n = 0; n < num_commands; n++) {
+        int length = 1;
+        int dw;
+
+        if (mi_commands[n].opcode != opcode)
+            continue;
+
+        instr_out(data, offset, 0, "%s\n", mi_commands[n].name);
+
+        /* only multi-dword commands carry a length field */
+        if (mi_commands[n].max_len > 1) {
+            length = (data[0] & mi_commands[n].mask_length) + 2;
+
+            if (length < mi_commands[n].min_len ||
+                length > mi_commands[n].max_len) {
+                fprintf(gout, "Bad length (%d) in %s, [%d, %d]\n",
+                        length, mi_commands[n].name,
+                        mi_commands[n].min_len,
+                        mi_commands[n].max_len);
+            }
+        }
+
+        for (dw = 1; dw < length; dw++) {
+            if (dw >= count)
+                BUFFER_FAIL(count, length, mi_commands[n].name);
+
+            instr_out(data, offset, dw, "dword %d\n", dw);
+        }
+
+        return length;
+    }
+
+    instr_out(data, offset, 0, "UNKNOWN MI COMMAND\n");
+    (*failures)++;
+    return 1;
+}
+
+/*
+ * Placeholder decoder for 3D pipeline commands: nothing is decoded yet, so
+ * every 3D command is reported as unknown, counted as a failure, and treated
+ * as one dword long.  `count` and `device` are not used.
+ */
+static int
+dump_gfxpipe_3d(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 0, "UNKNOWN 3D COMMAND\n");
+    (*failures)++;
+
+    return 1;
+}
+
+/*
+ * Pretty-print the payload (dwords 1..5) of an AVC_BSD_IMG_STATE command.
+ * All five dwords are read unconditionally; `device` and `failures` are not
+ * used.
+ */
+static void
+dump_avc_bsd_img_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    /* indexed by the 2-bit image structure field, bits 9:8 of dword 3 */
+    static const char * const img_struct_names[4] = {
+        "frame",
+        "top field",
+        "invalid",
+        "bottom field",
+    };
+    const unsigned int dw2 = data[2];
+    const unsigned int dw3 = data[3];
+    const unsigned int dw4 = data[4];
+
+    instr_out(data, offset, 1, "frame size: %d\n", (data[1] & 0xffff));
+    instr_out(data, offset, 2, "width: %d, height: %d\n", (dw2 & 0xff), (dw2 >> 16) & 0xff);
+    instr_out(data, offset, 3,
+              "second_chroma_qp_offset: %d,"
+              "chroma_qp_offset: %d,"
+              "QM present flag: %d,"
+              "image struct: %s,"
+              "img_dec_fs_idc: %d,"
+              "\n",
+              (dw3 >> 24) & 0x1f,
+              (dw3 >> 16) & 0x1f,
+              (dw3 >> 10) & 0x1,
+              img_struct_names[(dw3 >> 8) & 0x3],
+              dw3 & 0xff);
+    instr_out(data, offset, 4,
+              "residual off: 0x%x,"
+              "16MV: %d,"
+              "chroma fmt: %d,"
+              "CABAC: %d,"
+              "non-ref: %d,"
+              "constrained intra: %d,"
+              "direct8x8: %d,"
+              "trans8x8: %d,"
+              "MB only: %d,"
+              "MBAFF: %d,"
+              "\n",
+              (dw4 >> 24) & 0xff,
+              (dw4 >> 12) & 0x1,
+              (dw4 >> 10) & 0x3,
+              (dw4 >> 7) & 0x1,
+              (dw4 >> 6) & 0x1,
+              (dw4 >> 5) & 0x1,
+              (dw4 >> 4) & 0x1,
+              (dw4 >> 3) & 0x1,
+              (dw4 >> 2) & 0x1,
+              (dw4 >> 1) & 0x1);
+    instr_out(data, offset, 5, "AVC-IT Command Header\n");
+}
+
+/*
+ * Pretty-print an AVC_BSD_QM_STATE command: dword 1 is decoded into its two
+ * byte-wide fields and every remaining dword, up to the length encoded in
+ * data[0], is printed raw.  `device` and `failures` are not used.
+ */
+static void
+dump_avc_bsd_qm_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    /* total dwords = length field + 2 */
+    const int num_dwords = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
+    int dw = 2;
+
+    instr_out(data, offset, 1, "user default: %02x, QM list present: %02x\n",
+              (data[1] >> 8) & 0xff, data[1] & 0xff);
+
+    while (dw < num_dwords) {
+        instr_out(data, offset, dw, "dword %d\n", dw);
+        dw++;
+    }
+}
+
+/*
+ * Intentionally empty stub for AVC_BSD_SLICE_STATE.  Nothing in this file
+ * references it: the command table in dump_bsd_avc passes NULL for this
+ * command, so its dwords are printed by the generic raw dump instead.
+ */
+static void
+dump_avc_bsd_slice_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+
+}
+
+/*
+ * Label every dword of an AVC_BSD_BUF_BASE_STATE command.
+ *
+ * Dwords 1..73 are read unconditionally, which matches the fixed length of
+ * 0x4a (74) dwords listed for this command in dump_bsd_avc.  `device` and
+ * `failures` are not used.
+ */
+static void
+dump_avc_bsd_buf_base_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    int i;
+
+    instr_out(data, offset, 1, "BSD row store base address\n");
+    instr_out(data, offset, 2, "MPR row store base address\n");
+    instr_out(data, offset, 3, "AVC-IT command buffer base address\n");
+    /* dword 4 is split: bits 31:12 are printed as the buffer address, bits 11:6 as the write offset */
+    instr_out(data, offset, 4, "AVC-IT data buffer: 0x%08x, write offset: 0x%x\n", 
+              data[4] & 0xFFFFF000, data[4] & 0xFC0);
+    instr_out(data, offset, 5, "ILDB data buffer\n");
+
+    /* dwords 6..37: 32 direct-MV read addresses, numbered 0..31 */
+    for (i = 6; i < 38; i++) {
+        instr_out(data, offset, i, "Direct MV read base address for reference frame %d\n", i - 6);
+    }
+
+    instr_out(data, offset, 38, "direct mv wr0 top\n");
+    instr_out(data, offset, 39, "direct mv wr0 bottom\n");
+
+    /* dwords 40..73: 34 POC list entries, numbered 0..33 */
+    for (i = 40; i < 74; i++) {
+        instr_out(data, offset, i, "POC List %d\n", i - 40);
+    }
+}
+
+/*
+ * Label the two payload dwords of a BSD_IND_OBJ_BASE_ADDR command (listed
+ * with a fixed length of 3 dwords in dump_bsd_avc).  `device` and `failures`
+ * are not used.
+ */
+static void
+dump_bsd_ind_obj_base_addr(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "AVC indirect object base address\n");
+    instr_out(data, offset, 2, "AVC Indirect Object Access Upper Bound\n");
+}
+
+/*
+ * Pretty-print an Ironlake AVC_BSD_OBJECT command.
+ *
+ * A command whose bitstream length (low 22 bits of dword 1) is zero is shown
+ * as a "phantom slice"; otherwise dwords 1..7 are decoded field by field.
+ * In both cases the remaining dwords are printed raw.
+ *
+ * The command length is re-derived from data[0] so that no dword beyond the
+ * command itself is read: dump_bsd_avc accepts lengths from 8 to 16 dwords
+ * for this command, and a command shorter than 8 dwords gets a raw dump only.
+ * `failures` is not used.
+ */
+static void 
+dump_ironlake_avc_bsd_object(unsigned int *data, unsigned int offset, int *failures)
+{
+    int length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
+    int tail_end = length < 16 ? length : 16;   /* never print past dword 15 */
+    int slice_type, is_phantom;
+    int i;
+
+    if (length < 8) {
+        /* too short for the fixed fields decoded below */
+        for (i = 1; i < length; i++)
+            instr_out(data, offset, i, "dword %d\n", i);
+
+        return;
+    }
+
+    slice_type = data[3] & 0xf;
+    is_phantom = ((data[1] & 0x3fffff) == 0);
+
+    if (!is_phantom) {
+        instr_out(data, offset, 1, "Encrypted: %d, bitstream length: %d\n", data[1] >> 31, data[1] & 0x3fffff);
+        instr_out(data, offset, 2, "Indirect Data Start Address: %d\n", data[2] & 0x1fffffff);
+        instr_out(data, offset, 3, "%s Slice\n", slice_type == 0 ? "P" : slice_type == 1 ? "B" : "I");
+        /*
+         * H.264 allows log2 weight denominators of 0..7, so these two fields
+         * need a 3-bit mask (0x7); a 2-bit mask would misreport 4..7.
+         * NOTE(review): field width taken from the H.264 value range --
+         * confirm against the hardware documentation.
+         */
+        instr_out(data, offset, 4,
+                  "Num_Ref_Idx_L1: %d,"
+                  "Num_Ref_Idx_L0: %d,"
+                  "Log2WeightDenomChroma: %d,"
+                  "Log2WeightDenomLuma: %d"
+                  "\n",
+                  (data[4] >> 24) & 0x3f,
+                  (data[4] >> 16) & 0x3f,
+                  (data[4] >> 8) & 0x7,
+                  (data[4] >> 0) & 0x7);
+        instr_out(data, offset, 5,
+                  "WeightedPredIdc: %d,"
+                  "DirectPredType: %d,"
+                  "DisableDeblockingFilter: %d,"
+                  "CabacInitIdc: %d,"
+                  "SliceQp: %d,"
+                  "SliceBetaOffsetDiv2: %d,"
+                  "SliceAlphaC0OffsetDiv2: %d"
+                  "\n",
+                  (data[5] >> 30) & 0x3,
+                  (data[5] >> 29) & 0x1,
+                  (data[5] >> 27) & 0x3,
+                  (data[5] >> 24) & 0x3,
+                  (data[5] >> 16) & 0x3f,
+                  (data[5] >> 8) & 0xf,
+                  (data[5] >> 0) & 0xf);
+        instr_out(data, offset, 6,
+                  "Slice_MB_Start_Vert_Pos: %d,"
+                  "Slice_MB_Start_Hor_Pos: %d,"
+                  "Slice_Start_Mb_Num: %d"
+                  "\n",
+                  (data[6] >> 24) & 0xff,
+                  (data[6] >> 16) & 0xff,
+                  (data[6] >> 0) & 0x7fff);
+        instr_out(data, offset, 7,
+                  "Fix_Prev_Mb_Skipped: %d,"
+                  "First_MB_Bit_Offset: %d"
+                  "\n",
+                  (data[7] >> 7) & 0x1,
+                  (data[7] >> 0) & 0x7);
+
+        for (i = 8; i < tail_end; i++)
+            instr_out(data, offset, i, "dword %d\n", i);
+    } else {
+        instr_out(data, offset, 1, "phantom slice\n");
+
+        for (i = 2; i < 6; i++)
+            instr_out(data, offset, i, "dword %d\n", i);
+
+        instr_out(data, offset, 6,
+                  "Slice_Start_Mb_Num: %d"
+                  "\n",
+                  (data[6] >> 0) & 0x7fff);
+
+        for (i = 7; i < tail_end; i++)
+            instr_out(data, offset, i, "dword %d\n", i);
+
+    }
+}
+
+/*
+ * Stub for the G4x flavour of AVC_BSD_OBJECT: nothing is printed for the
+ * payload on non-Ironlake devices yet.
+ */
+static void 
+dump_g4x_avc_bsd_object(unsigned int *data, unsigned int offset, int *failures)
+{
+
+}
+
+/*
+ * Dispatch AVC_BSD_OBJECT decoding on the device generation: Ironlake gets
+ * the field-level decoder, everything else the (currently empty) G4x one.
+ */
+static void 
+dump_avc_bsd_object(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    if (IS_IRONLAKE(device))
+        dump_ironlake_avc_bsd_object(data, offset, failures);
+    else
+        dump_g4x_avc_bsd_object(data, offset, failures);
+}
+
+/*
+ * Decode one BSD AVC command at data[0].
+ *
+ * The sub-opcode selects an entry of avc_commands[]; the command length in
+ * dwords is the GFXPIPE length field plus 2.  Returns the number of dwords
+ * consumed: the decoded length for a known command, 1 for an unknown
+ * sub-opcode, or `count` (via BUFFER_FAIL) when the command runs past the end
+ * of the buffer.  *failures is bumped for unknown and truncated commands.
+ */
+static int
+dump_bsd_avc(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    unsigned int subopcode;
+    int length, i;
+
+    struct {
+        unsigned int subopcode;
+        int min_len;
+        int max_len;
+        char *name;
+        void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int  *failures);
+    } avc_commands[] = {
+        { 0x00, 0x06, 0x06, "AVC_BSD_IMG_STATE", dump_avc_bsd_img_state },
+        { 0x01, 0x02, 0x3a, "AVC_BSD_QM_STATE", dump_avc_bsd_qm_state },
+        { 0x02, 0x02, 0xd2, "AVC_BSD_SLICE_STATE", NULL },
+        { 0x03, 0x4a, 0x4a, "AVC_BSD_BUF_BASE_STATE", dump_avc_bsd_buf_base_state },
+        { 0x04, 0x03, 0x03, "BSD_IND_OBJ_BASE_ADDR", dump_bsd_ind_obj_base_addr },
+        { 0x08, 0x08, 0x10, "AVC_BSD_OBJECT", dump_avc_bsd_object },
+    };
+
+    subopcode = ((data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE);
+
+    for (i = 0; i < (int)(sizeof(avc_commands) / sizeof(avc_commands[0])); i++) {
+        if (subopcode == avc_commands[i].subopcode) {
+            int index;
+            int bad_length;
+
+            length = (data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH;
+            length += 2;
+            instr_out(data, offset, 0, "%s\n", avc_commands[i].name);
+
+            bad_length = (length < avc_commands[i].min_len ||
+                          length > avc_commands[i].max_len);
+
+            if (bad_length) {
+                fprintf(gout, "Bad length(%d) in %s [%d, %d]\n",
+                        length, avc_commands[i].name,
+                        avc_commands[i].min_len,
+                        avc_commands[i].max_len);
+            }
+
+            /* the whole command must lie inside the remaining buffer */
+            if (length - 1 >= count)
+                BUFFER_FAIL(count, length, avc_commands[i].name);
+
+            /*
+             * The detail decoders read fixed dword positions, so they are
+             * only safe when the length is within the expected range.  For
+             * anything else fall back to the raw dump, which is bounded by
+             * `length` (already checked against `count` above).
+             */
+            if (avc_commands[i].detail && !bad_length)
+                avc_commands[i].detail(data, offset, device, failures);
+            else {
+                for (index = 1; index < length; index++)
+                    instr_out(data, offset, index, "dword %d\n", index);
+            }
+
+            return length;
+        }
+    }
+
+    instr_out(data, offset, 0, "UNKNOWN AVC COMMAND\n");
+    (*failures)++;
+    return 1;
+}
+
+/*
+ * Decode one BSD pipeline command at data[0], dispatching on its opcode
+ * field.  Only the AVC opcode is understood; anything else is reported as
+ * unknown, counted in *failures and treated as a single dword.  Returns the
+ * number of dwords consumed.
+ */
+static int
+dump_gfxpipe_bsd(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    const unsigned int opcode = (data[0] & MASK_GFXPIPE_OPCODE) >> SHIFT_GFXPIPE_OPCODE;
+
+    if (opcode == OPCODE_BSD_AVC)
+        return dump_bsd_avc(data, offset, count, device, failures);
+
+    (*failures)++;
+    instr_out(data, offset, 0, "UNKNOWN BSD OPCODE\n");
+
+    return 1;
+}
+
+/*
+ * Decode one GFXPIPE command at data[0], dispatching on its sub-type field
+ * to the 3D or BSD decoder.  Any other sub-type is reported as unknown,
+ * counted in *failures and treated as a single dword.  Returns the number of
+ * dwords consumed.
+ */
+static int
+dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    int length;
+
+    switch ((data[0] & MASK_GFXPIPE_SUBTYPE) >> SHIFT_GFXPIPE_SUBTYPE) {
+    case GFXPIPE_3D:
+        length = dump_gfxpipe_3d(data, offset, count, device, failures);
+        break;
+
+    case GFXPIPE_BSD:
+        length = dump_gfxpipe_bsd(data, offset, count, device, failures);
+        break;
+
+    default:
+        length = 1;
+        (*failures)++;
+        instr_out(data, offset, 0, "UNKNOWN GFXPIPE COMMAND\n");
+        break;
+    }
+
+    return length;
+}
+
+/*
+ * Decode a batch buffer into /tmp/bsd_command_dump.txt.
+ *
+ * data:   first dword of the batch
+ * offset: address printed for data[0]; later dwords are shown at
+ *         offset + 4 * index
+ * count:  number of dwords available in data
+ * device: device id, forwarded to the per-command decoders
+ *
+ * The file is truncated on every call.  Returns the number of decode
+ * failures (unknown or truncated commands), or -1 when the dump file cannot
+ * be opened, in which case nothing is decoded.
+ */
+int intel_batchbuffer_dump(unsigned int *data, unsigned int offset, int count, unsigned int device)
+{
+    int index = 0;
+    int failures = 0;
+
+    gout = fopen("/tmp/bsd_command_dump.txt", "w+");
+
+    /* every decoder writes through gout, so bail out rather than pass NULL to fprintf */
+    if (!gout)
+        return -1;
+
+    while (index < count) {
+        /* each decoder returns the number of dwords it consumed (at least 1) */
+        switch ((data[index] & MASK_CMD_TYPE) >> SHIFT_CMD_TYPE) {
+        case CMD_TYPE_MI:
+            index += dump_mi(data + index, offset + index * 4,
+                             count - index, device, &failures);
+            break;
+
+        case CMD_TYPE_GFXPIPE:
+            index += dump_gfxpipe(data + index, offset + index * 4,
+                                  count - index, device, &failures);
+            break;
+
+        default:
+            instr_out(data, offset, index, "UNKNOWN COMMAND\n");
+            failures++;
+            index++;
+            break;
+        }
+
+        fflush(gout);
+    }
+
+    fclose(gout);
+    gout = NULL;    /* do not leave a dangling stream behind */
+
+    return failures;
+}
diff --git a/i965_drv_video/intel_batchbuffer_dump.h b/i965_drv_video/intel_batchbuffer_dump.h
new file mode 100644 (file)
index 0000000..ad096a9
--- /dev/null
@@ -0,0 +1,49 @@
+#ifndef _INTEL_BATCHBUFFER_DUMP_H_
+#define _INTEL_BATCHBUFFER_DUMP_H_
+
+/*
+ * Masks, shifts and field values used to pick apart the header dword of a
+ * command when dumping a batch buffer.  Each field is extracted as
+ * (dword & MASK_x) >> SHIFT_x.
+ */
+
+/* Command type: bits 31:29 */
+#define MASK_CMD_TYPE           0xE0000000
+
+#define SHIFT_CMD_TYPE          29
+
+/* Values of the command type field */
+#define CMD_TYPE_GFXPIPE        3
+#define CMD_TYPE_BLT            2
+#define CMD_TYPE_MI             0
+
+
+/*
+ * GFXPIPE header fields:
+ *   sub-type    bits 28:27
+ *   opcode      bits 26:24
+ *   sub-opcode  bits 23:16
+ *   length      bits 15:0
+ */
+#define MASK_GFXPIPE_SUBTYPE    0x18000000
+#define MASK_GFXPIPE_OPCODE     0x07000000
+#define MASK_GFXPIPE_SUBOPCODE  0x00FF0000
+#define MASK_GFXPIPE_LENGTH     0x0000FFFF
+
+#define SHIFT_GFXPIPE_SUBTYPE           27
+#define SHIFT_GFXPIPE_OPCODE            24
+#define SHIFT_GFXPIPE_SUBOPCODE         16
+#define SHIFT_GFXPIPE_LENGTH            0
+
+/* GFXPIPE sub-type value: 3D */
+#define GFXPIPE_3D              3
+
+/* GFXPIPE sub-type value: BSD */
+#define GFXPIPE_BSD             2
+
+/* BSD opcode value for AVC commands */
+#define OPCODE_BSD_AVC          4
+
+/* BSD sub-opcode values */
+#define SUBOPCODE_BSD_IMG       0
+#define SUBOPCODE_BSD_QM        1
+#define SUBOPCODE_BSD_SLICE     2
+#define SUBOPCODE_BSD_BUF_BASE  3
+#define SUBOPCODE_BSD_IND_OBJ   4
+#define SUBOPCODE_BSD_OBJECT    8
+
+/* MI header: opcode occupies bits 28:23 */
+#define MASK_MI_OPCODE          0x1F800000
+
+#define SHIFT_MI_OPCODE         23
+
+/* MI opcode values */
+#define OPCODE_MI_FLUSH                 0x04
+#define OPCODE_MI_BATCH_BUFFER_END      0x0A
+
+/*
+ * Decode count dwords starting at data and return the number of failures
+ * encountered.
+ */
+int intel_batchbuffer_dump(unsigned int *data, unsigned int offset, int count, unsigned int device);
+
+#endif /* _INTEL_BATCHBUFFER_DUMP_H_ */
index 9ab95b7..7fcb760 100644 (file)
@@ -17,7 +17,7 @@
 #define INLINE
 #endif
 
-#define BATCH_SIZE      0x10000
+#define BATCH_SIZE      0x100000
 #define BATCH_RESERVED  0x10
 
 #define CMD_MI                                  (0x0 << 29)
@@ -26,6 +26,7 @@
 #define MI_NOOP                                 (CMD_MI | 0)
 
 #define MI_BATCH_BUFFER_END                     (CMD_MI | (0xA << 23))
+#define MI_BATCH_BUFFER_START                   (CMD_MI | (0x31 << 23))
 
 #define MI_FLUSH                                (CMD_MI | (0x4 << 23))
 #define STATE_INSTRUCTION_CACHE_INVALIDATE      (0x1 << 0)
@@ -85,6 +86,7 @@ struct intel_driver_data
     int locked;
 
     struct intel_batchbuffer *batch;
+    struct intel_batchbuffer *batch_bcs;
     dri_bufmgr *bufmgr;
 };
 
@@ -118,8 +120,8 @@ struct intel_region
 #define PCI_CHIP_G45_G                  0x2E22
 #define PCI_CHIP_G41_G                  0x2E32
 
-#define PCI_CHIP_IGDNG_D_G              0x0042
-#define PCI_CHIP_IGDNG_M_G              0x0046
+#define PCI_CHIP_IRONLAKE_D_G           0x0042
+#define PCI_CHIP_IRONLAKE_M_G           0x0046
 
 #define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G || \
                                  devid == PCI_CHIP_Q45_G || \
@@ -128,8 +130,8 @@ struct intel_region
 #define IS_GM45(devid)          (devid == PCI_CHIP_GM45_GM)
 #define IS_G4X(devid)          (IS_G45(devid) || IS_GM45(devid))
 
-#define IS_IGDNG_D(devid)       (devid == PCI_CHIP_IGDNG_D_G)
-#define IS_IGDNG_M(devid)       (devid == PCI_CHIP_IGDNG_M_G)
-#define IS_IGDNG(devid)         (IS_IGDNG_D(devid) || IS_IGDNG_M(devid))
+#define IS_IRONLAKE_D(devid)    (devid == PCI_CHIP_IRONLAKE_D_G)
+#define IS_IRONLAKE_M(devid)    (devid == PCI_CHIP_IRONLAKE_M_G)
+#define IS_IRONLAKE(devid)      (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid))
 
 #endif /* _INTEL_DRIVER_H_ */
index 1900f8c..e867139 100644 (file)
@@ -50,7 +50,6 @@ static int object_heap_expand( object_heap_p heap )
     {
         return -1; /* Out of memory */
     }
-    memset(new_heap_index + heap->heap_size*heap->object_size, 0, heap->heap_increment * new_heap_size);
     heap->heap_index = new_heap_index;
     next_free = heap->next_free;
     for(i = new_heap_size; i-- > heap->heap_size; )
index 24f1aa8..2fd019b 100644 (file)
@@ -1 +1 @@
-SUBDIRS = mpeg2 render
+SUBDIRS = h264 mpeg2 render
diff --git a/i965_drv_video/shaders/h264/Makefile.am b/i965_drv_video/shaders/h264/Makefile.am
new file mode 100644 (file)
index 0000000..d6d106b
--- /dev/null
@@ -0,0 +1 @@
+SUBDIRS = mc
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB.inc b/i965_drv_video/shaders/h264/ildb/AVC_ILDB.inc
new file mode 100644 (file)
index 0000000..734717f
--- /dev/null
@@ -0,0 +1,718 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__AVC_ILDB_HEADER__)      // Make sure this file is only included once\r
+#define __AVC_ILDB_HEADER__\r
+\r
+// Module name: AVC_ILDB.inc\r
+\r
+#undef ORIX\r
+#undef ORIY\r
+\r
+//========== Root thread input parameters ==================================================\r
+#define RootParam                              r1              // :w\r
+#define        MBsCntX                                 r1.0    // :w, MB count per row\r
+#define        MBsCntY                                 r1.1    // :w, MB count per col\r
+//#define PicType                                      r1.2    // :w, Picture type\r
+#define        MaxThreads                              r1.3    // :w, Max Thread limit\r
+#define EntrySignature                 r1.4    // :w, Debug flag\r
+#define BitFields                              r1.5    // :uw\r
+#define        MbaffFlag                       BIT0    // :w, mbaff flag, bit 0 in BitFields\r
+#define        BotFieldFlag            BIT1    // :w, bottom field flag, bit 1 in BitFields\r
+#define        CntlDataExpFlag         BIT2    // :w, Control Data Expansion Flag, bit 2 in BitFields\r
+#define RampConst                              r1.12   // 8 :ub, Ramp constant, r1.12 - r1.19:ub\r
+#define StepToNextMB                   r1.20   // :b, 2 bytes\r
+#define Minus2Minus1                   r1.22   // :b, 2 bytes\r
+// next one starts at r1.11:w\r
+\r
+#define        TopFieldFlag            0xFFFD  // :w, top field flag, used to set bit1 to 0.\r
+\r
+\r
+//========== Root Locals =============================================================\r
+\r
+// Variables in root kernel for launching child thread\r
+#define ChildParam                             r2.0    // :w\r
+//Not used  #define    URBOffset                               r2.3    // :w, Each row occupies 4 URB entries.  All children in the same row use the same set of URB entries\r
+#define        CurCol                                  r2.10   // :w, current col\r
+#define        CurColB                                 r2.20   // :b, current col\r
+#define        CurRow                                  r2.11   // :w, current row\r
+#define        CurRowB                                 r2.22   // :b, current row\r
+#define        LastCol                                 r2.12   // :w, last col\r
+#define        LastRow                                 r2.13   // :w, last row\r
+\r
+// Root local constants during spawning process\r
+#define        Col_Boundary                    r3.0    // :w, \r
+#define        Row_Boundary                    r3.1    // :w, \r
+//#define      TotalBlocks                             r3.2    // :w, Total blocks in the frame \r
+#define        URB_EntriesPerMB_2              r3.3    // :w, = URB entries per MB, but in different form\r
+#define        URBOffsetUVBase                 r3.4    // :w, UV Base offset in URB\r
+\r
+#define        Temp1_D                                 r3.6    // :d:\r
+#define        Temp1_W                                 r3.12   // :w, Temp1\r
+#define        Temp1_B                                 r3.24   // :b, = Temp1_W\r
+#define        Temp2_W                                 r3.13   // :w, Temp2\r
+#define        Temp2_B                                 r3.26   // :b, = Temp2_W\r
+\r
+// Root local variables\r
+#define JumpTable                              r4              // :d, jump table\r
+#define JUMPTABLE_BASE                 4*32\r
+#define JumpAddr                               a0.7\r
+\r
+#define TopRowForScan                  r5.0    // :w, track the top row for scan. All rows above this row is deblocked already. \r
+\r
+\r
+// Child Thread R0 Header Field\r
+#define MRF0                                   m0              \r
+#define CT_R0Hdr                               m1\r
+\r
+/*\r
+.declare GatewayAperture       Base=r50.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud  \r
+#define GatewayApertureB       1600    // r50 byte offset from r0.0\r
+\r
+// Chroma root thread updates luma root's ThreadLimit at r10.0:w via gateway\r
+#define ThreadLimit                            r62.0   // :w, thread limit //r56.0\r
+#define THREAD_LIMIT_OFFSET            0x01800000      // Offset from r50 to r56 = 12*32 = 384 = 0x0180.  0x180 << 16 = 0x01800000\r
+       //#define THREAD_LIMIT_OFFSET           0x00C00000      // Offset from r50 to r56 = 6*32 = 192 = 0x00C0.  0xC0 << 16 = 0x00C00000\r
+*/\r
+\r
+// Gateway size is 16 GRF.  68 rows of MBs takes 9 GRFs (r6 - r14)\r
+// For CTG: Expanded to support 1280 rows of pixel (80 rows of MBs).  It requires 10 GRFs (r6 - r15)\r
+.declare GatewayAperture       Base=r6.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud   \r
+#define GatewayApertureB       192     // r6 byte offset from r0.0 (6*32 = 192)\r
+\r
+// Chroma root thread updates luma root's ThreadLimit (r18.0:w, defined below) via gateway\r
+#define ThreadLimit                            r18.0   // :w, thread limit \r
+#define THREAD_LIMIT_OFFSET            0x01800000      // Offset from r6 (GatewayAperture base) to r18 (ThreadLimit) = 12*32 = 384 = 0x0180.  0x180 << 16 = 0x01800000\r
+#define        TotalBlocks                             r18.1   // :w, Total blocks in the frame \r
+\r
+// Root local variables\r
+#define        ChildThreadsID                  r19.0   // :w, Child thread ID, unique to each child\r
+#define        OutstandingThreads              r20.0   // :w, Outstanding threads \r
+#define ProcessedMBs                   r20.1   // :w, # of MBs processed \r
+\r
+#define        URBOffset                               r21.0   // :w, Each row occupies 4 URB entries.  All children in the same row use the same set of URB entries\r
+\r
+//=================================================================================\r
+\r
+#define ScoreBd_Size                   128 //96 // size of Status[] or ProcCol[]\r
+\r
+#define ScoreBd_Idx                            2               \r
+//#define Saved_Col                            0\r
+\r
+#define StatusAddr                             a0.4    // :w, point to r50\r
+//=================================================================================\r
+\r
+\r
+// Gateway payload\r
+#define GatewayPayload                 r48.0   // :ud\r
+#define GatewayPayloadKey              r48.8   // :uw\r
+#define DispatchID                             r48.20  // :ub\r
+#define RegBase_GatewaySize    r48.5   // :ud, used in open a gateway\r
+#define Offset_Length                  r48.5   // :ud, used in forwardmsg back to root\r
+#define EUID_TID                               r48.9   // :uw, used in forwardmsg back to root\r
+\r
+// Gateway response\r
+#define GatewayResponse                r49.0   // :ud, one GRF\r
+\r
+#define URBWriteMsgDesc                        a0.0    // Used in URB write, :ud\r
+#define URBWriteMsgDescLow             a0.0    // Used in URB write, :uw\r
+#define URBWriteMsgDescHigh            a0.1    // Used in URB write, :uw\r
+\r
+.declare WritebackResponse             Base=r50 ElementSize=4 SrcRegion=REGION(8,1) Type=ud    // 1 GRF for write backs\r
+\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////////////\r
+// IDesc Order                                 Offset\r
+//\r
+// 0) luma root                                        0 from luma root\r
+// 1) luma child                               16 from luma root\r
+// 2) chroma root                              32 from luma root\r
+// 3) chroma child                             16 from chroma root\r
+\r
+// 4) luma field root                  0 from luma field root\r
+// 5) luma field child                 16 from luma field root\r
+// 6) chroma field root                        32 from luma field root\r
+// 7) chroma field child               16 from chroma field root\r
+\r
+// 8) luma Mbaff root                  0 from luma Mbaff root\r
+// 9) luma Mbaff child                         16 from luma Mbaff root\r
+// 10) chroma Mbaff root               32 from luma Mbaff root\r
+// 11) chroma Mbaff child              16 from chroma Mbaff root\r
+\r
+// IDesc offset within non-mbaff or mbaff mode\r
+#define CHROMA_ROOT_OFFSET             32              // Offset from luma root to chroma root\r
+#define CHILD_OFFSET                   16              // Offset from luma root to luma child, \r
+                                                                               // and from chroma root to chroma child\r
+/////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+//========== End of Root Variables ======================================================\r
+\r
+\r
+//========== Child thread input parameters ==============================================\r
+//#define      MBsCntX                                 r1.0    // :w, MB count per row (same as root)\r
+//#define      MBsCntY                                 r1.1    // :w, MB count per col (same as root)\r
+//#define PicTypeC                             r1.2    // :w, Picture type             same as root thread (same as root)\r
+#define        URBOffsetC                              r1.3    // :w, \r
+#define EntrySignatureC                        r1.4    // :w, Debug field (same as root)\r
+//#define BitFields                            r1.5    // :w (same as root)\r
+//#define      MbaffFlag                       BIT0    // :w, mbaff flag, bit 0 in BitFields\r
+//#define      BotFieldFlag            BIT1    // :w, bottom field flag, bit 1 in BitFields\r
+//#define      CntlDataExpFlag         BIT2    // :w, Control Data Expansion Flag, bit 2 in BitFields\r
+#define RampConstC                             r1.12   // 8 :ub, Ramp constant, r1.12 - r1.19:ub.\r
+#define        ORIX                                    r1.10   // :w, carry over from root r1 in MB count\r
+#define        ORIY                                    r1.11   // :w, carry over from root r1 in MB count\r
+#define        LastColC                                r1.12   // :w, last col\r
+#define        LastRowC                                r1.13   // :w, last row\r
+\r
+.declare GatewayApertureC              Base=r1.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud   \r
+#define GatewayApertureCBase   32              // r1 byte offset from r0.0\r
+\r
+\r
+//========== Child Variables ============================================================\r
+\r
+// Mbaff Alpha, Beta, Tc0 vectors for an edge\r
+.declare Mbaff_ALPHA   Base=r14.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw         // r14\r
+.declare Mbaff_BETA    Base=r15.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw         // r15\r
+.declare Mbaff_TC0             Base=r16.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw         // r16\r
+.declare RRampW                        Base=r17.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w           // r17\r
+\r
+.declare Mbaff_ALPHA2  Base=r45.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw         // alpha2 = (alpha >> 2) + 2\r
+\r
+\r
+#define        ORIX_CUR                                r46.0   // :w, current block origin X in bytes\r
+#define        ORIY_CUR                                r46.1   // :w, current block origin Y in bytes\r
+#define        ORIX_LEFT                               r46.2   // :w, left block origin X in bytes\r
+#define        ORIY_LEFT                               r46.3   // :w, left block origin Y in bytes\r
+#define        ORIX_TOP                                r46.4   // :w, top block origin X in bytes\r
+#define        ORIY_TOP                                r46.5   // :w, top block origin Y in bytes\r
+//#define FilterSampleFlag             r46.6   // :uw,\r
+#define        CTemp0_W                                r46.7   // :w, child Temp0\r
+\r
+#define alpha                                  r46.8   // :w, Scalar version for non Mbaff\r
+#define beta                                   r46.9   // :w, Scalar version for non Mbaff\r
+#define tc0                                            r46.20  // 4 :ub, r46.20 ~ r46.23, Scalar version for non Mbaff\r
+#define MaskA                                  r46.12  // :uw\r
+#define MaskB                                  r46.13  // :uw\r
+\r
+// Child control flags\r
+#define DualFieldMode                  r47.0   // Cur MB is frame based, above MB is field based in mbaff mode\r
+                                                                               // :uw, 0 = not in dual field mode,  1 = in dual field mode, filter both top and bot fields\r
+#define        GateWayOffsetC                  r47.1   // :w, Gateway offset for child writing into root space\r
+#define CntrlDataOffsetY               r47.1   // :ud, MB control data data offset\r
+#define alpha2                                 r47.4   // :uw,         alpha2 = (alpha >> 2) + 2\r
+\r
+#define VertEdgePattern                        r47.5   // :uw, \r
+\r
+#define        CTemp1_W                                r47.6   // :w, child Temp1\r
+#define        CTemp1_B                                r47.12  // :b, = child Temp1_W\r
+#define        CTemp2_W                                r47.7   // :w, child Temp2\r
+#define        CTemp2_B                                r47.14  // :b, = child Temp2_W\r
+\r
+// Used in child\r
+#define ECM_AddrReg                            a0.4    // Edge Control Map register\r
+#define P_AddrReg                              a0.6    // point to P samples in left or top MB\r
+#define Q_AddrReg                              a0.7    // point to Q samples in cur MB\r
+\r
+\r
+.declare       RTempD     Base=r26.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d        // r26-27\r
+.declare       RTempB     Base=r26.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub       // r26-27\r
+.declare       RTempW     Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w        // r26-27\r
+#define        LEFT_TEMP_D             RTempD\r
+#define        LEFT_TEMP_B             RTempB\r
+#define        LEFT_TEMP_W             RTempW\r
+\r
+.declare       TempRow0   Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       TempRow0B  Base=r26.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub\r
+.declare       TempRow1   Base=r27.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       TempRow1B  Base=r27.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub\r
+\r
+.declare       CUR_TEMP_D      Base=r28.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d   // 8 GRFs\r
+.declare       CUR_TEMP_B      Base=r28.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub\r
+.declare       CUR_TEMP_W      Base=r28.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+#define FilterSampleFlag               r28.0   // :uw,\r
+\r
+.declare       A               Base=r28.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w\r
+.declare       B               Base=r29.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w\r
+\r
+.declare       TempRow3   Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       TempRow3B  Base=r30.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub\r
+\r
+.declare       tc0_exp         Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       tc8                     Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+.declare       tc_exp          Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       tx_exp_8        Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+.declare       q0_p0           Base=r32.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       ABS_q0_p0       Base=r33.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+.declare       ap                      Base=r34.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       aq                      Base=r35.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+// These buffers have the src data for each edge to be deblocked.\r
+// They hold the pixels already modified by previously filtered edges.\r
+//\r
+//     Y:\r
+//     +----+----+----+----+----+----+----+----+\r
+//     | p3 | p2 | P1 | p0 | q0 | q1 | q2 | q3 |\r
+//     +----+----+----+----+----+----+----+----+\r
+//\r
+//     p3 = r[P_AddrReg, 0]<16;16,1>  \r
+//     p2 = r[P_AddrReg, 16]<16;16,1> \r
+//     p1 = r[P_AddrReg, 32]<16;16,1> \r
+//     p0 = r[P_AddrReg, 48]<16;16,1> \r
+//     q0 = r[Q_AddrReg, 0]<16;16,1>  \r
+//     q1 = r[Q_AddrReg, 16]<16;16,1> \r
+//     q2 = r[Q_AddrReg, 32]<16;16,1> \r
+//     q3 = r[Q_AddrReg, 48]<16;16,1> \r
+\r
+.declare       p0123_W         Base=r36.0  ElementSize=2 SrcRegion=REGION(16,1) Type=uw                // r36, r37\r
+.declare       q0123_W         Base=r38.0  ElementSize=2 SrcRegion=REGION(16,1) Type=uw                // r38, r39\r
+.declare       p3                      Base=r36.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+.declare       p2                      Base=r36.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+.declare       p1                      Base=r37.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+.declare       p0                      Base=r37.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+.declare       q0                      Base=r38.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+.declare       q1                      Base=r38.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+.declare       q2                      Base=r39.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+.declare       q3                      Base=r39.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub\r
+\r
+.declare       TempRow2   Base=r38.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+// Temp space for mbaff dual field mode\r
+#define                ABOVE_CUR_MB_BASE       40*GRFWIB                                                                                       // Byte offset to r40\r
+.declare    ABOVE_CUR_MB_YW  Base=r40  ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+.declare    ABOVE_CUR_MB_UW  Base=r40  ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+\r
+.declare       P0_plus_P1   Base=r41.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       Q0_plus_Q1   Base=r42.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+.declare       P2_plus_P3   Base=r43.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare       Q2_plus_Q3   Base=r44.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+// MB control data reference\r
+\r
+// Expanded control data is in r18 - r25\r
+.declare    CNTRL_DATA_D Base=r18      ElementSize=4 SrcRegion=REGION(8,1) Type=ud             // For read, 8 GRFs\r
+#define                CNTRL_DATA_BASE 18*GRFWIB                                                                                               // Base offset to r18\r
+\r
+//  Bit mask for extracting bits\r
+#define        MbaffFrameFlag                                  0x01\r
+#define        FieldModeCurrentMbFlag                  0x02\r
+#define FieldModeLeftMbFlag                            0x04\r
+#define FieldModeAboveMbFlag                   0x08\r
+#define FilterInternal8x8EdgesFlag             0x10\r
+#define FilterInternal4x4EdgesFlag             0x20\r
+#define FilterLeftMbEdgeFlag                   0x40\r
+#define FilterTopMbEdgeFlag                            0x80\r
+\r
+#define        DISABLE_ILDB_FLAG                               0x01\r
+\r
+//  Exact bit pattern for left and cur MB coding mode (frame vs. field)\r
+#define LEFT_FRAME_CUR_FRAME                   0x00\r
+#define LEFT_FRAME_CUR_FIELD                   0x02\r
+#define LEFT_FIELD_CUR_FRAME                   0x04\r
+#define LEFT_FIELD_CUR_FIELD                   0x06\r
+\r
+//  Exact bit pattern for above and cur MB coding mode (frame vs. field)\r
+#define ABOVE_FRAME_CUR_FRAME                  0x00\r
+#define ABOVE_FRAME_CUR_FIELD                  0x02\r
+#define ABOVE_FIELD_CUR_FRAME                  0x08\r
+#define ABOVE_FIELD_CUR_FIELD                  0x0A\r
+\r
+\r
+\r
+//========== MB control data field offset in byte ==========\r
+\r
+#if !defined(_APPLE)\r
+\r
+// GRF0 - GRF1 holds original control data\r
+\r
+// GRF0\r
+#define HorizOrigin                                            0\r
+#define VertOrigin                                             1\r
+#define BitFlags                                               2               // Bit flags\r
+\r
+#define bbSinternalLeftVert                            4               // Internal left vertical bS, 2 bits per bS for 4 Y pixels and 2 U/V pixels     \r
+#define bbSinternalMidVert                             5               // Internal mid vertical bS\r
+#define bbSinternalRightVert                   6               // Internal right vertical bS\r
+#define bbSinternalTopHorz                             7               // Internal top horizontal bS\r
+\r
+#define        bbSinternalMidHorz                              8               // Internal mid horizontal bS\r
+#define        bbSinternalBotHorz                              9               // Internal bottom horizontal bS\r
+#define        wbSLeft0                                                10              // External left vertical bS (0), 4 bits per bS for 4 Y pixels and 2 U/V pixels, and byte 11\r
+\r
+#define        wbSLeft1                                                12              // External left vertical bS (1), and byte 13\r
+#define        wbSTop0                                                 14              // External top horizontal bS (0), and byte 15\r
+\r
+#define        wbSTop1                                                 16              // External top horizontal bS (1), and byte 17\r
+#define        bIndexAinternal_Y                               18              // Internal index A for Y\r
+#define        bIndexBinternal_Y                               19              // Internal index B for Y\r
+\r
+#define        bIndexAleft0_Y                                  20              // Left index A for Y (0)\r
+#define        bIndexBleft0_Y                                  21              // Left index B for Y (0)\r
+#define        bIndexAleft1_Y                                  22              // Left index A for Y (1)\r
+#define        bIndexBleft1_Y                                  23              // Left index B for Y (1)\r
+\r
+#define        bIndexAtop0_Y                                   24              // Top index A for Y (0)\r
+#define        bIndexBtop0_Y                                   25              // Top index B for Y (0)\r
+#define        bIndexAtop1_Y                                   26              // Top index A for Y (1)\r
+#define        bIndexBtop1_Y                                   27              // Top index B for Y (1)\r
+\r
+#define        bIndexAinternal_Cb                              28              // Internal index A for Cb\r
+#define        bIndexBinternal_Cb                              29              // Internal index B for Cb\r
+#define        bIndexAleft0_Cb                                 30              // Left index A for Cb (0)\r
+#define        bIndexBleft0_Cb                                 31              // Left index B for Cb (0)\r
+\r
+// GRF1\r
+#define        bIndexAleft1_Cb                                 32              // Left index A for Cb (1)\r
+#define        bIndexBleft1_Cb                                 33              // Left index B for Cb (1)\r
+#define        bIndexAtop0_Cb                                  34              // Top index A for Cb (0)\r
+#define        bIndexBtop0_Cb                                  35              // Top index B for Cb (0)\r
+\r
+#define        bIndexAtop1_Cb                                  36              // Top index A for Cb (1)\r
+#define        bIndexBtop1_Cb                                  37              // Top index B for Cb (1)\r
+#define        bIndexAinternal_Cr                              38              // Internal index A for Cr\r
+#define        bIndexBinternal_Cr                              39              // Internal index B for Cr\r
+\r
+#define        bIndexAleft0_Cr                                 40              // Left index A for Cr (0)\r
+#define bIndexBleft0_Cr                                        41              // Left index B for Cr (0)\r
+#define        bIndexAleft1_Cr                                 42              // Left index A for Cr (1)\r
+#define        bIndexBleft1_Cr                                 43              // Left index B for Cr (1)\r
+\r
+#define        bIndexAtop0_Cr                                  44              // Top index A for Cr (0)\r
+#define        bIndexBtop0_Cr                                  45              // Top index B for Cr (0)\r
+#define        bIndexAtop1_Cr                                  46              // Top index A for Cr (1)\r
+#define        bIndexBtop1_Cr                                  47              // Top index B for Cr (1)\r
+\r
+#define        ExtBitFlags                                             48              // Extended bit flags, such as disable ILDB bits\r
+\r
+// Offset 49 - 63 not used\r
+\r
+\r
+//===== GRF2 - GRF7 hold expanded control data =====\r
+\r
+// GRF2\r
+#define        wEdgeCntlMap_IntLeftVert                64              // Derived from bbSinternalLeftVert, 1 bit per pixel \r
+#define        wEdgeCntlMap_IntMidVert                 66              // Derived from bbSinternalMidVert\r
+               \r
+#define        wEdgeCntlMap_IntRightVert               68              // Derived from bbSinternalRightVert\r
+#define        wEdgeCntlMap_IntTopHorz                 70              // Derived from bbSinternalTopHorz, 1bit per pixel \r
+               \r
+#define        wEdgeCntlMap_IntMidHorz                 72              // Derived from bbSinternalMidHorz\r
+#define        wEdgeCntlMap_IntBotHorz                 74              // Derived from bbSinternalBotHorz\r
+\r
+// Offset 76 - 79 not used\r
+\r
+#define        wEdgeCntlMapA_ExtLeftVert0              80              // Derived from wbSLeft0, 1bit per pixel\r
+#define        wEdgeCntlMapB_ExtLeftVert0              82              // Derived from wbSLeft0\r
+\r
+#define        wEdgeCntlMapA_ExtTopHorz0               84              // Derived from wbSTop0, 1bit per pixel\r
+#define        wEdgeCntlMapB_ExtTopHorz0               86              // Derived from wbSTop0\r
+\r
+#define        wEdgeCntlMapA_ExtLeftVert1              88              // Derived from wbSLeft1, 1bit per pixel\r
+#define        wEdgeCntlMapB_ExtLeftVert1              90              // Derived from wbSLeft1\r
+\r
+#define        wEdgeCntlMapA_ExtTopHorz1               92              // Derived from wbSTop1, 1bit per pixel\r
+#define        wEdgeCntlMapB_ExtTopHorz1               94              // Derived from wbSTop1\r
+\r
+\r
+// GRF3\r
+#define        bTc0_v00_0_Y                                    96              // Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0\r
+#define        bTc0_v10_0_Y                                    97              // Derived from bSv10_0 and bIndexAleft0_Y\r
+#define        bTc0_v20_0_Y                                    98              // Derived from bSv20_0 and bIndexAleft0_Y\r
+#define        bTc0_v30_0_Y                                    99              // Derived from bSv30_0 and bIndexAleft0_Y\r
+\r
+#define        bTc0_v01_Y                                              100             // Derived from bSv01 and bIndexAinternal_Y\r
+#define        bTc0_v11_Y                                              101             // Derived from bSv11 and bIndexAinternal_Y\r
+#define        bTc0_v21_Y                                              102             // Derived from bSv21 and bIndexAinternal_Y\r
+#define        bTc0_v31_Y                                              103             // Derived from bSv31 and bIndexAinternal_Y\r
+\r
+#define        bTc0_v02_Y                                              104             // Derived from bSv02 and bIndexAinternal_Y\r
+#define        bTc0_v12_Y                                              105             // Derived from bSv12 and bIndexAinternal_Y\r
+#define        bTc0_v22_Y                                              106             // Derived from bSv22 and bIndexAinternal_Y\r
+#define        bTc0_v32_Y                                              107             // Derived from bSv32 and bIndexAinternal_Y\r
+\r
+#define        bTc0_v03_Y                                              108             // Derived from bSv03 and bIndexAinternal_Y\r
+#define        bTc0_v13_Y                                              109             // Derived from bSv13 and bIndexAinternal_Y\r
+#define        bTc0_v23_Y                                              110             // Derived from bSv23 and bIndexAinternal_Y\r
+#define        bTc0_v33_Y                                              111             // Derived from bSv33 and bIndexAinternal_Y\r
+\r
+#define        bTc0_h00_0_Y                                    112             // Derived from bSh00_0 and bIndexAleft0_Y -- NOTE(review): h0x_0 look like top-edge tc0; bIndexAtop0_Y probably intended on these four lines, confirm\r
+#define        bTc0_h01_0_Y                                    113             // Derived from bSh01_0 and bIndexAleft0_Y\r
+#define        bTc0_h02_0_Y                                    114             // Derived from bSh02_0 and bIndexAleft0_Y\r
+#define        bTc0_h03_0_Y                                    115             // Derived from bSh03_0 and bIndexAleft0_Y\r
+\r
+#define        bTc0_h10_Y                                              116             // Derived from bSh10 and bIndexAinternal_Y\r
+#define        bTc0_h11_Y                                              117             // Derived from bSh11 and bIndexAinternal_Y\r
+#define        bTc0_h12_Y                                              118             // Derived from bSh12 and bIndexAinternal_Y\r
+#define        bTc0_h13_Y                                              119             // Derived from bSh13 and bIndexAinternal_Y\r
+               \r
+#define        bTc0_h20_Y                                              120             // Derived from bSh20 and bIndexAinternal_Y\r
+#define        bTc0_h21_Y                                              121             // Derived from bSh21 and bIndexAinternal_Y\r
+#define        bTc0_h22_Y                                              122             // Derived from bSh22 and bIndexAinternal_Y\r
+#define        bTc0_h23_Y                                              123             // Derived from bSh23 and bIndexAinternal_Y\r
+\r
+#define        bTc0_h30_Y                                              124             // Derived from bSh30 and bIndexAinternal_Y\r
+#define        bTc0_h31_Y                                              125             // Derived from bSh31 and bIndexAinternal_Y\r
+#define        bTc0_h32_Y                                              126             // Derived from bSh32 and bIndexAinternal_Y\r
+#define        bTc0_h33_Y                                              127             // Derived from bSh33 and bIndexAinternal_Y\r
+\r
+// GRF4\r
+#define        bAlphaLeft0_Y                                   128             // Derived from bIndexAleft0_Y  \r
+#define        bBetaLeft0_Y                                    129             // Derived from bIndexBleft0_Y  \r
+#define        bAlphaTop0_Y                                    130             // Derived from bIndexAtop0_Y   \r
+#define        bBetaTop0_Y                                             131             // Derived from bIndexBtop0_Y   \r
+\r
+#define        bAlphaInternal_Y                                132             // Derived from bIndexAinternal_Y       \r
+#define        bBetaInternal_Y                                 133             // Derived from bIndexBinternal_Y       \r
+// Offset 134 - 135 not used\r
+\r
+// Offset 136 - 143 not used\r
+#define        bAlphaLeft1_Y                                   144             // Derived from bIndexAleft1_Y  Used in Mbaff mode only\r
+#define        bBetaLeft1_Y                                    145             // Derived from bIndexBleft1_Y  Used in Mbaff mode only\r
+#define        bAlphaTop1_Y                                    146             // Derived from bIndexAtop1_Y   Used in Mbaff mode only\r
+#define        bBetaTop1_Y                                             147             // Derived from bIndexBtop1_Y   Used in Mbaff mode only\r
+\r
+// Offset 148 - 151 not used\r
+#define        bTc0_v00_1_Y                                    152             // Derived from bSv00_1 and bIndexAleft1_Y      Used in Mbaff mode only\r
+#define        bTc0_v10_1_Y                                    153             // Derived from bSv10_1 and bIndexAleft1_Y      Used in Mbaff mode only\r
+#define        bTc0_v20_1_Y                                    154             // Derived from bSv20_1 and bIndexAleft1_Y      Used in Mbaff mode only\r
+#define        bTc0_v30_1_Y                                    155             // Derived from bSv30_1 and bIndexAleft1_Y      Used in Mbaff mode only\r
+\r
+#define        bTc0_h00_1_Y                                    156             // Derived from bSh00_1 and bIndexAleft1_Y      Used in Mbaff mode only -- NOTE(review): h0x_1 look like top-edge tc0; bIndexAtop1_Y probably intended on these four lines, confirm\r
+#define        bTc0_h01_1_Y                                    157             // Derived from bSh01_1 and bIndexAleft1_Y      Used in Mbaff mode only\r
+#define        bTc0_h02_1_Y                                    158             // Derived from bSh02_1 and bIndexAleft1_Y      Used in Mbaff mode only\r
+#define        bTc0_h03_1_Y                                    159             // Derived from bSh03_1 and bIndexAleft1_Y      Used in Mbaff mode only\r
+\r
+\r
+// GRF5\r
+#define        bTc0_v00_0_Cb                                   160             // Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0   Left0\r
+#define        bTc0_v10_0_Cb                                   161             // Derived from bSv10_0 and bIndexAleft0_Cb     \r
+#define        bTc0_v20_0_Cb                                   162             // Derived from bSv20_0 and bIndexAleft0_Cb     \r
+#define        bTc0_v30_0_Cb                                   163             // Derived from bSv30_0 and bIndexAleft0_Cb     \r
+\r
+#define        bTc0_v02_Cb                                             164             // Derived from bSv02 and bIndexAinternal_Cb    MidVert\r
+#define        bTc0_v12_Cb                                             165             // Derived from bSv12 and bIndexAinternal_Cb    \r
+#define        bTc0_v22_Cb                                             166             // Derived from bSv22 and bIndexAinternal_Cb    \r
+#define        bTc0_v32_Cb                                             167             // Derived from bSv32 and bIndexAinternal_Cb    \r
+\r
+#define        bTc0_h00_0_Cb                                   168             // Derived from bSh00_0 and bIndexAleft0_Cb     Top0 -- NOTE(review): tagged Top0 but cites the left index; bIndexAtop0_Cb probably intended on these four lines, confirm\r
+#define        bTc0_h01_0_Cb                                   169             // Derived from bSh01_0 and bIndexAleft0_Cb     \r
+#define        bTc0_h02_0_Cb                                   170             // Derived from bSh02_0 and bIndexAleft0_Cb     \r
+#define        bTc0_h03_0_Cb                                   171             // Derived from bSh03_0 and bIndexAleft0_Cb     \r
+\r
+#define        bTc0_h20_Cb                                             172             // Derived from bSh20 and bIndexAinternal_Cb    MidHorz\r
+#define        bTc0_h21_Cb                                             173             // Derived from bSh21 and bIndexAinternal_Cb    \r
+#define        bTc0_h22_Cb                                             174             // Derived from bSh22 and bIndexAinternal_Cb    \r
+#define        bTc0_h23_Cb                                             175             // Derived from bSh23 and bIndexAinternal_Cb    \r
+\r
+#define        bTc0_v00_0_Cr                                   176             // Derived from bSv00_0 and bIndexAleft0_Cr, 2 pixels per tc0   Left0\r
+#define        bTc0_v10_0_Cr                                   177             // Derived from bSv10_0 and bIndexAleft0_Cr     \r
+#define        bTc0_v20_0_Cr                                   178             // Derived from bSv20_0 and bIndexAleft0_Cr     \r
+#define        bTc0_v30_0_Cr                                   179             // Derived from bSv30_0 and bIndexAleft0_Cr     \r
+\r
+#define        bTc0_v02_Cr                                             180             // Derived from bSv02 and bIndexAinternal_Cr    MidVert\r
+#define        bTc0_v12_Cr                                             181             // Derived from bSv12 and bIndexAinternal_Cr    \r
+#define        bTc0_v22_Cr                                             182             // Derived from bSv22 and bIndexAinternal_Cr    \r
+#define        bTc0_v32_Cr                                             183             // Derived from bSv32 and bIndexAinternal_Cr    \r
+\r
+#define        bTc0_h00_0_Cr                                   184             // Derived from bSh00_0 and bIndexAleft0_Cr, 2 pixels per tc0   Top0 -- NOTE(review): tagged Top0 but cites the left index; bIndexAtop0_Cr probably intended on these four lines, confirm\r
+#define        bTc0_h01_0_Cr                                   185             // Derived from bSh01_0 and bIndexAleft0_Cr     \r
+#define        bTc0_h02_0_Cr                                   186             // Derived from bSh02_0 and bIndexAleft0_Cr     \r
+#define        bTc0_h03_0_Cr                                   187             // Derived from bSh03_0 and bIndexAleft0_Cr     \r
+\r
+#define        bTc0_h20_Cr                                             188             // Derived from bSh20 and bIndexAinternal_Cr    MidHorz\r
+#define        bTc0_h21_Cr                                             189             // Derived from bSh21 and bIndexAinternal_Cr    \r
+#define        bTc0_h22_Cr                                             190             // Derived from bSh22 and bIndexAinternal_Cr    \r
+#define        bTc0_h23_Cr                                             191             // Derived from bSh23 and bIndexAinternal_Cr    \r
+\r
+// GRF6\r
+#define        bAlphaLeft0_Cb                                  192             // Derived from bIndexAleft0_Cb\r
+#define        bBetaLeft0_Cb                                   193             // Derived from bIndexBleft0_Cb\r
+#define        bAlphaTop0_Cb                                   194             // Derived from bIndexAtop0_Cb\r
+#define        bBetaTop0_Cb                                    195             // Derived from bIndexBtop0_Cb\r
+\r
+#define        bAlphaInternal_Cb                               196             // Derived from bIndexAinternal_Cb\r
+#define        bBetaInternal_Cb                                197             // Derived from bIndexBinternal_Cb\r
+// Offset 198 - 199 not used           \r
+\r
+#define        bAlphaLeft0_Cr                                  200             // Derived from bIndexAleft0_Cr\r
+#define        bBetaLeft0_Cr                                   201             // Derived from bIndexBleft0_Cr\r
+#define        bAlphaTop0_Cr                                   202             // Derived from bIndexAtop0_Cr\r
+#define        bBetaTop0_Cr                                    203             // Derived from bIndexBtop0_Cr\r
+\r
+#define        bAlphaInternal_Cr                               204             // Derived from bIndexAinternal_Cr\r
+#define        bBetaInternal_Cr                                205             // Derived from bIndexBinternal_Cr\r
+// Offset 206 - 223 not used           \r
+\r
+// GRF7\r
+#define        bAlphaLeft1_Cb                                  224             // Derived from bIndexAleft1_Cb Used in Mbaff mode only\r
+#define        bBetaLeft1_Cb                                   225             // Derived from bIndexBleft1_Cb Used in Mbaff mode only\r
+#define        bAlphaTop1_Cb                                   226             // Derived from bIndexAtop1_Cb  Used in Mbaff mode only\r
+#define        bBetaTop1_Cb                                    227             // Derived from bIndexBtop1_Cb  Used in Mbaff mode only\r
+\r
+// Offset 228 - 231 not used\r
+\r
+#define        bTc0_v00_1_Cb                                   232             // Derived from bSv00_1 and bIndexAleft1_Cb     Used in Mbaff mode only\r
+#define        bTc0_v10_1_Cb                                   233             // Derived from bSv10_1 and bIndexAleft1_Cb     Used in Mbaff mode only\r
+#define        bTc0_v20_1_Cb                                   234             // Derived from bSv20_1 and bIndexAleft1_Cb     Used in Mbaff mode only\r
+#define        bTc0_v30_1_Cb                                   235             // Derived from bSv30_1 and bIndexAleft1_Cb     Used in Mbaff mode only\r
+\r
+#define        bTc0_h00_1_Cb                                   236             // Derived from bSh00_1 and bIndexAleft1_Cb     Used in Mbaff mode only -- NOTE(review): h0x_1 look like top-edge tc0; bIndexAtop1_Cb probably intended on these four lines, confirm\r
+#define        bTc0_h01_1_Cb                                   237             // Derived from bSh01_1 and bIndexAleft1_Cb     Used in Mbaff mode only\r
+#define        bTc0_h02_1_Cb                                   238             // Derived from bSh02_1 and bIndexAleft1_Cb     Used in Mbaff mode only\r
+#define        bTc0_h03_1_Cb                                   239             // Derived from bSh03_1 and bIndexAleft1_Cb     Used in Mbaff mode only\r
+\r
+#define        bAlphaLeft1_Cr                                  240             // Derived from bIndexAleft1_Cr Used in Mbaff mode only\r
+#define        bBetaLeft1_Cr                                   241             // Derived from bIndexBleft1_Cr Used in Mbaff mode only\r
+#define        bAlphaTop1_Cr                                   242             // Derived from bIndexAtop1_Cr  Used in Mbaff mode only\r
+#define        bBetaTop1_Cr                                    243             // Derived from bIndexBtop1_Cr  Used in Mbaff mode only\r
+\r
+// Offset 244 - 247 not used           \r
+\r
+#define        bTc0_v00_1_Cr                                   248             // Derived from bSv00_1 and bIndexAleft1_Cr     Used in Mbaff mode only\r
+#define        bTc0_v10_1_Cr                                   249             // Derived from bSv10_1 and bIndexAleft1_Cr     Used in Mbaff mode only\r
+#define        bTc0_v20_1_Cr                                   250             // Derived from bSv20_1 and bIndexAleft1_Cr     Used in Mbaff mode only\r
+#define        bTc0_v30_1_Cr                                   251             // Derived from bSv30_1 and bIndexAleft1_Cr     Used in Mbaff mode only\r
+\r
+#define        bTc0_h00_1_Cr                                   252             // Derived from bSh00_1 and bIndexAleft1_Cr     Used in Mbaff mode only -- NOTE(review): h0x_1 look like top-edge tc0; bIndexAtop1_Cr probably intended on these four lines, confirm\r
+#define        bTc0_h01_1_Cr                                   253             // Derived from bSh01_1 and bIndexAleft1_Cr     Used in Mbaff mode only\r
+#define        bTc0_h02_1_Cr                                   254             // Derived from bSh02_1 and bIndexAleft1_Cr     Used in Mbaff mode only\r
+#define        bTc0_h03_1_Cr                                   255             // Derived from bSh03_1 and bIndexAleft1_Cr     Used in Mbaff mode only\r
+\r
+\r
+#else          // _APPLE is defined\r
+\r
+//******** Crestline for Apple, progressive only, 88 bytes **********\r
+\r
+// GRF0\r
+#define HorizOrigin                                            0\r
+#define VertOrigin                                             1\r
+#define BitFlags                                               2               // Bit flags\r
+\r
+#define        wEdgeCntlMap_IntLeftVert                4               // Derived from bbSinternalLeftVert, 1 bit per pixel \r
+#define        wEdgeCntlMap_IntMidVert                 6               // Derived from bbSinternalMidVert -- NOTE(review): was documented as bbSinternalLeftVert (looks copy-pasted); confirm\r
+#define        wEdgeCntlMap_IntRightVert               8               // Derived from bbSinternalRightVert\r
+#define        wEdgeCntlMap_IntTopHorz                 10              // Derived from bbSinternalTopHorz, 1bit per pixel \r
+#define        wEdgeCntlMap_IntMidHorz                 12              // Derived from bbSinternalMidHorz\r
+#define        wEdgeCntlMap_IntBotHorz                 14              // Derived from bbSinternalBotHorz\r
+#define        wEdgeCntlMapA_ExtLeftVert0              16              // Derived from wbSLeft0, 1bit per pixel\r
+#define        wEdgeCntlMapB_ExtLeftVert0              18              // Derived from wbSLeft0\r
+#define        wEdgeCntlMapA_ExtTopHorz0               20              // Derived from wbSTop0, 1bit per pixel\r
+#define        wEdgeCntlMapB_ExtTopHorz0               22              // Derived from wbSTop0\r
+\r
+#define        bAlphaLeft0_Y                                   24              // Derived from bIndexAleft0_Y  \r
+#define        bBetaLeft0_Y                                    25              // Derived from bIndexBleft0_Y  \r
+#define        bAlphaTop0_Y                                    26              // Derived from bIndexAtop0_Y   \r
+#define        bBetaTop0_Y                                             27              // Derived from bIndexBtop0_Y   \r
+#define        bAlphaInternal_Y                                28              // Derived from bIndexAinternal_Y       \r
+#define        bBetaInternal_Y                                 29              // Derived from bIndexBinternal_Y       \r
+\r
+// GRF1\r
+#define        bTc0_v00_0_Y                                    32              // Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0\r
+#define        bTc0_v10_0_Y                                    33              // Derived from bSv10_0 and bIndexAleft0_Y\r
+#define        bTc0_v20_0_Y                                    34              // Derived from bSv20_0 and bIndexAleft0_Y\r
+#define        bTc0_v30_0_Y                                    35              // Derived from bSv30_0 and bIndexAleft0_Y\r
+#define        bTc0_v01_Y                                              36              // Derived from bSv01 and bIndexAinternal_Y\r
+#define        bTc0_v11_Y                                              37              // Derived from bSv11 and bIndexAinternal_Y\r
+#define        bTc0_v21_Y                                              38              // Derived from bSv21 and bIndexAinternal_Y\r
+#define        bTc0_v31_Y                                              39              // Derived from bSv31 and bIndexAinternal_Y\r
+#define        bTc0_v02_Y                                              40              // Derived from bSv02 and bIndexAinternal_Y\r
+#define        bTc0_v12_Y                                              41              // Derived from bSv12 and bIndexAinternal_Y\r
+#define        bTc0_v22_Y                                              42              // Derived from bSv22 and bIndexAinternal_Y\r
+#define        bTc0_v32_Y                                              43              // Derived from bSv32 and bIndexAinternal_Y\r
+#define        bTc0_v03_Y                                              44              // Derived from bSv03 and bIndexAinternal_Y\r
+#define        bTc0_v13_Y                                              45              // Derived from bSv13 and bIndexAinternal_Y\r
+#define        bTc0_v23_Y                                              46              // Derived from bSv23 and bIndexAinternal_Y\r
+#define        bTc0_v33_Y                                              47              // Derived from bSv33 and bIndexAinternal_Y\r
+\r
+#define        bTc0_h00_0_Y                                    48              // Derived from bSh00_0 and bIndexAleft0_Y -- NOTE(review): h0x_0 look like top-edge tc0; bIndexAtop0_Y probably intended on these four lines, confirm\r
+#define        bTc0_h01_0_Y                                    49              // Derived from bSh01_0 and bIndexAleft0_Y\r
+#define        bTc0_h02_0_Y                                    50              // Derived from bSh02_0 and bIndexAleft0_Y\r
+#define        bTc0_h03_0_Y                                    51              // Derived from bSh03_0 and bIndexAleft0_Y\r
+#define        bTc0_h10_Y                                              52              // Derived from bSh10 and bIndexAinternal_Y\r
+#define        bTc0_h11_Y                                              53              // Derived from bSh11 and bIndexAinternal_Y\r
+#define        bTc0_h12_Y                                              54              // Derived from bSh12 and bIndexAinternal_Y\r
+#define        bTc0_h13_Y                                              55              // Derived from bSh13 and bIndexAinternal_Y\r
+#define        bTc0_h20_Y                                              56              // Derived from bSh20 and bIndexAinternal_Y\r
+#define        bTc0_h21_Y                                              57              // Derived from bSh21 and bIndexAinternal_Y\r
+#define        bTc0_h22_Y                                              58              // Derived from bSh22 and bIndexAinternal_Y\r
+#define        bTc0_h23_Y                                              59              // Derived from bSh23 and bIndexAinternal_Y\r
+#define        bTc0_h30_Y                                              60              // Derived from bSh30 and bIndexAinternal_Y\r
+#define        bTc0_h31_Y                                              61              // Derived from bSh31 and bIndexAinternal_Y\r
+#define        bTc0_h32_Y                                              62              // Derived from bSh32 and bIndexAinternal_Y\r
+#define        bTc0_h33_Y                                              63              // Derived from bSh33 and bIndexAinternal_Y\r
+\r
+// GRF2\r
+#define        bTc0_v00_0_Cb                                   64              // Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0   Left0\r
+#define        bTc0_v10_0_Cb                                   65              // Derived from bSv10_0 and bIndexAleft0_Cb     \r
+#define        bTc0_v20_0_Cb                                   66              // Derived from bSv20_0 and bIndexAleft0_Cb     \r
+#define        bTc0_v30_0_Cb                                   67              // Derived from bSv30_0 and bIndexAleft0_Cb     \r
+#define        bTc0_v02_Cb                                             68              // Derived from bSv02 and bIndexAinternal_Cb    MidVert\r
+#define        bTc0_v12_Cb                                             69              // Derived from bSv12 and bIndexAinternal_Cb    \r
+#define        bTc0_v22_Cb                                             70              // Derived from bSv22 and bIndexAinternal_Cb    \r
+#define        bTc0_v32_Cb                                             71              // Derived from bSv32 and bIndexAinternal_Cb    \r
+#define        bTc0_h00_0_Cb                                   72              // Derived from bSh00_0 and bIndexAleft0_Cb     Top0 -- NOTE(review): tagged Top0 but cites the left index; bIndexAtop0_Cb probably intended on these four lines, confirm\r
+#define        bTc0_h01_0_Cb                                   73              // Derived from bSh01_0 and bIndexAleft0_Cb     \r
+#define        bTc0_h02_0_Cb                                   74              // Derived from bSh02_0 and bIndexAleft0_Cb     \r
+#define        bTc0_h03_0_Cb                                   75              // Derived from bSh03_0 and bIndexAleft0_Cb     \r
+#define        bTc0_h20_Cb                                             76              // Derived from bSh20 and bIndexAinternal_Cb    MidHorz\r
+#define        bTc0_h21_Cb                                             77              // Derived from bSh21 and bIndexAinternal_Cb    \r
+#define        bTc0_h22_Cb                                             78              // Derived from bSh22 and bIndexAinternal_Cb    \r
+#define        bTc0_h23_Cb                                             79              // Derived from bSh23 and bIndexAinternal_Cb    \r
+\r
+#define        bAlphaLeft0_Cb                                  80              // Derived from bIndexAleft0_Cb\r
+#define        bBetaLeft0_Cb                                   81              // Derived from bIndexBleft0_Cb\r
+#define        bAlphaTop0_Cb                                   82              // Derived from bIndexAtop0_Cb\r
+#define        bBetaTop0_Cb                                    83              // Derived from bIndexBtop0_Cb\r
+#define        bAlphaInternal_Cb                               84              // Derived from bIndexAinternal_Cb\r
+#define        bBetaInternal_Cb                                85              // Derived from bIndexBinternal_Cb\r
+\r
+#define        ExtBitFlags                                             86              // Extended bit flags, such as disable ILDB bits\r
+\r
+// Cr shares the Cb entries: each *_Cr name below is an alias for the corresponding *_Cb offset\r
+#define        bTc0_v00_0_Cr                   bTc0_v00_0_Cb   \r
+#define        bTc0_v10_0_Cr               bTc0_v10_0_Cb       \r
+#define        bTc0_v20_0_Cr               bTc0_v20_0_Cb       \r
+#define        bTc0_v30_0_Cr               bTc0_v30_0_Cb       \r
+#define        bTc0_v02_Cr                         bTc0_v02_Cb         \r
+#define        bTc0_v12_Cr                         bTc0_v12_Cb         \r
+#define        bTc0_v22_Cr                         bTc0_v22_Cb         \r
+#define        bTc0_v32_Cr                         bTc0_v32_Cb         \r
+#define        bTc0_h00_0_Cr               bTc0_h00_0_Cb       \r
+#define        bTc0_h01_0_Cr               bTc0_h01_0_Cb       \r
+#define        bTc0_h02_0_Cr               bTc0_h02_0_Cb       \r
+#define        bTc0_h03_0_Cr               bTc0_h03_0_Cb       \r
+#define        bTc0_h20_Cr                         bTc0_h20_Cb         \r
+#define        bTc0_h21_Cr                         bTc0_h21_Cb         \r
+#define        bTc0_h22_Cr                         bTc0_h22_Cb         \r
+#define        bTc0_h23_Cr                         bTc0_h23_Cb         \r
+                                \r
+#define        bAlphaLeft0_Cr              bAlphaLeft0_Cb                 \r
+#define        bBetaLeft0_Cr               bBetaLeft0_Cb                  \r
+#define        bAlphaTop0_Cr               bAlphaTop0_Cb                  \r
+#define        bBetaTop0_Cr                bBetaTop0_Cb                   \r
+#define        bAlphaInternal_Cr           bAlphaInternal_Cb      \r
+#define        bBetaInternal_Cr            bBetaInternal_Cb       \r
+\r
+\r
+#endif\r
+\r
+\r
+//========== End of Child Variables ===============================================================\r
+\r
+#if !defined(COMBINED_KERNEL)\r
+#define ILDB_LABEL(x)  x               // Standalone kernels: identity macro, labels are used as written (no symbol extension)\r
+#endif // !defined(COMBINED_KERNEL)\r
+\r
+#endif // !defined(__AVC_ILDB_HEADER__)\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Field_UV.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Field_UV.asm
new file mode 100644 (file)
index 0000000..b0986b5
--- /dev/null
@@ -0,0 +1,9 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#include "AVC_ILDB_Child_UV.asm"       // This file adds nothing of its own: it only pulls in AVC_ILDB_Child_UV.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Field_Y.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Field_Y.asm
new file mode 100644 (file)
index 0000000..fafd6c0
--- /dev/null
@@ -0,0 +1,9 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#include "AVC_ILDB_Child_Y.asm"        // This file adds nothing of its own: it only pulls in AVC_ILDB_Child_Y.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_UV.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_UV.asm
new file mode 100644 (file)
index 0000000..64dd802
--- /dev/null
@@ -0,0 +1,173 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)\r
+//\r
+// First de-block vertical edges from left to right.\r
+// Second de-block horizontal edge from top to bottom.\r
+// \r
+// For 4:2:0, chroma is always de-blocked at 8x8.\r
+// NV12 format allows to filter U and V together.\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_CHILD_MBAFF_UV\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_CHILD_UV):       // NOTE(review): same label text as the non-MBAFF UV kernel; presumably ILDB_LABEL's symbol extension keeps them distinct - confirm\r
+#endif\r
+\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xE997:w\r
+#endif\r
+\r
+       // Set up the temp buffers used by the included load and save code\r
+       #define BUF_B           RTempB\r
+       #define BUF_W           RTempW\r
+       #define BUF_D           RTempD\r
+\r
+       // Init local variables\r
+       mul (4)         ORIX_CUR<2>:w           ORIX<0;1,0>:w           16:w    { NoDDClr }             // Expand X addr to bytes, repeat 4 times\r
+       mul (4)         ORIY_CUR<2>:w           ORIY<0;1,0>:w           32:w    { NoDDChk }             // Expand Y addr to bytes, repeat 4 times\r
+\r
+       mov (2)         f0.0<1>:w               0:w             // Clear both flag words (f0.0 and f0.1)\r
+\r
+       mov     (1)             GateWayOffsetC:uw       ORIY:uw                                         // Use row # as Gateway offset\r
+\r
+       //=== Null Kernel ===============================================================\r
+//     jmpi ILDB_LABEL(POST_ILDB_UV)\r
+       //===============================================================================\r
+\r
+       //====================================================================================\r
+       // Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.\r
+       // Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16\r
+       // Each MB has 256 bytes of control data\r
+\r
+       // For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.\r
+       // MB_offset = MBsCntX * CurRow + CurCol\r
+       // Byte_offset = MB_offset * (256 << Mbaff_flag),       Mbaff_flag = 0 or 1.\r
+       // Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes\r
+       // where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)\r
+       // MBCntrlDataOffsetY holds y'.\r
+\r
+       // For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.\r
+       // MB_offset = MBsCntX * CurRow + CurCol\r
+       // Byte_offset = MB_offset * (64 << Mbaff_flag),        Mbaff_flag = 0 or 1.\r
+       // MBCntrlDataOffsetY holds the global byte offset.\r
+       \r
+#if !defined(DEV_CL)\r
+       mul (1) CntrlDataOffsetY:ud             MBsCntX:w                               ORIY:w          // MBsCntX * CurRow\r
+       add (1) CntrlDataOffsetY:ud             CntrlDataOffsetY:ud             ORIX:w          // + CurCol = MB_offset\r
+       mul (1) CntrlDataOffsetY:ud             CntrlDataOffsetY:ud             128:uw          // Byte offset; 128 = 64 << 1 (Mbaff_flag = 1)\r
+#endif\r
+       //====================================================================================\r
+\r
+       add (1)         ORIX_LEFT:w                     ORIX_LEFT:w                     -4:w\r
+       add (1)         ORIY_TOP:w                      ORIY_TOP:w                      -4:w\r
+\r
+       //=========== Process Top MB ============\r
+    and (1)    BitFields:w             BitFields:w             TopFieldFlag:w          // Reset BotFieldFlag\r
+\r
+       // Build a ramp from 0 to 15\r
+       mov     (16)    RRampW(0)<1>            RampConstC<0;8,1>:ub\r
+       add (8)         RRampW(0,8)<1>          RRampW(0,8)                     8:w                             // Add 8 to the upper 8 words to complete the ramp (RRampW words 15..0)\r
+\r
+ILDB_LABEL(RE_ENTRY_UV):       // Re-entry point: the second pass through here handles the bottom MB\r
+\r
+       // Load current MB control data\r
+#if defined(DEV_CL)\r
+       #include "load_ILDB_Cntrl_Data_64DW.asm"        // Crestline\r
+#else\r
+       #include "load_ILDB_Cntrl_Data_16DW.asm"        // Cantiga and beyond\r
+#endif\r
+\r
+       // Init addr register for vertical control data\r
+       mov (1)         ECM_AddrReg<1>:w                CNTRL_DATA_BASE:w               // Init ECM_AddrReg\r
+\r
+       // Use free cycles here\r
+       // Check loaded control data\r
+       and.z.f0.1  (16) null<1>:uw     r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw    0xFFFF:uw               // Skip ILDB? (f0.1 channel set where the edge control word is zero)\r
+       and.nz.f0.0  (1) null:w         r[ECM_AddrReg, ExtBitFlags]:ub          DISABLE_ILDB_FLAG:w             // Skip ILDB? (f0.0 set when DISABLE_ILDB_FLAG is present)\r
+\r
+       // Extract the above/current field-mode flags now; DualFieldMode is derived from CTemp1_W further down\r
+       and     (1)     CTemp1_W:uw             r[ECM_AddrReg, BitFlags]:ub             FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw\r
+\r
+       // Get Vert Edge Pattern (frame vs. field MBs)\r
+       and     (1)     VertEdgePattern:uw              r[ECM_AddrReg, BitFlags]:ub             FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw\r
+\r
+       (f0.1.all16h)   jmpi    ILDB_LABEL(SKIP_ILDB_UV)                                                // Skip ILDB: all 16 edge control words are zero\r
+       (f0.0)                  jmpi    ILDB_LABEL(SKIP_ILDB_UV)                                                // Skip ILDB: disable flag is set\r
+\r
+       // Set DualFieldMode for all data read, write and deblocking\r
+//     and     (1)     CTemp1_W:uw             r[ECM_AddrReg, BitFlags]:ub             FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw\r
+       cmp.z.f0.0      (1)     null:w  CTemp1_W:uw             ABOVE_FIELD_CUR_FRAME:w\r
+       and (1)         DualFieldMode:w         f0.0:w          0x0001:w                // DualFieldMode = (CTemp1_W == ABOVE_FIELD_CUR_FRAME) ? 1 : 0\r
+\r
+       #include "load_Cur_UV_8x8T_Mbaff.asm"           // Load transposed data 8x8\r
+       #include "load_Left_UV_2x8T_Mbaff.asm"                          // Load left MB (2x8) UV data from memory if exists\r
+\r
+       #include "Transpose_Cur_UV_8x8.asm"\r
+       #include "Transpose_Left_UV_2x8.asm"\r
+       \r
+\r
+       //---------- Perform vertical ILDB filtering on UV ----------\r
+       #include "AVC_ILDB_Filter_Mbaff_UV_v.asm"       \r
+       //---------------------------------------------------------\r
+\r
+       #include "save_Left_UV_8x2T_Mbaff.asm"                          // Write left MB (2x8) UV data to memory if exists\r
+       #include "load_Top_UV_8x2_Mbaff.asm"                            // Load top MB (8x2) UV data from memory if exists\r
+\r
+       #include "Transpose_Cur_UV_8x8.asm"                                     // Transpose a MB for horizontal edge de-blocking\r
+\r
+       //---------- Perform horizontal ILDB filtering on UV ----------\r
+       #include "AVC_ILDB_Filter_Mbaff_UV_h.asm"       \r
+       //-----------------------------------------------------------\r
+\r
+       #include "save_Cur_UV_8x8_Mbaff.asm"                            // Write 8x8\r
+       #include "save_Top_UV_8x2_Mbaff.asm"                            // Write top MB (8x2) if not the top row\r
+\r
+       //-----------------------------------------------------------\r
+ILDB_LABEL(SKIP_ILDB_UV):\r
+       \r
+       and.z.f0.0 (1)  null:w          BitFields:w             BotFieldFlag:w          // f0.0 set if BotFieldFlag is still clear, i.e. only the top MB pass has run\r
+\r
+       //=========== Process Bottom MB ============\r
+    or (1)     BitFields:w     BitFields:w             BotFieldFlag:w  // Set BotFieldFlag to 1\r
+       (f0.0) jmpi             ILDB_LABEL(RE_ENTRY_UV)                                                 // Loop back once for bottom deblocking\r
+\r
+       // Fall through to finish\r
+\r
+       //=========== Check write commit of the last write ============\r
+    mov (8)    WritebackResponse(0)<1>         WritebackResponse(0)    \r
+\r
+ILDB_LABEL(POST_ILDB_UV):      \r
+       \r
+       // Send notification thru Gateway to root thread, update chroma Status[CurRow]\r
+       #include "AVC_ILDB_ForwardMsg.asm"\r
+\r
+#if !defined(GW_DCN)           // For non-ILK chipsets\r
+       // Child sends EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+#endif // !defined(GW_DCN)\r
+       \r
+       // The thread finishes here\r
+       //------------------------------------------------------------------------------\r
+       \r
+       ////////////////////////////////////////////////////////////////////////////////\r
+       // Include other subroutines being called\r
+       #include "AVC_ILDB_Chroma_Core_Mbaff.asm"\r
+       \r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_Y.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_Y.asm
new file mode 100644 (file)
index 0000000..b4b1937
--- /dev/null
@@ -0,0 +1,188 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)\r
+//\r
+// First, de-block vertical edges from left to right.\r
+// Second, de-block horizontal edge from top to bottom.\r
+// \r
+//     ***** MBAFF Mode *****\r
+//     This version deblocks top MB first, followed by bottom MB.\r
+//\r
+//     Need variable CurMB     to indicate top MB or bottom MB (CurMB = 0 or 1).  \r
+//     We can use BotFieldFlag in BitFields to represent it.\r
+//\r
+//  Usage:\r
+//     1) Access control data for top \r
+//             CntrlDataOffsetY + CurMB  * Control data block size             (64 DWs for CL, 16 DWs for BLC)\r
+//\r
+//     2) Load frame/field video data based on flags: FieldModeCurrentMbFlag, FieldModeLeftMbFlag, FieldModeAboveMbFlag.\r
+//\r
+//     E.g. \r
+//     if (pCntlData->BitField & FieldModeCurrentMbFlag)\r
+//             cur_y = ORIX_CUR.y + CurMB * 1;                         // Add field vertical offset for bot field MB .\r
+//     else\r
+//             cur_y = ORIX_CUR.y + CurMB * MB_Rows_Y;         // Add bottom MB vertical offset for bot MB\r
+//\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_CHILD_MBAFF_Y\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_CHILD_Y):       // NOTE(review): same label text as the non-MBAFF Y kernel; presumably ILDB_LABEL's symbol extension keeps them distinct - confirm\r
+#endif\r
+\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xE998:w\r
+#endif\r
+\r
+       // Set up the temp buffers used by the included load and save code\r
+       #define BUF_B           RTempB                          \r
+       #define BUF_D           RTempD\r
+       \r
+       // Init local variables\r
+       // These coordinates are in progressive fashion\r
+       mul (4)         ORIX_CUR<2>:w           ORIX<0;1,0>:w           16:w    { NoDDClr }             // Expand X addr to bytes, repeat 4 times\r
+       mul (4)         ORIY_CUR<2>:w           ORIY<0;1,0>:w           32:w    { NoDDChk }             // Expand Y addr to bytes, repeat 4 times\r
+\r
+       mov (2)         f0.0<1>:w               0:w             // Clear both flag words (f0.0 and f0.1)\r
+       \r
+       mov     (1)             GateWayOffsetC:uw       ORIY:uw                                         // Use row # as Gateway offset\r
+\r
+       //=== Null Kernel ===============================================================\r
+//     jmpi POST_ILDB\r
+       //===============================================================================\r
+\r
+       //====================================================================================\r
+       // Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.\r
+       // Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16\r
+       // Each MB has 256 bytes of control data\r
+\r
+       // For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.\r
+       // MB_offset = MBsCntX * CurRow + CurCol\r
+       // Byte_offset = MB_offset * (256 << Mbaff_flag),       Mbaff_flag = 0 or 1.\r
+       // Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes\r
+       // where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)\r
+       // MBCntrlDataOffsetY holds y'.\r
+\r
+       // For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.\r
+       // MB_offset = MBsCntX * CurRow + CurCol\r
+       // Byte_offset = MB_offset * (64 << Mbaff_flag),        Mbaff_flag = 0 or 1.\r
+       // MBCntrlDataOffsetY holds the global byte offset.\r
+\r
+#if !defined(DEV_CL)   \r
+       mul (1) CntrlDataOffsetY:ud             MBsCntX:w                               ORIY:w          // MBsCntX * CurRow\r
+       add (1) CntrlDataOffsetY:ud             CntrlDataOffsetY:ud             ORIX:w          // + CurCol = MB_offset\r
+       mul (1) CntrlDataOffsetY:ud             CntrlDataOffsetY:ud             128:uw          // Byte offset; 128 = 64 << 1 (Mbaff_flag = 1)\r
+#endif\r
+\r
+       //====================================================================================\r
+       \r
+       add (1)         ORIX_LEFT:w                     ORIX_LEFT:w                     -4:w\r
+       add (1)         ORIY_TOP:w                      ORIY_TOP:w                      -4:w\r
+\r
+\r
+       //=========== Process Top MB ============\r
+    and (1)    BitFields:w             BitFields:w             TopFieldFlag:w  // Reset BotFieldFlag\r
+\r
+RE_ENTRY:      // Re-entry point for the bottom MB pass. NOTE(review): unlike the UV kernels, RE_ENTRY/SKIP_ILDB/POST_ILDB are not wrapped in ILDB_LABEL(); confirm they cannot collide in COMBINED_KERNEL builds\r
+\r
+       // Load current MB control data\r
+#if defined(DEV_CL)\r
+       #include "load_ILDB_Cntrl_Data_64DW.asm"        // Crestline\r
+#else\r
+       #include "load_ILDB_Cntrl_Data_16DW.asm"        // Cantiga and beyond\r
+#endif\r
+\r
+       // Init addr register for vertical control data\r
+       mov (1)         ECM_AddrReg<1>:w        CNTRL_DATA_BASE:w                       // Init edge control map AddrReg\r
+\r
+       // Check loaded control data\r
+       and.z.f0.1  (16) null<1>:uw     r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw    0xFFFF:uw               // Skip ILDB? (f0.1 channel set where the edge control word is zero)\r
+       and.nz.f0.0  (1) null:w         r[ECM_AddrReg, ExtBitFlags]:ub          DISABLE_ILDB_FLAG:w             // Skip ILDB? (f0.0 set when DISABLE_ILDB_FLAG is present)\r
+\r
+       // Use free cycles here\r
+       // Extract the above/current field-mode flags now; DualFieldMode is derived from CTemp1_W further down\r
+       and     (1)     CTemp1_W:uw             r[ECM_AddrReg, BitFlags]:ub             FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw\r
+\r
+       // Get Vert Edge Pattern (frame vs. field MBs)\r
+       and     (1)     VertEdgePattern:uw              r[ECM_AddrReg, BitFlags]:ub             FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw\r
+\r
+       (f0.1.all16h)   jmpi    SKIP_ILDB                                               // Skip ILDB: all 16 edge control words are zero\r
+       (f0.0)                  jmpi    SKIP_ILDB                                               // Skip ILDB: disable flag is set\r
+\r
+       // Set DualFieldMode for all data read, write and deblocking\r
+//     and     (1)     CTemp1_W:uw             r[ECM_AddrReg, BitFlags]:ub             FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw\r
+       cmp.z.f0.0      (1)     null:w  CTemp1_W:uw             ABOVE_FIELD_CUR_FRAME:w\r
+       and (1)         DualFieldMode:w         f0.0:w          0x0001:w                // DualFieldMode = (CTemp1_W == ABOVE_FIELD_CUR_FRAME) ? 1 : 0\r
+\r
+       // Load current MB                              // DDD1\r
+       #include "load_Cur_Y_16x16T_Mbaff.asm"                          // Load cur Y, 16x16, transpose\r
+       #include "load_Left_Y_4x16T_Mbaff.asm"                          // Load left MB (4x16) Y data from memory if exists\r
+\r
+       #include "Transpose_Cur_Y_16x16.asm"\r
+       #include "Transpose_Left_Y_4x16.asm"\r
+\r
+       //---------- Perform vertical ILDB filtering on Y ----------\r
+       #include "AVC_ILDB_Filter_Mbaff_Y_v.asm"        \r
+       //-------------------------------------------------------\r
+\r
+       #include "save_Left_Y_16x4T_Mbaff.asm"                          // Write left MB (4x16) Y data to memory if exists\r
+       #include "load_Top_Y_16x4_Mbaff.asm"                            // Load top MB (16x4) Y data from memory if exists\r
+       #include "Transpose_Cur_Y_16x16.asm"                            // Transpose a MB for horizontal edge de-blocking\r
+\r
+       //---------- Perform horizontal ILDB filtering on Y ----------\r
+       #include "AVC_ILDB_Filter_Mbaff_Y_h.asm"        \r
+       //----------------------------------------------------------\r
+\r
+       #include "save_Cur_Y_16x16_Mbaff.asm"                                   // Write cur MB (16x16)\r
+       #include "save_Top_Y_16x4_Mbaff.asm"                                    // Write top MB (16x4) if not the top row\r
+\r
+SKIP_ILDB:\r
+       //----------------------------------------------------------\r
+       and.z.f0.0 (1)  null:w          BitFields:w             BotFieldFlag:w          // f0.0 set if BotFieldFlag is still clear, i.e. only the top MB pass has run\r
+\r
+       //=========== Process Bottom MB ============\r
+    or (1)     BitFields:w     BitFields:w             BotFieldFlag:w  // Set BotFieldFlag to 1\r
+       (f0.0) jmpi             RE_ENTRY                                                                // Loop back once for bottom deblocking\r
+\r
+       // Fall through to finish\r
+\r
+       //=========== Check write commit of the last write ============\r
+    mov (8)    WritebackResponse(0)<1>         WritebackResponse(0)    \r
+\r
+POST_ILDB:\r
+       \r
+       //---------------------------------------------------------------------------\r
+       // Send notification thru Gateway to root thread, update luma Status[CurRow]\r
+       \r
+       #include "AVC_ILDB_ForwardMsg.asm"      \r
+\r
+#if !defined(GW_DCN)           // For non-ILK chipsets\r
+       // Child sends EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+#endif // !defined(GW_DCN)\r
+       \r
+       // The thread finishes here\r
+       //------------------------------------------------------------------------------\r
+       \r
+       \r
+       ////////////////////////////////////////////////////////////////////////////////\r
+       // Include other subroutines being called\r
+       #include "AVC_ILDB_Luma_Core_Mbaff.asm"\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_UV.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_UV.asm
new file mode 100644 (file)
index 0000000..df40ad4
--- /dev/null
@@ -0,0 +1,186 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)\r
+//\r
+// First de-block vertical edges from left to right.\r
+// Second de-block horizontal edge from top to bottom.\r
+// \r
+// For 4:2:0, chroma is always de-blocked at 8x8.\r
+// NV12 format allows to filter U and V together.\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_CHILD_UV\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_CHILD_UV):\r
+#endif\r
+\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x9997:w\r
+#endif\r
+\r
+       // Init local variables\r
+       shl (8)         ORIX_CUR<1>:w           ORIX<0;2,1>:w           4:w             // Expand addr to bytes (<< 4), repeat (x,y) 4 times\r
+\r
+       // Init addr register for vertical control data\r
+       mov (1)         ECM_AddrReg<1>:w                CNTRL_DATA_BASE:w               // Init ECM_AddrReg\r
+\r
+       //=== Null Kernel ===============================================================\r
+//     jmpi ILDB_LABEL(POST_ILDB_UV)\r
+       //===============================================================================\r
+\r
+#if defined(DEV_CL)    \r
+       mov     (1)             acc0.0:w                240:w   // acc0 = addend for the mac below (presumably the size of the Y URB region - confirm)\r
+#else\r
+       //====================================================================================\r
+       // For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.\r
+       // MB_offset = MBsCntX * CurRow + CurCol\r
+       // MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64\r
+       mul (1) CntrlDataOffsetY:ud             MBsCntX:w                               ORIY:w\r
+       add (1) CntrlDataOffsetY:ud             CntrlDataOffsetY:ud             ORIX:w\r
+\r
+       // Assign to MSGSRC.2:ud for memory access\r
+       // mul (1) CntrlDataOffsetY:ud          CntrlDataOffsetY:ud             64:uw\r
+       mul (1) MSGSRC.2:ud             CntrlDataOffsetY:ud             64:uw\r
+               \r
+       mov     (1)             acc0.0:w                320:w   // acc0 = addend for the mac below (presumably the size of the Y URB region - confirm)\r
+#endif\r
+       mac (1)         URBOffsetC:w    ORIY:w                  4:w                             // URBOffsetC = ORIY * 4 + acc0; UV URB entries are right after Y entries\r
+\r
+\r
+       // Init local variables\r
+//     shl (8)         ORIX_CUR<1>:w           ORIX<0;2,1>:w           4:w             // Expand addr to bytes, repeat (x,y) 4 times\r
+       add (1)         ORIX_LEFT:w                     ORIX_LEFT:w                     -4:w\r
+       add (1)         ORIY_TOP:w                      ORIY_TOP:w                      -4:w\r
+\r
+       // Build a ramp from 0 to 15\r
+       mov     (16)    RRampW(0)<1>            RampConstC<0;8,1>:ub\r
+       add (8)         RRampW(0,8)<1>          RRampW(0,8)                     8:w             // Add 8 to the upper 8 words to complete the ramp (RRampW words 15..0)\r
+\r
+       // Load current MB control data\r
+#if defined(DEV_CL)\r
+       #if defined(_APPLE)\r
+               #include "load_ILDB_Cntrl_Data_22DW.asm"        // Crestline for Apple, progressive only\r
+       #else\r
+               #include "load_ILDB_Cntrl_Data_64DW.asm"        // Crestline\r
+       #endif  \r
+#else\r
+       #include "load_ILDB_Cntrl_Data_16DW.asm"        // Cantiga and beyond\r
+#endif\r
+\r
+       // Check loaded control data\r
+       #if defined(_APPLE)\r
+               and.z.f0.1  (8) null<1>:uw      r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw              0xFFFF:uw               // Skip ILDB?\r
+               (f0.1) and.z.f0.1 (2) null<1>:uw        r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw             0xFFFF:uw               // Skip ILDB?\r
+       #else\r
+               and.z.f0.1  (16) null<1>:uw     r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw    0xFFFF:uw               // Skip ILDB? (f0.1 channel set where the edge control word is zero)\r
+       #endif  \r
+               \r
+       and.nz.f0.0  (1) null:w         r[ECM_AddrReg, ExtBitFlags]:ub          DISABLE_ILDB_FLAG:w             // Skip ILDB? (f0.0 set when DISABLE_ILDB_FLAG is present)\r
+       \r
+       mov     (1)             GateWayOffsetC:uw       ORIY:uw         // Use row # as Gateway offset\r
+\r
+       #if defined(_APPLE)\r
+               (f0.1.all8h)    jmpi    ILDB_LABEL(READ_FOR_URB_UV)                             // Skip ILDB, but still write URB data for the right neighbor\r
+       #else\r
+               (f0.1.all16h)   jmpi    ILDB_LABEL(READ_FOR_URB_UV)                             // Skip ILDB, but still write URB data for the right neighbor\r
+       #endif  \r
+\r
+       (f0.0)                  jmpi    ILDB_LABEL(READ_FOR_URB_UV)                                     // Skip ILDB (disable flag set), but still write URB data\r
+\r
+\r
+\r
+       #include "load_Cur_UV_8x8T.asm"                         // Load transposed data 8x8\r
+//     #include "load_Left_UV_2x8T.asm"\r
+       #include "load_Top_UV_8x2.asm"                          // Load top MB (8x2) UV data from memory if exists\r
+\r
+       #include "Transpose_Cur_UV_8x8.asm"\r
+//     #include "Transpose_Left_UV_2x8.asm"\r
+\r
+\r
+       //---------- Perform vertical ILDB filtering on UV ----------\r
+       #include "AVC_ILDB_Filter_UV_v.asm"     \r
+       //---------------------------------------------------------\r
+\r
+       #include "save_Left_UV_8x2T.asm"                        // Write left MB (2x8) UV data to memory if exists\r
+       #include "Transpose_Cur_UV_8x8.asm"                     // Transpose a MB for horizontal edge de-blocking\r
+\r
+       //---------- Perform horizontal ILDB filtering on UV ----------\r
+       #include "AVC_ILDB_Filter_UV_h.asm"     \r
+       //-----------------------------------------------------------\r
+\r
+       #include "save_Cur_UV_8x8.asm"                          // Write 8x8\r
+       #include "save_Top_UV_8x2.asm"                          // Write top MB (8x2) if not the top row\r
+\r
+       //---------- Write right most 2 columns (2x8 words) of cur MB to URB ----------\r
+       // Transpose the right most 2 cols 2x8 (word) in GRF to 8x2 in BUF_D.  (Original note: "It is 2 left most cols in cur MB" - presumably the left-neighbor columns as seen by the next MB; TODO confirm.)\r
+       #include "Transpose_Cur_UV_2x8.asm"                                             \r
+               \r
+ILDB_LABEL(WRITE_URB_UV):\r
+       mov (8)         m1<1>:ud                LEFT_TEMP_D(1)<8;8,1>                   // Copy 1 GRF (8 dwords) into message register m1: 1 URB entry (U+V)\r
+       \r
+       #include "writeURB_UV_Child.asm"        \r
+       //-----------------------------------------------------------------\r
+\r
+       //=========== Check write commit of the last write ============\r
+    mov (8)    WritebackResponse(0)<1>         WritebackResponse(0)    \r
+\r
+ILDB_LABEL(POST_ILDB_UV):\r
+       //---------------------------------\r
+       \r
+       // Send notification thru Gateway to root thread, update chroma Status[CurRow]\r
+       #include "AVC_ILDB_ForwardMsg.asm"\r
+\r
+#if !defined(GW_DCN)           // For non-ILK chipsets\r
+       // Child sends EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+#endif // !defined(GW_DCN)\r
+       \r
+       // The thread finishes here\r
+       //------------------------------------------------------------------------------\r
+       \r
+ILDB_LABEL(READ_FOR_URB_UV):\r
+       // ILDB was skipped for this MB, but we still need to prepare URB data for the right neighbor MB\r
+       #include "load_Cur_UV_Right_Most_2x8.asm"               // Load cur MB (right most 2x8) UV data from memory\r
+       #include "Transpose_Cur_UV_Right_Most_2x8.asm"                                          \r
+//     jmpi ILDB_LABEL(WRITE_URB_UV)\r
+\r
+       mov (8)         m1<1>:ud                LEFT_TEMP_D(1)<8;8,1>                   // Copy 1 GRF (8 dwords) into message register m1: 1 URB entry (U+V)\r
+       \r
+       #include "writeURB_UV_Child.asm"        \r
+       //-----------------------------------------------------------------\r
+\r
+       // Send notification thru Gateway to root thread, update chroma Status[CurRow]\r
+       #include "AVC_ILDB_ForwardMsg.asm"\r
+\r
+#if !defined(GW_DCN)           // For non-ILK chipsets\r
+       // Child sends EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+#endif // !defined(GW_DCN)\r
+       \r
+       // The thread finishes here\r
+       //------------------------------------------------------------------------------\r
+       \r
+       \r
+       ////////////////////////////////////////////////////////////////////////////////\r
+       // Include other subroutines being called\r
+//     #include "AVC_ILDB_Luma_Core.asm"\r
+       #include "AVC_ILDB_Chroma_Core.asm"\r
+\r
+       \r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Y.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Child_Y.asm
new file mode 100644 (file)
index 0000000..bef52d8
--- /dev/null
@@ -0,0 +1,176 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)\r
+//\r
+// First, de-block vertical edges from left to right.\r
+// Second, de-block horizontal edge from top to bottom.\r
+// \r
+// If transform_size_8x8_flag = 1, luma is de-blocked at 8x8.  Otherwise, luma is de-blocked at 4x4.\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_CHILD_Y\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_CHILD_Y):\r
+#endif\r
+\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x9998:w\r
+#endif\r
+\r
+       // Init local variables: scale the MB origin (ORIX, ORIY) by 16 into ORIX_CUR\r
+       shl (8)         ORIX_CUR<1>:w           ORIX<0;2,1>:w           4:w             // Expand MB (x,y) to bytes (<<4 = x16), repeat (x,y) 4 times\r
+\r
+       // Init addr register for vertical control data\r
+       mov (1)         ECM_AddrReg<1>:w        CNTRL_DATA_BASE:w                       // Init edge control map AddrReg\r
+\r
+       //=== Null Kernel (debug aid: enable the jmpi below to bypass all filtering) ====\r
+//     jmpi ILDB_LABEL(POST_ILDB_Y)\r
+       //===============================================================================\r
+\r
+       mul     (1)             URBOffsetC:uw   ORIY:uw         4:w     \r
+       \r
+#if !defined(DEV_CL)   \r
+       //====================================================================================\r
+       // For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.\r
+       // MB_offset = MBsCntX * CurRow + CurCol   (CurRow = ORIY, CurCol = ORIX)\r
+       // MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64\r
+       mul (1) CntrlDataOffsetY:ud             MBsCntX:w                               ORIY:w\r
+       add (1) CntrlDataOffsetY:ud             CntrlDataOffsetY:ud             ORIX:w\r
+               \r
+       // Assign the byte offset (MB_offset * 64) to MSGSRC.2:ud for memory access\r
+       // mul (1) CntrlDataOffsetY:ud          CntrlDataOffsetY:ud             64:uw\r
+       mul (1) MSGSRC.2:ud             CntrlDataOffsetY:ud             64:uw           \r
+       \r
+#endif\r
+\r
+       // Load current MB control data (the include is selected by the chipset build flags)\r
+#if defined(DEV_CL) \r
+       #if defined(_APPLE)\r
+               #include "load_ILDB_Cntrl_Data_22DW.asm"        // Crestline for Apple, progressive only\r
+       #else\r
+               #include "load_ILDB_Cntrl_Data_64DW.asm"        // Crestline\r
+       #endif  \r
+#else\r
+       #include "load_ILDB_Cntrl_Data_16DW.asm"        // Cantiga and beyond\r
+#endif\r
+\r
+       // Check loaded control data: f0.1 is set per channel whose edge control word is zero (nothing to filter)\r
+       #if defined(_APPLE)\r
+               and.z.f0.1  (8) null<1>:uw      r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw              0xFFFF:uw               // Skip ILDB? (first 8 edge control words)\r
+               (f0.1) and.z.f0.1 (2) null<1>:uw        r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw             0xFFFF:uw               // Skip ILDB? (2 more words, only tested on channels that are still zero)\r
+       #else\r
+               and.z.f0.1  (16) null<1>:uw     r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw    0xFFFF:uw               // Skip ILDB? (all 16 edge control words)\r
+       #endif  \r
+\r
+       and.nz.f0.0  (1) null:w         r[ECM_AddrReg, ExtBitFlags]:ub          DISABLE_ILDB_FLAG:w             // Skip ILDB? f0.0 set when DISABLE_ILDB_FLAG is set in ExtBitFlags\r
+\r
+       // Use free cycles here (independent setup issued while the flag results are pending)\r
+       add (1)         ORIX_LEFT:w                     ORIX_LEFT:w                     -4:w\r
+//     add (1)         ORIY_TOP:w                      ORIY_TOP:w                      -4:w\r
+       mov     (1)             GateWayOffsetC:uw       ORIY:uw                                         // Use row # as Gateway offset\r
+\r
+       #if defined(_APPLE)\r
+               (f0.1.all8h)    jmpi    ILDB_LABEL(READ_FOR_URB_Y)                              // All tested edge control words are zero --> skip ILDB\r
+       #else\r
+               (f0.1.all16h)   jmpi    ILDB_LABEL(READ_FOR_URB_Y)                              // All tested edge control words are zero --> skip ILDB\r
+       #endif\r
+\r
+       (f0.0)                  jmpi    ILDB_LABEL(READ_FOR_URB_Y)                                      // DISABLE_ILDB_FLAG is set --> skip ILDB\r
+\r
+       add (1)         ORIY_TOP:w                      ORIY_TOP:w                      -4:w\r
+\r
+       // Better performance is observed if boundary MBs are not checked and skipped.\r
+       \r
+       #include "load_Cur_Y_16x16T.asm"                                // Load cur MB Y, 16x16, transpose\r
+//     #include "load_Left_Y_4x16T.asm"                                // Load left MB (4x16) Y data from memory\r
+       #include "load_Top_Y_16x4.asm"                                  // Load top MB (16x4) Y data from memory\r
+\r
+       #include "Transpose_Cur_Y_16x16.asm"\r
+//     #include "Transpose_Left_Y_4x16.asm"\r
+\r
+       //---------- Perform vertical ILDB filtering on Y ---------\r
+       #include "AVC_ILDB_Filter_Y_v.asm"      \r
+       //-------------------------------------------------------\r
+\r
+       #include "save_Left_Y_16x4T.asm"                                // Write left MB (4x16) Y data to memory\r
+       #include "Transpose_Cur_Y_16x16.asm"                    // Transpose a MB for horizontal edge de-blocking \r
+\r
+       //---------- Perform horizontal ILDB filtering on Y -------\r
+       #include "AVC_ILDB_Filter_Y_h.asm"      \r
+       //-------------------------------------------------------\r
+\r
+       #include "save_Cur_Y_16x16.asm"                                 // Write cur MB (16x16)\r
+       #include "save_Top_Y_16x4.asm"                                  // Write top MB (16x4)\r
+\r
+       //---------- Write right most 4 columns of cur MB to URB ----------\r
+       // Transpose the right most 4 cols 4x16 in GRF to 16x4 in LEFT_TEMP_B.  It is 4 left most cols in cur MB.       \r
+       #include "Transpose_Cur_Y_4x16.asm"                                             \r
+       \r
+ILDB_LABEL(WRITE_URB_Y):\r
+       // Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), TOP_TEMP_B must be avail\r
+       mov (16)        m1<1>:ud                LEFT_TEMP_D(2)<8;8,1>           // Copy 2 GRFs to 2 URB entries (Y)\r
+       \r
+       #include "writeURB_Y_Child.asm" \r
+       //-----------------------------------------------------------------\r
+\r
+       //=========== Check write commit of the last write ============\r
+    mov (8)    WritebackResponse(0)<1>         WritebackResponse(0)    \r
+\r
+ILDB_LABEL(POST_ILDB_Y):\r
+       // Send notification thru Gateway to root thread, update luma Status[CurRow]\r
+       #include "AVC_ILDB_ForwardMsg.asm"      \r
+\r
+#if !defined(GW_DCN)           // For non-ILK chipsets\r
+       //child send EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+#endif // !defined(GW_DCN)\r
+       \r
+       // The thread finishes here\r
+       //------------------------------------------------------------------------------\r
+\r
+ILDB_LABEL(READ_FOR_URB_Y):\r
+       // ILDB is skipped for this MB, but we still need to prepare URB data for the right neighbor MB\r
+       #include "load_Cur_Y_Right_Most_4x16.asm"               // Load cur MB ( right most 4x16) Y data from memory\r
+       #include "Transpose_Cur_Y_Right_Most_4x16.asm"                                          \r
+//     jmpi ILDB_LABEL(WRITE_URB_Y)\r
+\r
+       // Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), TOP_TEMP_B must be avail\r
+       mov (16)        m1<1>:ud                LEFT_TEMP_D(2)<8;8,1>           // Copy 2 GRFs to 2 URB entries (Y)\r
+       \r
+       #include "writeURB_Y_Child.asm" \r
+       //-----------------------------------------------------------------\r
+\r
+       // Send notification thru Gateway to root thread, update luma Status[CurRow]\r
+       #include "AVC_ILDB_ForwardMsg.asm"      \r
+\r
+#if !defined(GW_DCN)           // For non-ILK chipsets\r
+       //child send EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+#endif // !defined(GW_DCN)\r
+       \r
+       // The thread finishes here\r
+       //------------------------------------------------------------------------------\r
+       \r
+       ////////////////////////////////////////////////////////////////////////////////\r
+       // Include other subroutines being called\r
+       #include "AVC_ILDB_Luma_Core.asm"\r
+//     #include "AVC_ILDB_Chroma_Core.asm"\r
+\r
+       \r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Chroma_Core.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Chroma_Core.asm
new file mode 100644 (file)
index 0000000..e33d022
--- /dev/null
@@ -0,0 +1,165 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__AVC_ILDB_CHROMA_CORE__) // Make sure this file is only included once\r
+#define __AVC_ILDB_CHROMA_CORE__\r
+\r
+////////// AVC ILDB Chroma Core /////////////////////////////////////////////////////////////////////////////////\r
+//\r
+//     This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.\r
+//     If data is transposed, it can also de-block a vertical edge.\r
+//\r
+//     Before calling this subroutine, the caller needs to set the following parameters.\r
+//\r
+//     - EdgeCntlMap1                          //      Edge control map A\r
+//     - EdgeCntlMap2                          //      Edge control map B\r
+//     - P_AddrReg                                     //      Src and dest address register for P pixels\r
+//     - Q_AddrReg                                     //      Src and dest address register for Q pixels      \r
+//     - alpha                                         //  alpha corresponding to the edge to be filtered\r
+//     - beta                                          //  beta corresponding to the edge to be filtered\r
+//     - tc0                                           //      tc0  corresponding to the edge to be filtered\r
+//\r
+//     U or V:\r
+//     +----+----+----+----+\r
+//     | P1 | p0 | q0 | q1 |\r
+//     +----+----+----+----+\r
+//\r
+//     p1 = r[P_AddrReg, 0]<16;8,2> \r
+//     p0 = r[P_AddrReg, 16]<16;8,2> \r
+//     q0 = r[Q_AddrReg, 0]<16;8,2>  \r
+//     q1 = r[Q_AddrReg, 16]<16;8,2> \r
+//\r
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+// The region is both src and dest\r
+// P0-P3 and Q0-Q3 should be only used if they have not been modified to new values  \r
+#undef         P1\r
+#undef         P0\r
+#undef         Q0\r
+#undef         Q1\r
+\r
+#define P1             r[P_AddrReg,  0]<16;8,2>:ub\r
+#define P0             r[P_AddrReg, 16]<16;8,2>:ub\r
+#define Q0             r[Q_AddrReg,  0]<16;8,2>:ub\r
+#define Q1             r[Q_AddrReg, 16]<16;8,2>:ub\r
+\r
+// New region as dest\r
+#undef         NewP0\r
+#undef         NewQ0\r
+\r
+#define NewP0  r[P_AddrReg, 16]<2>:ub\r
+#define NewQ0  r[Q_AddrReg,  0]<2>:ub\r
+\r
+// Filter one chroma edge (subroutine entry; control goes back to the caller through RETURN)\r
+FILTER_UV:\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x1112:w\r
+#endif\r
+       //---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------\r
+       // bS is in MaskA; NOTE(review): the MaskA load below is disabled, so f0.0 presumably already holds it on entry - confirm against callers\r
+\r
+       // Src copy of the p1, p0, q0, q1\r
+//     mov (8) p1(0)<1>                r[P_AddrReg, 0]<16;8,2>:ub\r
+//     mov (8) p0(0)<1>                r[P_AddrReg, 16]<16;8,2>:ub\r
+//     mov (8) q0(0)<1>                r[Q_AddrReg, 0]<16;8,2>:ub\r
+//     mov (8) q1(0)<1>                r[Q_AddrReg, 16]<16;8,2>:ub\r
+\r
+//     mov (1) f0.0:uw         MaskA:uw\r
+\r
+       add (8) q0_p0(0)<1>                     Q0              -P0                             // q0-p0\r
+       add (8) TempRow0(0)<1>          P1              -P0                             // p1-p0\r
+       add (8) TempRow1(0)<1>          Q1              -Q0                             // q1-q0\r
+\r
+       // Build FilterSampleFlag: each predicated compare narrows f0.0 to the channels that also pass this test\r
+       // abs(q0-p0) < alpha\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)q0_p0(0)                   alpha:w\r
+       // abs(p1-p0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow0(0)                beta:w\r
+       // abs(q1-q0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow1(0)                beta:w\r
+\r
+       //-----------------------------------------------------------------------------------------\r
+\r
+       // if (FilterSampleFlag): only channels that passed all three tests above are filtered\r
+    (f0.0)     if      (8)             UV_ENDIF1\r
+               // For channels whose edge control map1 = 1 ---> perform de-blocking\r
+\r
+//             mov (1)         f0.1:w          MaskB:w         {NoMask}                // Now check for which algorithm to apply\r
+\r
+               (f0.1)  if      (8)             UV_ELSE2\r
+\r
+                       // For channels whose edge control map2 = 1 ---> bS = 4 algorithm \r
+                       // p0' = (2*p1 + p0 + q1 + 2) >> 2\r
+                       // q0' = (2*q1 + q0 + p1 + 2) >> 2\r
+\r
+                       // Optimized version:\r
+                       // A = (p1 + q1 + 2)\r
+                       // p0' = (p0 + p1 + A) >> 2\r
+                       // q0' = (q0 + q1 + A) >> 2\r
+                       //------------------------------------------------------------------------------------\r
+                       \r
+                       // p0' = (2*p1 + p0 + q1 + 2) >> 2\r
+                       add (8) acc0<1>:w               Q1                              2:w\r
+                       mac (8) acc0<1>:w               P1                              2:w\r
+                       add (8) acc0<1>:w               acc0<8;8,1>:w   P0\r
+                       shr.sat (8)     TempRow0B(0)<2>         acc0<8;8,1>:w           2:w\r
+                       \r
+                       // q0' = (2*q1 + q0 + p1 + 2) >> 2\r
+                       add (8) acc0<1>:w               P1                              2:w\r
+                       mac (8) acc0<1>:w               Q1                              2:w\r
+                       add (8) acc0<1>:w               acc0<8;8,1>:w   Q0\r
+                       shr.sat (8)     TempRow1B(0)<2>         acc0<8;8,1>:w           2:w\r
+\r
+                       mov (8) NewP0           TempRow0B(0)                                    // p0'\r
+                       mov (8) NewQ0           TempRow1B(0)                                    // q0'\r
+                       \r
+                       \r
+UV_ELSE2: \r
+               else    (8)             UV_ENDIF2\r
+                       // For channels whose edge control map2 = 0 ---> bS < 4 algorithm\r
+                       \r
+                       // Expand tc0   (tc0 has 4 bytes)\r
+//                     mov (8) tc0_exp(0)<1>   tc0<1;2,0>:ub   {NoMask}                                // tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels   \r
+                       mov (8) acc0<1>:w       tc0<1;2,0>:ub   {NoMask}                                // tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels   \r
+                       \r
+                       // tc_exp = tc0_exp + 1\r
+//                     add (8) tc_exp(0)<1>    tc0_exp(0)              1:w\r
+                       add (8) tc_exp(0)<1>    acc0<8;8,1>:w           1:w\r
+\r
+                       // delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))\r
+                       // 4 * (q0-p0) + p1 - q1 + 4\r
+                       add (8) acc0<1>:w               P1                      4:w\r
+                       mac (8) acc0<1>:w               q0_p0(0)        4:w     \r
+                       add (8) acc0<1>:w               acc0<8;8,1>:w           -Q1\r
+                       shr (8) TempRow0(0)<1>  acc0<8;8,1>:w           3:w\r
+\r
+                       // tc clip (the bound is tc_exp = tc0 + 1)\r
+                       cmp.g.f0.0      (8) null:w              TempRow0(0)             tc_exp(0)                               // Clip if > tc\r
+                       cmp.l.f0.1      (8) null:w              TempRow0(0)             -tc_exp(0)                              // Clip if < -tc\r
+                       \r
+                       (f0.0) mov (8) TempRow0(0)<1>                           tc_exp(0)\r
+                       (f0.1) mov (8) TempRow0(0)<1>                           -tc_exp(0)\r
+                       \r
+                       // p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)\r
+                       add.sat (8)     TempRow1B(0)<2>         P0                      TempRow0(0)                             // p0+delta\r
+               \r
+                       // q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)\r
+                       add.sat (8)     TempRow0B(0)<2>         Q0                      -TempRow0(0)                    // q0-delta\r
+\r
+                       mov (8) NewP0                           TempRow1B(0)                    // p0'\r
+                       mov (8) NewQ0                           TempRow0B(0)                    // q0'\r
+\r
+               endif\r
+UV_ENDIF2:\r
+UV_ENDIF1:\r
+       endif\r
+\r
+RETURN\r
+\r
+#endif // !defined(__AVC_ILDB_CHROMA_CORE__)\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Chroma_Core_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Chroma_Core_Mbaff.asm
new file mode 100644 (file)
index 0000000..f567d95
--- /dev/null
@@ -0,0 +1,146 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB Chroma Core Mbaff /////////////////////////////////////////////////////////////////////////////////\r
+//\r
+//     This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.\r
+//     If data is transposed, it can also de-block a vertical edge.\r
+//\r
+//     Before calling this subroutine, the caller needs to set the following parameters.\r
+//\r
+//     - EdgeCntlMap1                          //      Edge control map A\r
+//     - EdgeCntlMap2                          //      Edge control map B\r
+//     - P_AddrReg                                     //      Src and dest address register for P pixels\r
+//     - Q_AddrReg                                     //      Src and dest address register for Q pixels      \r
+//     - alpha                                         //  alpha corresponding to the edge to be filtered\r
+//     - beta                                          //  beta corresponding to the edge to be filtered\r
+//     - tc0                                           //      tc0  corresponding to the edge to be filtered\r
+//\r
+//     U or V:\r
+//     +----+----+----+----+\r
+//     | P1 | p0 | q0 | q1 |\r
+//     +----+----+----+----+\r
+//\r
+//     p1 = r[P_AddrReg, 0]<16;8,2> \r
+//     p0 = r[P_AddrReg, 16]<16;8,2> \r
+//     q0 = r[Q_AddrReg, 0]<16;8,2>  \r
+//     q1 = r[Q_AddrReg, 16]<16;8,2> \r
+//\r
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+// The region is both src and dest\r
+// P0-P3 and Q0-Q3 should be only used if they have not been modified to new values  \r
+#undef         P1\r
+#undef         P0\r
+#undef         Q0\r
+#undef         Q1\r
+\r
+#define P1             r[P_AddrReg,  0]<16;8,2>:ub\r
+#define P0             r[P_AddrReg, 16]<16;8,2>:ub\r
+#define Q0             r[Q_AddrReg,  0]<16;8,2>:ub\r
+#define Q1             r[Q_AddrReg, 16]<16;8,2>:ub\r
+\r
+// New region as dest\r
+#undef         NewP0\r
+#undef         NewQ0\r
+\r
+#define NewP0  r[P_AddrReg, 16]<2>:ub\r
+#define NewQ0  r[Q_AddrReg,  0]<2>:ub\r
+\r
+// Filter one chroma edge - mbaff (subroutine entry; control goes back to the caller through RETURN)\r
+FILTER_UV_MBAFF:\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x1112:w\r
+#endif\r
+       //---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------\r
+\r
+       //===== Assume f0.0 contains MaskA when entering this routine\r
+//     mov (1) f0.0:uw         MaskA:uw\r
+\r
+       add (8) q0_p0(0)<1>                     Q0              -P0                             // q0-p0\r
+       add (8) TempRow0(0)<1>          P1              -P0                             // p1-p0\r
+       add (8) TempRow1(0)<1>          Q1              -Q0                             // q1-q0\r
+\r
+       // Build FilterSampleFlag: each predicated compare narrows f0.0 to the channels that also pass this test\r
+       // abs(q0-p0) < alpha\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)q0_p0(0)                   Mbaff_ALPHA(0)\r
+       // abs(p1-p0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow0(0)                Mbaff_BETA(0)\r
+       // abs(q1-q0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow1(0)                Mbaff_BETA(0)\r
+\r
+       //-----------------------------------------------------------------------------------------\r
+\r
+       // if (FilterSampleFlag): only channels that passed all three tests above are filtered\r
+    (f0.0)     if      (8)             MBAFF_UV_ENDIF1\r
+               // For channels whose edge control map1 = 1 ---> perform de-blocking\r
+\r
+//             mov (1)         f0.1:w          MaskB:w         {NoMask}                // Now check for which algorithm to apply\r
+\r
+               (f0.1)  if      (8)             MBAFF_UV_ELSE2\r
+\r
+                       // For channels whose edge control map2 = 1 ---> bS = 4 algorithm \r
+                       // p0' = (2*p1 + p0 + q1 + 2) >> 2\r
+                       // q0' = (2*q1 + q0 + p1 + 2) >> 2\r
+                       //------------------------------------------------------------------------------------\r
+\r
+                       // p0' = (2*p1 + p0 + q1 + 2) >> 2\r
+                       add (8) acc0<1>:w               Q1                              2:w\r
+                       mac (8) acc0<1>:w               P1                              2:w\r
+                       add (8) acc0<1>:w               acc0<8;8,1>:w   P0\r
+                       shr.sat (8)     TempRow0B(0)<2>         acc0<8;8,1>:w           2:w\r
+\r
+                       // q0' = (2*q1 + q0 + p1 + 2) >> 2\r
+                       add (8) acc0<1>:w               P1                              2:w\r
+                       mac (8) acc0<1>:w               Q1                              2:w\r
+                       add (8) acc0<1>:w               acc0<8;8,1>:w   Q0\r
+                       shr.sat (8)     TempRow1B(0)<2>         acc0<8;8,1>:w           2:w\r
+\r
+                       mov (8) NewP0           TempRow0B(0)                                    // p0'\r
+                       mov (8) NewQ0           TempRow1B(0)                                    // q0'\r
+                       \r
+MBAFF_UV_ELSE2: \r
+               else    (8)             MBAFF_UV_ENDIF2\r
+                       // For channels whose edge control map2 = 0 ---> bS < 4 algorithm\r
+                       \r
+                       // tc_exp = tc0_exp + 1 (tc0 is read from Mbaff_TC0 here)\r
+                       add (8) tc_exp(0)<1>    Mbaff_TC0(0)            1:w\r
+\r
+                       // delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))\r
+                       // 4 * (q0-p0) + p1 - q1 + 4\r
+                       add (8) acc0<1>:w               P1                      4:w\r
+                       mac (8) acc0<1>:w               q0_p0(0)        4:w     \r
+                       add (8) acc0<1>:w               acc0<8;8,1>:w           -Q1\r
+                       shr (8) TempRow0(0)<1>  acc0<8;8,1>:w           3:w\r
+\r
+                       // tc clip (the bound is tc_exp = tc0 + 1)\r
+                       cmp.g.f0.0      (8) null:w              TempRow0(0)             tc_exp(0)                               // Clip if > tc\r
+                       cmp.l.f0.1      (8) null:w              TempRow0(0)             -tc_exp(0)                              // Clip if < -tc\r
+                       \r
+                       (f0.0) mov (8) TempRow0(0)<1>                           tc_exp(0)\r
+                       (f0.1) mov (8) TempRow0(0)<1>                           -tc_exp(0)\r
+                       \r
+                       // p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)\r
+                       add.sat (8)     TempRow1B(0)<2>         P0                      TempRow0(0)                             // p0+delta\r
+               \r
+                       // q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)\r
+                       add.sat (8)     TempRow0B(0)<2>         Q0                      -TempRow0(0)                    // q0-delta\r
+\r
+                       mov (8) NewP0                           TempRow1B(0)                    // p0'\r
+                       mov (8) NewQ0                           TempRow0B(0)                    // q0'\r
+\r
+               endif\r
+MBAFF_UV_ENDIF2:\r
+MBAFF_UV_ENDIF1:\r
+       endif\r
+\r
+RETURN\r
+\r
+\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_CloseGateway.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_CloseGateway.asm
new file mode 100644 (file)
index 0000000..e522ce4
--- /dev/null
@@ -0,0 +1,22 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//----- Close a Message Gateway -----\r
+// Include fragment: sends a CloseGateway request to the message gateway shared function.\r
+// The message payload is taken from m7, with r0.0 supplied as the send source operand.\r
+\r
+#if defined(_DEBUG) \r
+       // Debug signature is a 16-bit tag, so the destination must be :w like the other\r
+       // kernels' EntrySignature writes; a :b destination would keep only the low byte (0x44).\r
+       mov             (1)             EntrySignature:w                        0x4444:w\r
+#endif\r
+\r
+// Message descriptor\r
+// bit 31      EOT (end of thread) = 0\r
+// 27:24       FFID = 0011b for msg gateway\r
+// 23:20       msg length = 1 MRF\r
+// 19:16       Response length = 0\r
+// 1:0         SubFuncID = 01b for CloseGateway\r
+// Message descriptor: 0 000 0011 0001 0000 + 0 0 000000000000 01 ==> 0000 0011 0001 0000 0000 0000 0000 0001 (= 0x03100001)\r
+// NOTE(review): CGWMSGDSC is defined elsewhere and is presumably this value - confirm.\r
+send (8)       null:ud         m7        r0.0:ud    MSG_GW     CGWMSGDSC\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Dep_Check.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Dep_Check.asm
new file mode 100644 (file)
index 0000000..70f8a55
--- /dev/null
@@ -0,0 +1,186 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//---------- Check dependency and spawn all MBs ----------\r
+\r
+// Launch the 1st round of child threads for Vertical ILDB\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignature:w                        0x3333:w\r
+#endif\r
+\r
+//=====================================================================\r
+// Jump Table 1: four dword jmpi offsets relative to ALL_SPAWNED (entries 0 and 1 are zero)\r
+       // 0 0 ---> Goto ALL_SPAWNED\r
+       // 0 1 ---> Goto ALL_SPAWNED\r
+       // 1 0 ---> Goto SLEEP_ENTRY\r
+       // 1 1 ---> Goto POST_SLEEP\r
+       mov (2)         JumpTable.0<1>:d        0:d                             { NoDDClr }\r
+#if defined(CHROMA_ROOT)               \r
+       mov (1)         JumpTable.2:d   SLEEP_ENTRY_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d             { NoDDClr, NoDDChk }\r
+       mov (1)         JumpTable.3:d   POST_SLEEP_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d              { NoDDChk }\r
+#else\r
+       mov (1)         JumpTable.2:d   SLEEP_ENTRY_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d               { NoDDClr, NoDDChk }\r
+       mov (1)         JumpTable.3:d   POST_SLEEP_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d                { NoDDChk }\r
+#endif\r
+//=====================================================================\r
+\r
+       mov (2)         f0.0<1>:w               0:w\r
+\r
+       // Get most fields of m0 ready for the URB write (copied from MSGSRC)\r
+       mov     (8)                     MRF0<1>:ud              MSGSRC.0<8;8,1>:ud\r
+\r
+       // Add child kernel offset\r
+       add (1)         CT_R0Hdr.2:ud                   r0.2:ud                 CHILD_OFFSET:w\r
+\r
+       // Init boundaries, the top row for scanning, and the two words starting at OutstandingThreads\r
+       mov (1)         Col_Boundary:w                  2:w\r
+       mov (1)         Row_Boundary:w                  LastRow:w\r
+       mov (1)         TopRowForScan:w                 0:w\r
+       mov (2)         OutstandingThreads<1>:w 0:w\r
+\r
+       // Init Scoreboard  (idle = 0x00FF, busy = 0x0000)\r
+       // Low word is saved col.  High word is busy/idle status\r
+       mov     (16)            GatewayAperture(0)<1>   0x00FF00FF:ud           // Init r6-r7\r
+       mov     (16)            GatewayAperture(2)<1>   0x00FF00FF:ud           // Init r8-r9\r
+       mov     (16)            GatewayAperture(4)<1>   0x00FF00FF:ud           // Init r10-r11\r
+       mov     (16)            GatewayAperture(6)<1>   0x00FF00FF:ud           // Init r12-r13\r
+       mov     (16)            GatewayAperture(8)<1>   0x00FF00FF:ud           // Init r14-r15\r
+\r
+       mul     (1)                     StatusAddr:w            CurRow:w                4:w             // dword to bytes offset conversion (4 bytes per scoreboard entry)\r
+\r
+       //=====================================================================\r
+\r
+//SPAWN_LOOP:\r
+       //===== OutstandingThreads < ThreadLimit ? ============================\r
+       cmp.l.f0.1 (1)  null:w          OutstandingThreads:w    ThreadLimit:w           // Check the thread limit; below the limit we skip the sleep\r
+#if defined(CHROMA_ROOT) \r
+    (f0.1) jmpi                ILDB_LABEL(POST_SLEEP_UV)\r
+#else  // LUMA_ROOT\r
+    (f0.1) jmpi                ILDB_LABEL(POST_SLEEP_Y)\r
+#endif\r
+\r
+#if defined(CHROMA_ROOT) \r
+ILDB_LABEL(SLEEP_ENTRY_UV):\r
+#else  // LUMA_ROOT\r
+ILDB_LABEL(SLEEP_ENTRY_Y):\r
+#endif\r
+    //===== Goto Sleep ====================================================\r
+    // Either reached max thread limit or no child thread can be spawned due to dependency.\r
+       add     (1)                     OutstandingThreads:w    OutstandingThreads:w    -1:w // Doing the decrement before the wait is faster\r
+       wait                    n0.0:d                                                                                          \r
+\r
+#if defined(CHROMA_ROOT) \r
+ILDB_LABEL(POST_SLEEP_UV):\r
+#else  // LUMA_ROOT\r
+ILDB_LABEL(POST_SLEEP_Y):\r
+#endif\r
+       //===== Status[CurRow] == busy ? (the comparison below tests the status word against 0, i.e. busy) =====\r
+       cmp.z.f0.0 (1)  null:uw         r[StatusAddr, GatewayApertureB+ScoreBd_Idx]:uw          0:uw                    // Check west neighbor\r
+       cmp.g.f0.1 (1)  null:w          CurCol:w                LastCol:w               // Check if the curCol > LastCol\r
+\r
+#if defined(CHROMA_ROOT) \r
+       mov     (16)            acc0.0<1>:w             URBOffsetUVBase<0;1,0>:w                        // Add offset to UV base (MBsCntY * URB_ENTRIES_PER_MB)\r
+       mac (1)                 URBOffset:w             CurRow:w                        4:w                             // 4 entries per row\r
+#else\r
+       mul     (1)                     URBOffset:w             CurRow:w                        4:w                             // 4 entries per row\r
+#endif\r
+\r
+#if defined(CHROMA_ROOT) \r
+       (f0.0) jmpi             ILDB_LABEL(SLEEP_ENTRY_UV)                                                              // Current row has a child thread running, cannot spawn a new child thread, go back to sleep\r
+       (f0.1) jmpi             ILDB_LABEL(NEXT_MB_UV)                                                                  // skip MB if the curCol > LastCol \r
+#else  // LUMA_ROOT\r
+       (f0.0) jmpi             ILDB_LABEL(SLEEP_ENTRY_Y)                                                               // Current row has a child thread running, cannot spawn a new child thread, go back to sleep\r
+       (f0.1) jmpi             ILDB_LABEL(NEXT_MB_Y)                                                                   // skip MB if the curCol > LastCol \r
+#endif\r
+               \r
+       //========== Spawn a child thread ========================================\r
+       // Save cur col and set Status[CurRow] to busy\r
+       mov (2)                 r[StatusAddr, GatewayApertureB]<1>:uw           CurColB<2;2,1>:ub               // Store the new col\r
+                       \r
+       // Increase OutstandingThreads and ProcessedMBs by 1\r
+       add     (2)                     OutstandingThreads<1>:w         OutstandingThreads<2;2,1>:w             1:w  \r
+\r
+       #include "AVC_ILDB_SpawnChild.asm"\r
+\r
+       //===== Find next MB ===================================================\r
+#if defined(CHROMA_ROOT) \r
+ILDB_LABEL(NEXT_MB_UV):\r
+#else  // LUMA_ROOT\r
+ILDB_LABEL(NEXT_MB_Y):\r
+#endif\r
+       // Check pic boundary, results are in f0.0 bit0 and bit1\r
+       cmp.ge.f0.0     (2)     null<1>:w   CurCol<2;2,1>:w     Col_Boundary<2;2,1>:w\r
+\r
+       // Update TopRowForScan if the curCol = LastCol (NOTE(review): f0.1 here was last set by the curCol > LastCol compare - confirm)\r
+       (f0.1) add (1)  TopRowForScan:w         CurRow:w                1:w     \r
+\r
+//     cmp.l.f0.1 (1)  null<1>:w               ProcessedMBs:w          TotalBlocks:w           // Processed all blocks ?\r
+       // 2 sets compare\r
+       // ProcessedMBs:w < TotalBlocks:w               OutstandingThreads:w < ThreadLimit:w\r
+       // 0 0 ---> Goto ALL_SPAWNED\r
+       // 0 1 ---> Goto ALL_SPAWNED\r
+       // 1 0 ---> Goto SLEEP_ENTRY\r
+       // 1 1 ---> Goto POST_SLEEP\r
+       cmp.l.f0.1 (2)  null<1>:w               OutstandingThreads<2;2,1>:w     ThreadLimit<2;2,1>:w\r
+\r
+       // Just do it in stalled cycles\r
+       mov (1)         acc0.0:w                4:w\r
+       mac     (1)             StatusAddr:w            CurRow:w                4:w                                             // dword to bytes offset conversion (acc0 holds 4, so this yields CurRow*4 + 4)\r
+       add (2)         CurCol<1>:w             CurCol<2;2,1>:w         StepToNextMB<2;2,1>:b   // CurCol -= 2 and CurRow += 1\r
+               \r
+       // Set f0.0 if turning around is needed, assuming bit 15 - 2 are zeros for correct comparison.\r
+       cmp.nz.f0.0 (1) null<1>:w       f0.0:w          0x01:w\r
+               \r
+       mul (1)         JumpAddr:w              f0.1:w          4:w             // byte offset into the jump table (4 bytes per dword entry)\r
+               \r
+       // The next MB is at the row TopRowForScan\r
+       (f0.0) mul (1)          StatusAddr:w    TopRowForScan:w         4:w                             // dword to bytes offset conversion\r
+       (f0.0) mov (1)          CurRow:w                TopRowForScan:w                                                         { NoDDClr }     // Restart from the top row that has MBs not deblocked yet.\r
+       (f0.0) add (1)          CurCol:w                r[StatusAddr, GatewayApertureB]:uw              1:w             { NoDDChk }\r
+       \r
+       //===== Processed all blocks ? =========================================\r
+       // (f0.1) jmpi          SPAWN_LOOP\r
+\r
+       jmpi    r[JumpAddr, JUMPTABLE_BASE]:d\r
+//JUMP_BASE:\r
+\r
+       //======================================================================\r
+\r
+       // All MBs are spawned at this point, check the outstanding thread count\r
+#if defined(CHROMA_ROOT) \r
+ILDB_LABEL(ALL_SPAWNED_UV):\r
+#else  // LUMA_ROOT\r
+ILDB_LABEL(ALL_SPAWNED_Y):\r
+#endif\r
+       cmp.e.f0.1 (1)  null:w          OutstandingThreads:w            0:w                     // Check before going to sleep\r
+#if defined(CHROMA_ROOT) \r
+       (f0.1) jmpi             ILDB_LABEL(ALL_DONE_UV)\r
+#else  // LUMA_ROOT\r
+       (f0.1) jmpi             ILDB_LABEL(ALL_DONE_Y)\r
+#endif\r
+       \r
+       wait                    n0.0:d                                                                                          // Woken up by a finished child thread\r
+       add     (1)                     OutstandingThreads:w    OutstandingThreads:w    -1:w\r
+\r
+#if defined(CHROMA_ROOT) \r
+       // One thread is free and give it to luma thread limit --- Increase luma thread limit by one.\r
+       #include "AVC_ILDB_LumaThrdLimit.asm"\r
+#endif\r
+\r
+#if defined(CHROMA_ROOT) \r
+    jmpi                       ILDB_LABEL(ALL_SPAWNED_UV)                                                      // Woken up; go back and re-check the outstanding thread count\r
+#else  // LUMA_ROOT\r
+    jmpi                       ILDB_LABEL(ALL_SPAWNED_Y)                                                       // Woken up; go back and re-check the outstanding thread count\r
+#endif\r
+\r
+       // All child threads are finished at this point \r
+#if defined(CHROMA_ROOT) \r
+ILDB_LABEL(ALL_DONE_UV):\r
+#else  // LUMA_ROOT\r
+ILDB_LABEL(ALL_DONE_Y):\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_h.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_h.asm
new file mode 100644 (file)
index 0000000..ff807d5
--- /dev/null
@@ -0,0 +1,223 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter horizontal Mbaff UV ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of UV.\r
+//\r
+//     It assumes the data for horizontal de-blocking is already transposed.  \r
+//\r
+//             Chroma:\r
+//\r
+//             +-------+-------+               H0 Edge\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+               H1 Edge\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+\r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBC:w\r
+#endif \r
+\r
+//=============== Chroma deblocking ================\r
+\r
+//---------- Deblock UV external top edge ----------\r
+\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterTopMbEdgeFlag:w           // Check for FilterTopMbEdgeFlag (f0.0 set when the flag bit is clear)\r
+\r
+       mov     (1)     f0.1:w          DualFieldMode:w         // Check for dual field mode\r
+\r
+       // Get Luma maskA and maskB (done before the branches so both paths can use TempRow0/TempRow1)\r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<0;1,0>:uw             RRampW(0)\r
+       shr (16)        TempRow1(0)<1>          r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz0]<0;1,0>:uw             RRampW(0)\r
+\r
+    (f0.0)     jmpi    H0_UV_DONE                              // Skip H0 UV edge\r
+\r
+       (f0.1) jmpi DUAL_FIELD_UV\r
+\r
+       // Non dual field mode  \r
+\r
+       // Extract UV MaskA and MaskB from every other bit of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<16;8,2>             1:w\r
+\r
+       // Ext U\r
+       //      p1 = Prev MB U row 0\r
+       //      p0 = Prev MB U row 1\r
+       //      q0 = Cur MB U row 0\r
+       //      q1 = Cur MB U row 1\r
+       mov (1) P_AddrReg:w             PREV_MB_U_BASE:w        { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_U_BASE:w         { NoDDChk }\r
+       \r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub\r
+\r
+       // Store UV MaskA and MaskB (f0.0 and f0.1) so the V pass below can reload them\r
+       mov (2)         MaskA<1>:uw                     f0.0<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+\r
+       // Ext V\r
+       mov (1) P_AddrReg:w             PREV_MB_V_BASE:w        { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_V_BASE:w         { NoDDChk }\r
+\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub\r
+\r
+       // Reload the stored UV MaskA and MaskB into f0.0 and f0.1\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+\r
+       jmpi H0_UV_DONE \r
+       \r
+DUAL_FIELD_UV:\r
+       // Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1\r
+\r
+       //===== Ext U, Top field\r
+\r
+       // Extract UV MaskA and MaskB from every other bit of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<16;8,2>             1:w\r
+\r
+       mov (1) P_AddrReg:w             ABOVE_CUR_MB_BASE:w                     { NoDDClr }\r
+       mov (1) Q_AddrReg:w             ABOVE_CUR_MB_BASE+32:w          { NoDDChk }\r
+\r
+       mov (16) ABOVE_CUR_MB_UW(0)<1>  PREV_MB_UW(0, 0)<16;8,1>        // Copy p1, p0\r
+       mov (16) ABOVE_CUR_MB_UW(1)<1>  SRC_UW(0, 0)<16;8,1>            // Copy q1, q0\r
+\r
+       //===== Ext U, top field\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub\r
+\r
+       // Store UV MaskA and MaskB (f0.0 and f0.1) so the V pass below can reload them\r
+       mov (2)         MaskA<1>:uw                     f0.0<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        // Ext U, top field\r
+\r
+       //===== Ext V, top field\r
+       mov (1) P_AddrReg:w             ABOVE_CUR_MB_BASE+1:w           { NoDDClr }\r
+       mov (1) Q_AddrReg:w             ABOVE_CUR_MB_BASE+33:w          { NoDDChk }\r
+\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub\r
+\r
+       // Reload the stored UV MaskA and MaskB into f0.0 and f0.1\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        // Ext V, top field\r
+\r
+       // Prefetch for bottom field\r
+       // Get bot field Luma maskA and maskB   \r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<0;1,0>:uw             RRampW(0)\r
+       shr (16)        TempRow1(0)<1>          r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz1]<0;1,0>:uw             RRampW(0)\r
+\r
+       // Save deblocked top field rows\r
+       mov (8) PREV_MB_UW(1, 0)<1>             ABOVE_CUR_MB_UW(0, 8)   // Copy p0\r
+       mov (8) SRC_UW(0, 0)<1>                 ABOVE_CUR_MB_UW(1, 0)   // Copy q0\r
+       //==========================================================================\r
+\r
+       //===== Ext U, Bot field \r
+       \r
+       // Extract UV MaskA and MaskB from every other bit of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<16;8,2>             1:w\r
+\r
+       mov (1) P_AddrReg:w             ABOVE_CUR_MB_BASE:w                     { NoDDClr }\r
+       mov (1) Q_AddrReg:w             ABOVE_CUR_MB_BASE+32:w          { NoDDChk }\r
+\r
+       mov (16) ABOVE_CUR_MB_UW(0)<1>  PREV_MB_UW(0, 8)<16;8,1>        // Copy p1, p0\r
+       mov (16) ABOVE_CUR_MB_UW(1)<1>  SRC_UW(0, 8)<16;8,1>            // Copy q1, q0\r
+\r
+       //===== Ext U, bottom field\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaTop1_Cb]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaTop1_Cb]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h00_1_Cb]<1;2,0>:ub\r
+\r
+       // Store UV MaskA and MaskB (f0.0 and f0.1) so the V pass below can reload them\r
+       mov (2)         MaskA<1>:uw                     f0.0<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        // Ext U, bottom field\r
+\r
+       //===== Ext V, bot field\r
+       mov (1) P_AddrReg:w             ABOVE_CUR_MB_BASE+1:w           { NoDDClr }\r
+       mov (1) Q_AddrReg:w             ABOVE_CUR_MB_BASE+33:w          { NoDDChk }\r
+\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaTop1_Cr]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaTop1_Cr]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h00_1_Cr]<1;2,0>:ub\r
+\r
+       // Reload the stored UV MaskA and MaskB into f0.0 and f0.1\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        // Ext V, bottom field\r
+       \r
+       // Save deblocked bot field rows\r
+       mov (8) PREV_MB_UW(1, 8)<1>             ABOVE_CUR_MB_UW(0, 8)   // Copy p0\r
+       mov (8) SRC_UW(0, 8)<1>                 ABOVE_CUR_MB_UW(1, 0)   // Copy q0\r
+       //========================================\r
+\r
+H0_UV_DONE:\r
+\r
+//---------- Deblock U internal horz middle edge ----------\r
+\r
+       //***** Need to take every other bit to form U maskA in core\r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]<0;1,0>:uw               RRampW(0)\r
+\r
+       //      p1 = Cur MB U row 2\r
+       //      p0 = Cur MB U row 3\r
+       //      q0 = Cur MB U row 4\r
+       //      q1 = Cur MB U row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_U_BASE:w          { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_U_BASE:w          { NoDDChk }\r
+\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h20_Cb]<1;2,0>:ub\r
+\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+\r
+       // Store UV MaskA; MaskB (and f0.1) is forced to 0 for the internal edge\r
+       mov (1) f0.1:uw         0:w\r
+       mov (1) MaskB:uw        0:w                     { NoDDClr }\r
+       mov (1) MaskA:uw        f0.0:uw         { NoDDChk }\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+//-----------------------------------------------\r
+\r
+\r
+//---------- Deblock V internal horz middle edge ----------\r
+\r
+       //      p1 = Cur MB V row 2\r
+       //      p0 = Cur MB V row 3\r
+       //      q0 = Cur MB V row 4\r
+       //      q1 = Cur MB V row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_V_BASE:w          { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_V_BASE:w          { NoDDChk }\r
+\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_h20_Cr]<1;2,0>:ub\r
+\r
+       // Reload the stored UV MaskA and MaskB into f0.0 and f0.1\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+//-----------------------------------------------\r
+\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_v.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_v.asm
new file mode 100644 (file)
index 0000000..c0f2678
--- /dev/null
@@ -0,0 +1,209 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter vertical Mbaff UV ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all vertical edges of UV.\r
+//\r
+//     It assumes the data for vertical de-blocking is already transposed.  \r
+//\r
+//             Chroma:\r
+//\r
+//             +-------+-------+\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+\r
+//\r
+//             V0              V1              \r
+//             Edge    Edge    \r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBC:w\r
+#endif \r
+\r
+//=============== Chroma deblocking ================\r
+\r
+//---------- Deblock U external left edge ----------\r
+\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterLeftMbEdgeFlag:w          // Check for FilterLeftMbEdgeFlag (f0.0 set when the flag bit is clear)\r
+\r
+       cmp.z.f0.1      (1)     null:w  VertEdgePattern:uw              LEFT_FIELD_CUR_FRAME:w\r
+\r
+       // Get Luma maskA and maskB (done before the branches so every path can use TempRow0/TempRow1)\r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<0;1,0>:uw            RRampW(0)\r
+       shr (16)        TempRow1(0)<1>          r[ECM_AddrReg, wEdgeCntlMapB_ExtLeftVert0]<0;1,0>:uw            RRampW(0)\r
+       \r
+    (f0.0)     jmpi    BYPASS_V0_UV    // Do not deblock Left ext edge\r
+\r
+       cmp.z.f0.0      (1)     null:w  VertEdgePattern:uw              LEFT_FRAME_CUR_FIELD:w\r
+\r
+       (-f0.1) jmpi V0_U_NEXT1 // Jump if not LEFT_FIELD_CUR_FRAME\r
+\r
+       //----- For LEFT_FIELD_CUR_FRAME\r
+       \r
+       // Extract UV MaskA and MaskB from every other 2 bits of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<4;2,1>              1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<4;2,1>              1:w\r
+\r
+       // For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0 \r
+       mov     (4)     Mbaff_ALPHA(0,0)<2>             r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub        { NoDDClr }\r
+       mov     (4)     Mbaff_ALPHA(0,1)<2>             r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub        { NoDDChk }\r
+       mov     (4)     Mbaff_BETA(0,0)<2>              r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub         { NoDDClr }\r
+       mov     (4)     Mbaff_BETA(0,1)<2>              r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub         { NoDDChk }\r
+       mov (4) Mbaff_TC0(0,0)<2>               r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub         { NoDDClr }\r
+       mov (4) Mbaff_TC0(0,1)<2>               r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub         { NoDDChk }\r
+\r
+       jmpi    V0_U_NEXT3\r
+\r
+V0_U_NEXT1:\r
+       \r
+       (-f0.0) jmpi V0_U_NEXT2                 // Jump if not LEFT_FRAME_CUR_FIELD\r
+       \r
+       //----- For LEFT_FRAME_CUR_FIELD\r
+               \r
+       // Extract UV MaskA and MaskB from every other bit of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<16;8,2>             1:w\r
+\r
+       // For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1\r
+       mov     (4)     Mbaff_ALPHA(0,0)<1>             r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub        { NoDDClr }\r
+       mov     (4)     Mbaff_ALPHA(0,4)<1>             r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub        { NoDDChk }\r
+       mov     (4)     Mbaff_BETA(0,0)<1>              r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub         { NoDDClr }\r
+       mov     (4)     Mbaff_BETA(0,4)<1>              r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub         { NoDDChk }\r
+       mov (4) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub         { NoDDClr }\r
+       mov (4) Mbaff_TC0(0,4)<1>               r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub         { NoDDChk }\r
+\r
+       jmpi    V0_U_NEXT3\r
+       \r
+V0_U_NEXT2:\r
+\r
+       // Extract UV MaskA and MaskB from every other bit of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<16;8,2>             1:w\r
+       \r
+       // Both are frames or fields\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v00_0_Cb]<1;2,0>:ub\r
+\r
+V0_U_NEXT3:    \r
+\r
+       //      p1 = Prev MB U row 0\r
+       //      p0 = Prev MB U row 1\r
+       //      q0 = Cur MB U row 0\r
+       //      q1 = Cur MB U row 1\r
+       mov (1) P_AddrReg:w             PREV_MB_U_BASE:w        { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_U_BASE:w         { NoDDChk }\r
+\r
+       // Store UV MaskA and MaskB (f0.0 and f0.1) so the V pass below can reload them\r
+       mov (2)         MaskA<1>:uw                     f0.0<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+//-----------------------------------------------\r
+\r
+//---------- Deblock V external left edge ----------\r
+\r
+       // No change to MaskA and MaskB; f0.0/f0.1 are used here as 4-lane predicates for the edge pattern\r
+\r
+       cmp.z.f0.0      (4)     null:w  VertEdgePattern:uw              LEFT_FIELD_CUR_FRAME:w\r
+       cmp.z.f0.1      (4)     null:w  VertEdgePattern:uw              LEFT_FRAME_CUR_FIELD:w\r
+\r
+       // both are frame or field (default values; the predicated moves below overwrite them for the mixed patterns)\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v00_0_Cr]<1;2,0>:ub\r
+                               \r
+       //      p1 = Prev MB V row 0\r
+       //      p0 = Prev MB V row 1\r
+       //      q0 = Cur MB V row 0\r
+       //      q1 = Cur MB V row 1\r
+       mov (1) P_AddrReg:w             PREV_MB_V_BASE:w        { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_V_BASE:w         { NoDDChk }\r
+                               \r
+       // For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0 \r
+       (f0.0) mov (4)  Mbaff_ALPHA(0,0)<2>             r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub        { NoDDClr }\r
+       (f0.0) mov (4)  Mbaff_ALPHA(0,1)<2>             r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub        { NoDDChk }     \r
+       (f0.0) mov (4)  Mbaff_BETA(0,0)<2>              r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub         { NoDDClr }\r
+       (f0.0) mov (4)  Mbaff_BETA(0,1)<2>              r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub         { NoDDChk }\r
+       (f0.0) mov (4)  Mbaff_TC0(0,0)<2>               r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub         { NoDDClr }\r
+       (f0.0) mov (4)  Mbaff_TC0(0,1)<2>               r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub         { NoDDChk }\r
+\r
+       // For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1\r
+       (f0.1) mov (4)  Mbaff_ALPHA(0,0)<1>             r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub        { NoDDClr }\r
+       (f0.1) mov (4)  Mbaff_ALPHA(0,4)<1>             r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub        { NoDDChk }\r
+       (f0.1) mov (4)  Mbaff_BETA(0,0)<1>              r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub         { NoDDClr }\r
+       (f0.1) mov (4)  Mbaff_BETA(0,4)<1>              r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub         { NoDDChk }\r
+       (f0.1) mov (4)  Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub         { NoDDClr }\r
+       (f0.1) mov (4)  Mbaff_TC0(0,4)<1>               r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub         { NoDDChk }\r
+\r
+       // Reload the stored UV MaskA and MaskB into f0.0 and f0.1 (they were overwritten by the compares above)\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+//-----------------------------------------------\r
+\r
+BYPASS_V0_UV:\r
+       // Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.\r
+       // Same alpha and beta for all internal vert and horiz edges \r
+\r
+//---------- Deblock U internal vert middle edge ----------\r
+\r
+       //***** Need to take every other bit to form U or V maskA\r
+       shr (16) TempRow0(0)<1>                 r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]<0;1,0>:uw               RRampW(0)\r
+\r
+       //      p1 = Cur MB U row 2\r
+       //      p0 = Cur MB U row 3\r
+       //      q0 = Cur MB U row 4\r
+       //      q1 = Cur MB U row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_U_BASE:w          { NoDDClr }             // Skip 2 U rows and 2 V rows\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_U_BASE:w          { NoDDChk }\r
+\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v02_Cb]<1;2,0>:ub\r
+\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+\r
+       // Store MaskA; MaskB (and f0.1) is forced to 0 for the internal edge\r
+       mov (1) f0.1:uw         0:w                     \r
+       mov (1) MaskB:uw        0:w                     { NoDDClr }\r
+       mov (1) MaskA:uw        f0.0:uw         { NoDDChk }\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+       \r
+//-----------------------------------------------\r
+\r
+\r
+//---------- Deblock V internal vert middle edge ----------\r
+\r
+       //      P1 = Cur MB V row 2\r
+       //      P0 = Cur MB V row 3\r
+       //      Q0 = Cur MB V row 4\r
+       //      Q1 = Cur MB V row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_V_BASE:w          { NoDDClr }             // Skip 2 U rows and 2 V rows\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_V_BASE:w          { NoDDChk }\r
+\r
+       // Put MaskA into f0.0\r
+       // Put MaskB into f0.1\r
+       mov (2) f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       mov     (8) Mbaff_ALPHA(0,0)<1>         r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub\r
+       mov     (8) Mbaff_BETA(0,0)<1>          r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v02_Cr]<1;2,0>:ub\r
+\r
+       CALL(FILTER_UV_MBAFF, 1)        \r
+\r
+//-----------------------------------------------\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_h.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_h.asm
new file mode 100644 (file)
index 0000000..a98b024
--- /dev/null
@@ -0,0 +1,234 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter horizontal Mbaff Y ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of Y.\r
+//\r
+//     It assumes the data for horizontal de-blocking is already transposed.  \r
+//\r
+//             Luma:\r
+//\r
+//             +-------+-------+-------+-------+               H0  Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+               H1 Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+               H2      Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+               H3 Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBB:w                // Debug builds only: tag entry into this code\r
+#endif \r
+       \r
+\r
+//========== Luma deblocking ==========\r
+\r
+\r
+//---------- Deblock Y external top edge (H0)  ----------      \r
+\r
+       // Bypass deblocking when FilterTopMbEdgeFlag is clear (e.g. top edge of the picture): and.z sets f0.0 in that case.\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterTopMbEdgeFlag:w           // Check for FilterTopMbEdgeFlag \r
+       mov     (1)     f0.1:w          DualFieldMode:w                 // Check for dual field mode\r
+               \r
+       // The control data loaded below uses the Top0 values; it serves both the non dual field path and the top field pass of dual field mode.\r
+\r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 by the add further down\r
+       shr (16) Mbaff_ALPHA2(0,0)<1>   r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub          2:w                     // alpha >> 2\r
+\r
+       mov (2) MaskA<1>:uw     r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw\r
+\r
+       // Ext Y\r
+       mov     (16) Mbaff_ALPHA(0,0)<1>        r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub\r
+       mov     (16) Mbaff_BETA(0,0)<1>         r[ECM_AddrReg, bBetaTop0_Y]<0;1,0>:ub\r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_h00_0_Y]<1;4,0>:ub\r
+\r
+       add (16) Mbaff_ALPHA2(0,0)<1>           Mbaff_ALPHA2(0,0)<16;16,1>              2:w                                     // alpha2 = (alpha >> 2) + 2  \r
+\r
+    (f0.0) jmpi        H0_Y_DONE                               // Skip Ext Y deblocking\r
+       (f0.1) jmpi     DUAL_FIELD_Y                            // f0.1 was loaded from DualFieldMode above\r
+       \r
+       mov (1) P_AddrReg:w             PREV_MB_Y_BASE:w                { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_Y_BASE:w                 { NoDDChk }\r
+       \r
+       CALL(FILTER_Y_MBAFF, 1)                 // Non dual field deblocking\r
+               \r
+       jmpi    H0_Y_DONE\r
+\r
+DUAL_FIELD_Y:\r
+       // Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1\r
+\r
+       mov (1) P_AddrReg:w             ABOVE_CUR_MB_BASE:w             { NoDDClr }\r
+       mov (1) Q_AddrReg:w             ABOVE_CUR_MB_BASE+64:w  { NoDDChk }\r
+\r
+       //  Must use PREV_MB_YW.  TOP_MB_YW is not big enough.\r
+       // Get top field rows: gather them into the ABOVE_CUR_MB scratch area that P_AddrReg/Q_AddrReg point at\r
+       mov (16) ABOVE_CUR_MB_YW(0)<1>  PREV_MB_YW(0, 0)<16;8,1>        // Copy p3, p2\r
+       mov (16) ABOVE_CUR_MB_YW(1)<1>  PREV_MB_YW(2, 0)<16;8,1>        // Copy p1, p0\r
+       mov (16) ABOVE_CUR_MB_YW(2)<1>  SRC_YW(0, 0)<16;8,1>            // Copy q0, q1\r
+       mov (16) ABOVE_CUR_MB_YW(3)<1>  SRC_YW(2, 0)<16;8,1>            // Copy q2, q3\r
+\r
+       CALL(FILTER_Y_MBAFF, 1)                         // Ext Y, top field\r
+\r
+       // Save deblocked top field rows (p2..q2 only; p3 and q3 are not copied back)\r
+       mov (8) PREV_MB_YW(1, 0)<1>             ABOVE_CUR_MB_YW(0, 8)   // Copy p2\r
+       mov (8) PREV_MB_YW(2, 0)<1>             ABOVE_CUR_MB_YW(1, 0)   // Copy p1\r
+       mov (8) PREV_MB_YW(3, 0)<1>             ABOVE_CUR_MB_YW(1, 8)   // Copy p0\r
+       mov (8) SRC_YW(0, 0)<1>                 ABOVE_CUR_MB_YW(2, 0)   // Copy q0\r
+       mov (8) SRC_YW(1, 0)<1>                 ABOVE_CUR_MB_YW(2, 8)   // Copy q1\r
+       mov (8) SRC_YW(2, 0)<1>                 ABOVE_CUR_MB_YW(3, 0)   // Copy q2\r
+\r
+       //==================================================================================\r
+       // Bottom field: same sequence as the top field, using the Top1 control data and word offset 8 in each source row\r
+       \r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 by the add further down\r
+       shr (16) Mbaff_ALPHA2(0,0)<1>   r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub          2:w                     // alpha >> 2\r
+\r
+       mov (1) P_AddrReg:w             ABOVE_CUR_MB_BASE:w             { NoDDClr }\r
+       mov (1) Q_AddrReg:w             ABOVE_CUR_MB_BASE+64:w  { NoDDChk }\r
+       \r
+       // Get bot field rows\r
+       mov (16) ABOVE_CUR_MB_YW(0)<1>  PREV_MB_YW(0, 8)<16;8,1>        // Copy p3, p2\r
+       mov (16) ABOVE_CUR_MB_YW(1)<1>  PREV_MB_YW(2, 8)<16;8,1>        // Copy p1, p0\r
+       mov (16) ABOVE_CUR_MB_YW(2)<1>  SRC_YW(0, 8)<16;8,1>            // Copy q0, q1\r
+       mov (16) ABOVE_CUR_MB_YW(3)<1>  SRC_YW(2, 8)<16;8,1>            // Copy q2, q3\r
+\r
+       mov (2) MaskA<1>:uw     r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<2;2,1>:uw\r
+\r
+       mov     (16) Mbaff_ALPHA(0,0)<1>        r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub\r
+       mov     (16) Mbaff_BETA(0,0)<1>         r[ECM_AddrReg, bBetaTop1_Y]<0;1,0>:ub\r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_h00_1_Y]<1;4,0>:ub\r
+\r
+       add (16) Mbaff_ALPHA2(0,0)<1>           Mbaff_ALPHA2(0,0)<16;16,1>              2:w                                     // alpha2 = (alpha >> 2) + 2  \r
+\r
+       CALL(FILTER_Y_MBAFF, 1)                         // Ext Y, bot field\r
+\r
+       // Save deblocked bot field rows (p2..q2 only; p3 and q3 are not copied back)\r
+       mov (8) PREV_MB_YW(1, 8)<1>             ABOVE_CUR_MB_YW(0, 8)   // Copy p2\r
+       mov (8) PREV_MB_YW(2, 8)<1>             ABOVE_CUR_MB_YW(1, 0)   // Copy p1\r
+       mov (8) PREV_MB_YW(3, 8)<1>             ABOVE_CUR_MB_YW(1, 8)   // Copy p0\r
+       mov (8) SRC_YW(0, 8)<1>                 ABOVE_CUR_MB_YW(2, 0)   // Copy q0\r
+       mov (8) SRC_YW(1, 8)<1>                 ABOVE_CUR_MB_YW(2, 8)   // Copy q1\r
+       mov (8) SRC_YW(2, 8)<1>                 ABOVE_CUR_MB_YW(3, 0)   // Copy q2\r
+       //==================================================================================\r
+\r
+H0_Y_DONE:\r
+\r
+//BYPASS_H0_Y:\r
+//------------------------------------------------------------------\r
+       // Same alpha, alpha2, beta and MaskB for all internal edges (H1, H2, H3): set once here; each edge then reloads only P/Q addresses, MaskA and tc0\r
+\r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 by the add further down\r
+       shr (16) Mbaff_ALPHA2(0,0)<1>   r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub              2:w                     // alpha >> 2\r
+\r
+       // alpha = bAlphaInternal_Y \r
+       // beta = bBetaInternal_Y\r
+       mov     (16) Mbaff_ALPHA(0,0)<1>        r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub\r
+       mov     (16) Mbaff_BETA(0,0)<1>         r[ECM_AddrReg, bBetaInternal_Y]<0;1,0>:ub\r
+\r
+       mov (1) MaskB:uw        0:w                                             // Set MaskB = 0 for all 3 edges, so it always uses bS < 4 algorithm.\r
+\r
+       add (16) Mbaff_ALPHA2(0,0)<1>           Mbaff_ALPHA2(0,0)<16;16,1>              2:w                                     // alpha2 = (alpha >> 2) + 2  \r
+\r
+//---------- Deblock Y internal top edge (H1)  ----------\r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0 (and.z sets f0.0 when the flag bit is clear)\r
+       and.z.f0.0 (1) null:w   r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+//    (f0.0)   jmpi    BYPASS_H1_Y\r
+\r
+       //      p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             SRC_MB_Y_BASE:w                                 { NoDDClr }\r
+       mov (1) Q_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDChk }\r
+\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntTopHorz]:uw\r
+\r
+       // tc0 has bTc0_h13_Y + bTc0_h12_Y + bTc0_h11_Y + bTc0_h10_Y            \r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_h10_Y]<1;4,0>:ub\r
+\r
+//     Was: (f0.0) jmpi around an unconditional CALL(FILTER_Y_MBAFF, 1).  NOTE(review): PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear - confirm.\r
+       PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)\r
+\r
+//BYPASS_H1_Y:\r
+//------------------------------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal mid horizontal edge (H2) ----------\r
+\r
+       // Bypass deblocking if FilterInternal8x8EdgesFlag = 0 (and.z sets f0.0 when the flag bit is clear)\r
+       and.z.f0.0 (1) null:w   r[ECM_AddrReg, BitFlags]:ub             FilterInternal8x8EdgesFlag:w            // Check for FilterInternal8x8EdgesFlag \r
+//    (f0.0)   jmpi    BYPASS_H2_Y\r
+\r
+       //      p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDChk }\r
+\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw\r
+\r
+       // tc0 has bTc0_h23_Y + bTc0_h22_Y + bTc0_h21_Y + bTc0_h20_Y            \r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_h20_Y]<1;4,0>:ub\r
+\r
+//     Was: (f0.0) jmpi around an unconditional CALL(FILTER_Y_MBAFF, 1).  NOTE(review): PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear - confirm.\r
+       PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)\r
+\r
+//BYPASS_H2_Y:\r
+//-----------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal bottom edge (H3) ----------     \r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0 (and.z sets f0.0 when the flag bit is clear)\r
+       and.z.f0.0 (1) null:w   r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+//    (f0.0)   jmpi    BYPASS_H3_Y\r
+\r
+       //      p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDClr }\r
+       mov (1) Q_AddrReg:w             12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w          { NoDDChk }\r
+       \r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw\r
+\r
+       // tc0 has bTc0_h33_Y + bTc0_h32_Y + bTc0_h31_Y + bTc0_h30_Y\r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_h30_Y]<1;4,0>:ub\r
+\r
+//     Was: (f0.0) jmpi around an unconditional CALL(FILTER_Y_MBAFF, 1).  NOTE(review): PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear - confirm.\r
+       PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)\r
+\r
+//BYPASS_H3_Y:\r
+//-----------------------------------------------\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_v.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_v.asm
new file mode 100644 (file)
index 0000000..7846168
--- /dev/null
@@ -0,0 +1,269 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter vertical Mbaff Y ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all vertical edges of Y.\r
+//\r
+//     It assumes the data for vertical de-blocking is already transposed.  \r
+//\r
+//             Luma:\r
+//\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//\r
+//             V0              V1              V2              V3\r
+//             Edge    Edge    Edge    Edge\r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBB:w                // Debug builds only: tag entry into this code\r
+#endif \r
+       \r
+\r
+//========== Luma deblocking ==========\r
+\r
+\r
+//---------- Deblock Y external left edge (V0) ----------      \r
+\r
+       // Classify the left/current MB pairing once; f0.0 and f0.1 predicate the two override groups of movs below\r
+       cmp.z.f0.0      (8)     null:w  VertEdgePattern:uw              LEFT_FIELD_CUR_FRAME:w\r
+       cmp.z.f0.1      (8)     null:w  VertEdgePattern:uw              LEFT_FRAME_CUR_FIELD:w\r
+\r
+       // Initial setting, used as-is when left and current MBs are both frame or both field\r
+       mov     (16) Mbaff_ALPHA(0,0)<1>        r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub\r
+       mov     (16) Mbaff_BETA(0,0)<1>         r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub\r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_v00_0_Y]<1;4,0>:ub\r
+               \r
+       // For FieldModeCurrentMbFlag=1 && FieldModeLeftMbFlag=0: Left0/Left1 values are interleaved lane by lane (dst stride 2).  NOTE(review): f0.0 is set from LEFT_FIELD_CUR_FRAME, whose name reads as the opposite pairing - confirm which is right.\r
+       (f0.0) mov (8)  Mbaff_ALPHA(0,0)<2>             r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub         { NoDDClr }\r
+       (f0.0) mov (8)  Mbaff_ALPHA(0,1)<2>             r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub         { NoDDChk }\r
+       (f0.0) mov (8)  Mbaff_BETA(0,0)<2>              r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub          { NoDDClr }\r
+       (f0.0) mov (8)  Mbaff_BETA(0,1)<2>              r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub          { NoDDChk }\r
+       (f0.0) mov (8)  Mbaff_TC0(0,0)<2>               r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub          { NoDDClr }\r
+       (f0.0) mov (8)  Mbaff_TC0(0,1)<2>               r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub          { NoDDChk }\r
+\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterLeftMbEdgeFlag:w          // Check for FilterLeftMbEdgeFlag; reuses f0.0 now that the f0.0-predicated movs above are done \r
+\r
+       // For FieldModeCurrentMbFlag=0 && FieldModeLeftMbFlag=1: Left0 values fill lanes 0-7, Left1 values lanes 8-15.  NOTE(review): f0.1 is set from LEFT_FRAME_CUR_FIELD, whose name reads as the opposite pairing - confirm which is right.\r
+       (f0.1) mov (8)  Mbaff_ALPHA(0,0)<1>             r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub         { NoDDClr }\r
+       (f0.1) mov (8)  Mbaff_ALPHA(0,8)<1>             r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub         { NoDDChk }\r
+       (f0.1) mov (8)  Mbaff_BETA(0,0)<1>              r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub          { NoDDClr }\r
+       (f0.1) mov (8)  Mbaff_BETA(0,8)<1>              r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub          { NoDDChk }\r
+       (f0.1) mov (8)  Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub          { NoDDClr }\r
+       (f0.1) mov (8)  Mbaff_TC0(0,8)<1>               r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub          { NoDDChk }\r
+\r
+       // Get (alpha >> 2) + 2 from the per-lane alpha selected above: the shift is done here, the + 2 by the add further down\r
+       shr (16) Mbaff_ALPHA2(0,0)<1>   Mbaff_ALPHA(0)          2:w                     // alpha >> 2\r
+\r
+       //      p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>\r
+       //      p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1>\r
+       //      q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             PREV_MB_Y_BASE:w        { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_Y_BASE:w         { NoDDChk }\r
+\r
+       // Set MaskA and MaskB (one 2-word mov starting at wEdgeCntlMapA_ExtLeftVert0)\r
+       mov (2) MaskA<1>:uw             r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw\r
+\r
+       add (16) Mbaff_ALPHA2(0,0)<1>           Mbaff_ALPHA2(0,0)<16;16,1>              2:w                                     // alpha2 = (alpha >> 2) + 2  \r
+\r
+//     Was: an unconditional CALL(FILTER_Y_MBAFF, 1).  NOTE(review): PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear, i.e. FilterLeftMbEdgeFlag is set - confirm.\r
+       PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)\r
+       \r
+//BYPASS_V0_Y:\r
+//------------------------------------------------------------------\r
+\r
+\r
+/*\r
+//---------- Deblock Y external left edge (V0) ----------      \r
+\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterLeftMbEdgeFlag:w          // Check for FilterLeftMbEdgeFlag \r
+    (f0.0)     jmpi    ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_Y)      \r
+\r
+       // Get vertical border edge control data  \r
+\r
+//     mov     (1)     f0.0            0:w\r
+       and     (1)     CTemp1_W:uw             r[ECM_AddrReg, BitFlags]:ub             FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw\r
+       cmp.z.f0.0      (1)     null:w  CTemp1_W:uw             LEFT_FIELD_CUR_FRAME:w\r
+       (-f0.0) jmpi LEFT_EDGE_Y_NEXT1\r
+\r
+       // For FieldModeCurrentMbFlag=1 && FieldModeLeftMbFlag=0\r
+       mov     (8)     Mbaff_ALPHA(0,0)<2>             r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub         { NoDDClr }\r
+       mov     (8)     Mbaff_ALPHA(0,1)<2>             r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub         { NoDDChk }\r
+       mov     (8)     Mbaff_BETA(0,0)<2>              r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub          { NoDDClr }\r
+       mov     (8)     Mbaff_BETA(0,1)<2>              r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub          { NoDDChk }\r
+       mov (8) Mbaff_TC0(0,0)<2>               r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub          { NoDDClr }\r
+       mov (8) Mbaff_TC0(0,1)<2>               r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub          { NoDDChk }\r
+\r
+       jmpi    LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED\r
+\r
+LEFT_EDGE_Y_NEXT1:\r
+       cmp.z.f0.0      (1)     null:w  CTemp1_W:uw             LEFT_FRAME_CUR_FIELD:w\r
+       (-f0.0) jmpi LEFT_EDGE_Y_NEXT2\r
+\r
+\r
+       // For FieldModeCurrentMbFlag=0 && FieldModeLeftMbFlag=1\r
+       mov     (8)     Mbaff_ALPHA(0,0)<1>             r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub         { NoDDClr }\r
+       mov     (8)     Mbaff_ALPHA(0,8)<1>             r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub         { NoDDChk }\r
+       mov     (8)     Mbaff_BETA(0,0)<1>              r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub          { NoDDClr }\r
+       mov     (8)     Mbaff_BETA(0,8)<1>              r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub          { NoDDChk }\r
+       mov (8) Mbaff_TC0(0,0)<1>               r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub          { NoDDClr }\r
+       mov (8) Mbaff_TC0(0,8)<1>               r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub          { NoDDChk }\r
+\r
+       jmpi    LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED\r
+       \r
+LEFT_EDGE_Y_NEXT2:\r
+       // both are frame or field\r
+       mov     (16) Mbaff_ALPHA(0,0)<1>        r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub\r
+       mov     (16) Mbaff_BETA(0,0)<1>         r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub\r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_v00_0_Y]<1;4,0>:ub\r
+\r
+LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED:\r
+\r
+       mov (2) MaskA<1>:uw             r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw\r
+\r
+       //      p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>\r
+       //      p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1>\r
+       //      q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             PREV_MB_Y_BASE:w        { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_Y_BASE:w         { NoDDChk }\r
+       \r
+       // Get (alpha >> 2) + 2\r
+       shr (16) Mbaff_ALPHA2(0,0)<1>   r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub         2:w                     // alpha >> 2\r
+       add (16) Mbaff_ALPHA2(0,0)<1>           Mbaff_ALPHA2(0,0)<16;16,1>              2:w                                     // alpha2 = (alpha >> 2) + 2  \r
+       \r
+       CALL(FILTER_Y_MBAFF, 1)\r
+\r
+ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_Y):\r
+//------------------------------------------------------------------\r
+*/\r
+\r
+       // Same alpha, alpha2, beta and MaskB for all internal edges (V1, V2, V3): set once here; each edge then reloads only P/Q addresses, MaskA and tc0\r
+       \r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 by the add further down\r
+       shr (16) Mbaff_ALPHA2(0,0)<1>   r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub              2:w                     // alpha >> 2\r
+       \r
+       // alpha = bAlphaInternal_Y\r
+       // beta = bBetaInternal_Y\r
+       mov     (16) Mbaff_ALPHA(0,0)<1>        r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub\r
+       mov     (16) Mbaff_BETA(0,0)<1>         r[ECM_AddrReg, bBetaInternal_Y]<0;1,0>:ub\r
+\r
+       mov (1) MaskB:uw        0:w                                             // Set MaskB = 0 for all 3 edges, so it always uses bS < 4 algorithm.\r
+\r
+       add (16) Mbaff_ALPHA2(0,0)<1>           Mbaff_ALPHA2(0,0)<16;16,1>              2:w                                             // alpha2 = (alpha >> 2) + 2  \r
+\r
+//---------- Deblock Y internal left edge (V1) ----------\r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0 (and.z sets f0.0 when the flag bit is clear)\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+//    (f0.0)   jmpi    BYPASS_V1_Y\r
+\r
+       //      p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>  \r
+       //      p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             SRC_MB_Y_BASE:w                                         { NoDDClr }\r
+       mov (1) Q_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDChk }\r
+\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw\r
+\r
+       // tc0 has bTc0_v31_Y + bTc0_v21_Y + bTc0_v11_Y + bTc0_v01_Y    \r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_v01_Y]<1;4,0>:ub\r
+\r
+//     Was: (f0.0) jmpi around an unconditional CALL(FILTER_Y_MBAFF, 1).  NOTE(review): PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear - confirm.\r
+       PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)\r
+\r
+BYPASS_V1_Y:\r
+//------------------------------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal mid vert edge (V2) ----------\r
+\r
+       // Bypass deblocking if FilterInternal8x8EdgesFlag = 0 (and.z sets f0.0 when the flag bit is clear)\r
+       and.z.f0.0      (1)     null:w  r[ECM_AddrReg, BitFlags]:ub             FilterInternal8x8EdgesFlag:w            // Check for FilterInternal8x8EdgesFlag \r
+//    (f0.0)   jmpi    BYPASS_V2_Y\r
+\r
+       //      p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1>  \r
+       //      p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1> \r
+       //      p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1> \r
+       //      p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1> \r
+       //      q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1>  \r
+       //      q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1> \r
+       //      q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1> \r
+       //      q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1> \r
+       mov (1) P_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDChk }\r
+\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw\r
+\r
+       // tc0 has bTc0_v32_Y + bTc0_v22_Y + bTc0_v12_Y + bTc0_v02_Y    \r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_v02_Y]<1;4,0>:ub\r
+\r
+//     Was: (f0.0) jmpi around an unconditional CALL(FILTER_Y_MBAFF, 1).  NOTE(review): PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear - confirm.\r
+       PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)\r
+\r
+BYPASS_V2_Y:\r
+//-----------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal right edge (V3) ----------       \r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0 (and.z sets f0.0 when the flag bit is clear)\r
+       and.z.f0.0      (1)     null:w  r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+//    (f0.0)   jmpi    BYPASS_V3_Y\r
+\r
+       //      p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDClr }\r
+       mov (1) Q_AddrReg:w             12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w          { NoDDChk }\r
+       \r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw\r
+\r
+       // tc0 has bTc0_v33_Y + bTc0_v23_Y + bTc0_v13_Y + bTc0_v03_Y\r
+       mov (16) Mbaff_TC0(0,0)<1>              r[ECM_AddrReg, bTc0_v03_Y]<1;4,0>:ub\r
+\r
+//     Was: (f0.0) jmpi around an unconditional CALL(FILTER_Y_MBAFF, 1).  NOTE(review): PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear - confirm.\r
+       PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)\r
+\r
+BYPASS_V3_Y:\r
+//-----------------------------------------------\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_UV_h.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_UV_h.asm
new file mode 100644 (file)
index 0000000..168df0f
--- /dev/null
@@ -0,0 +1,145 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter horizontal UV ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of UV.\r
+//\r
+//     It assumes the data for horizontal de-blocking is already transposed.  \r
+//\r
+//             Chroma:\r
+//\r
+//             +-------+-------+               H0 Edge\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+               H1 Edge\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+\r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBC:w\r
+#endif \r
+\r
+//=============== Chroma deblocking ================\r
+\r
+//---------- Deblock U external top edge ----------\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterTopMbEdgeFlag:w           // Check for FilterTopMbEdgeFlag \r
+//    (f0.0)   jmpi    BYPASS_EXT_TOP_EDGE_UV  \r
+\r
+       // Get horizontal border edge control data.\r
+       \r
+       //***** Need to take every other bit to form U maskA and mask B\r
+       // Get Luma maskA and maskB: broadcast each 16-bit map (<0;1,0>) and shift it per lane by RRampW(0) (presumably a 0..15 ramp, defined elsewhere)\r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<0;1,0>:uw             RRampW(0)\r
+       shr (16)        TempRow1(0)<1>          r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz0]<0;1,0>:uw             RRampW(0)\r
+       // f0.0 still holds the FilterTopMbEdgeFlag test (the shr instructions carry no flag modifier): skip the external edge when the bit is 0\r
+    (f0.0)     jmpi    ILDB_LABEL(BYPASS_EXT_TOP_EDGE_UV)                      \r
+\r
+       // Extract UV MaskA and MaskB from every other bit of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<16;8,2>             1:w\r
+\r
+//---------- Deblock U external edge ----------\r
+       //      p1 = Prev MB U row 0\r
+       //      p0 = Prev MB U row 1\r
+       //      q0 = Cur MB U row 0\r
+       //      q1 = Cur MB U row 1\r
+//     mov (1) P_AddrReg:w             PREV_MB_U_BASE:w                                                                        { NoDDClr }\r
+       mov (1) P_AddrReg:w             TOP_MB_U_BASE:w                                                                         { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_U_BASE:w                                                                         { NoDDChk }\r
+\r
+       // alpha = bAlphaTop0_Cb, beta = bBetaTop0_Cb\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaTop0_Cb]<2;2,1>:ub                                 { NoDDClr } \r
+       // tc0 has bTc0_h03_0_Cb + bTc0_h02_0_Cb + bTc0_h01_0_Cb + bTc0_h00_0_Cb\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h00_0_Cb]<4;4,1>:ub                                 { NoDDChk } \r
+               \r
+       // Save UV MaskA and MaskB: copy f0.0 and f0.1 into MaskA and the word that follows it\r
+       mov (2)         MaskA<1>:uw                     f0.0<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV, 1)      \r
+\r
+//---------- Deblock V external top edge ----------\r
+       //      p1 = Prev MB V row 0\r
+       //      p0 = Prev MB V row 1\r
+       //      q0 = Cur MB V row 0\r
+       //      q1 = Cur MB V row 1\r
+//     mov (1) P_AddrReg:w             PREV_MB_V_BASE:w                { NoDDClr }\r
+       mov (1) P_AddrReg:w             TOP_MB_V_BASE:w         { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_V_BASE:w                 { NoDDChk }\r
+\r
+       // alpha = bAlphaTop0_Cr, beta = bBetaTop0_Cr\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaTop0_Cr]<2;2,1>:ub         { NoDDClr }\r
+       \r
+       // tc0 has bTc0_h03_0_Cr + bTc0_h02_0_Cr + bTc0_h01_0_Cr + bTc0_h00_0_Cr\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h00_0_Cr]<4;4,1>:ub         { NoDDChk }\r
+\r
+       // Reload f0.0 and f0.1 from UV MaskA and MaskB (written before the U call above)\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV, 1)      \r
+\r
+ILDB_LABEL(BYPASS_EXT_TOP_EDGE_UV):\r
+\r
+       // Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.\r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0  \r
+//     and.z.f0.0 (1) null:w   r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+//    (f0.0)   jmpi    BYPASS_4x4_DEBLOCK_H\r
+\r
+//---------- Deblock U internal horz middle edge ----------\r
+\r
+       //***** Need to take every other bit to form U maskA\r
+       // Get Luma maskA only (MaskB is forced to 0 below)\r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]<0;1,0>:uw               RRampW(0)\r
+\r
+       //      p1 = Cur MB U row 2\r
+       //      p0 = Cur MB U row 3\r
+       //      q0 = Cur MB U row 4\r
+       //      q1 = Cur MB U row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_U_BASE:w                                  { NoDDClr }             // Skip 2 U rows and 2 V rows\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_U_BASE:w                                  { NoDDChk }\r
+\r
+       // alpha = bAlphaInternal_Cb, beta = bBetaInternal_Cb\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaInternal_Cb]<2;2,1>:ub             { NoDDClr }\r
+       // tc0 has bTc0_h23_Cb + bTc0_h22_Cb + bTc0_h21_Cb + bTc0_h20_Cb                \r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h20_Cb]<4;4,1>:ub                           { NoDDChk }\r
+\r
+       // Extract UV MaskA from every other bit of the Y mask\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+\r
+       // UV MaskA = f0.0; MaskB and f0.1 are set to 0\r
+       mov (1) f0.1:uw         0:w\r
+       mov (1) MaskB:uw        0:w                                                                                                     { NoDDClr }\r
+       mov (1) MaskA:uw        f0.0:uw                                                                                         { NoDDChk }\r
+\r
+       CALL(FILTER_UV, 1)      \r
+\r
+//---------- Deblock V internal horz middle edge ----------\r
+       //      p1 = Cur MB V row 2\r
+       //      p0 = Cur MB V row 3\r
+       //      q0 = Cur MB V row 4\r
+       //      q1 = Cur MB V row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_V_BASE:w                                  { NoDDClr }             // Skip 2 U rows and 2 V rows\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_V_BASE:w                                  { NoDDChk }\r
+\r
+       // alpha = bAlphaInternal_Cr, beta = bBetaInternal_Cr\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaInternal_Cr]<2;2,1>:ub             { NoDDClr }\r
+       // tc0 has bTc0_h23_Cr + bTc0_h22_Cr + bTc0_h21_Cr + bTc0_h20_Cr\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h20_Cr]<4;4,1>:ub                           { NoDDChk }\r
+\r
+       // Reload f0.0 and f0.1 from MaskA and the word after it (MaskB, zeroed above)\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV, 1)      \r
+\r
+//BYPASS_4x4_DEBLOCK_H:\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_UV_v.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_UV_v.asm
new file mode 100644 (file)
index 0000000..8d331a0
--- /dev/null
@@ -0,0 +1,145 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter vertical UV ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all vertical edges of UV.\r
+//\r
+//     It assumes the data for vertical de-blocking is already transposed.  \r
+//\r
+//             Chroma:\r
+//\r
+//             +-------+-------+\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+\r
+//             |               |               |\r
+//             |               |               |\r
+//             |               |               |\r
+//             +-------+-------+\r
+//\r
+//             V0              V1              \r
+//             Edge    Edge    \r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBC:w\r
+#endif \r
+\r
+//=============== Chroma deblocking ================\r
+\r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterLeftMbEdgeFlag:w          // Check for FilterLeftMbEdgeFlag \r
+//    (f0.0)   jmpi    BYPASS_EXT_LEFT_EDGE_UV \r
\r
+       // Get vertical border edge control data.  \r
+       \r
+       // Get Luma maskA and maskB: broadcast each 16-bit map (<0;1,0>) and shift it per lane by RRampW(0) (presumably a 0..15 ramp, defined elsewhere)\r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<0;1,0>:uw            RRampW(0)\r
+       shr (16)        TempRow1(0)<1>          r[ECM_AddrReg, wEdgeCntlMapB_ExtLeftVert0]<0;1,0>:uw            RRampW(0)\r
+       // f0.0 still holds the FilterLeftMbEdgeFlag test (the shr instructions carry no flag modifier): skip the external edge when the bit is 0\r
+    (f0.0)     jmpi    ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_UV)\r
+\r
+       // Extract UV MaskA and MaskB from every other bit of Y masks\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+       and.nz.f0.1 (8) null:w                  TempRow1(0)<16;8,2>             1:w\r
+\r
+//---------- Deblock U external edge ----------\r
+       //      p1 = Prev MB U row 0\r
+       //      p0 = Prev MB U row 1\r
+       //      q0 = Cur MB U row 0\r
+       //      q1 = Cur MB U row 1\r
+       mov (1) P_AddrReg:w             PREV_MB_U_BASE:w                                                                        { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_U_BASE:w                                                                         { NoDDChk }\r
+\r
+       // alpha = bAlphaLeft0_Cb, beta = bBetaLeft0_Cb\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaLeft0_Cb]<2;2,1>:ub                                { NoDDClr }\r
+       // tc0 has bTc0_v30_0_Cb + bTc0_v20_0_Cb + bTc0_v10_0_Cb + bTc0_v00_0_Cb\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub                                 { NoDDChk }\r
+       \r
+       // Save UV MaskA and MaskB: copy f0.0 and f0.1 into MaskA and the word that follows it\r
+       mov (2)         MaskA<1>:uw                     f0.0<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV, 1)      \r
+\r
+//---------- Deblock V external edge ----------\r
+       //      p1 = Prev MB V row 0\r
+       //      p0 = Prev MB V row 1\r
+       //      q0 = Cur MB V row 0\r
+       //      q1 = Cur MB V row 1\r
+       mov (1) P_AddrReg:w             PREV_MB_V_BASE:w                                                                        { NoDDClr }             \r
+       mov (1) Q_AddrReg:w             SRC_MB_V_BASE:w                                                                         { NoDDChk }\r
+\r
+       // for vert edge: alpha = bAlphaLeft0_Cr, beta = bBetaLeft0_Cr\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaLeft0_Cr]<2;2,1>:ub                                { NoDDClr }\r
+       \r
+       // tc0 has bTc0_v30_0_Cr + bTc0_v20_0_Cr + bTc0_v10_0_Cr + bTc0_v00_0_Cr\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub                                 { NoDDChk }\r
+\r
+       // Reload f0.0 and f0.1 from UV MaskA and MaskB (written before the U call above)\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV, 1)      \r
+\r
+\r
+ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_UV):\r
+       // Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.\r
+       // Same alpha and beta for all internal vert and horiz edges \r
+\r
+\r
+       //***** Need to take every other bit to form U or V maskA\r
+       // Get Luma maskA only (MaskB is forced to 0 below)\r
+       shr (16)        TempRow0(0)<1>          r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]<0;1,0>:uw               RRampW(0)\r
+\r
+//---------- Deblock U internal edge ----------\r
+       //      p1 = Cur MB U row 2\r
+       //      p0 = Cur MB U row 3\r
+       //      q0 = Cur MB U row 4\r
+       //      q1 = Cur MB U row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_U_BASE:w                                  { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_U_BASE:w                                  { NoDDChk }\r
+\r
+       // alpha = bAlphaInternal_Cb, beta = bBetaInternal_Cb\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaInternal_Cb]<2;2,1>:ub             { NoDDClr }\r
+\r
+       // tc0 has bTc0_v32_Cb + bTc0_v22_Cb + bTc0_v12_Cb + bTc0_v02_Cb        \r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v02_Cb]<4;4,1>:ub                           { NoDDChk }\r
+\r
+       // Extract UV MaskA from every other bit of the Y mask\r
+       and.nz.f0.0 (8) null:w                  TempRow0(0)<16;8,2>             1:w\r
+\r
+       // UV MaskA = f0.0; MaskB and f0.1 are set to 0\r
+       mov (1) f0.1:uw         0:w\r
+       mov (1) MaskB:uw        0:w                                                                                                     { NoDDClr }\r
+       mov (1) MaskA:uw        f0.0:uw                                                                                         { NoDDChk }\r
+       \r
+       CALL(FILTER_UV, 1)      \r
+\r
+\r
+//---------- Deblock V internal edge ----------\r
+       //      P1 = Cur MB V row 2\r
+       //      P0 = Cur MB V row 3\r
+       //      Q0 = Cur MB V row 4\r
+       //      Q1 = Cur MB V row 5\r
+       mov (1) P_AddrReg:w             4*UV_ROW_WIDTH+SRC_MB_V_BASE:w                                  { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*UV_ROW_WIDTH+SRC_MB_V_BASE:w                                  { NoDDChk }\r
+\r
+       // alpha = bAlphaInternal_Cr, beta = bBetaInternal_Cr\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaInternal_Cr]<2;2,1>:ub             { NoDDClr }     \r
+\r
+       // tc0 has bTc0_v32_Cr + bTc0_v22_Cr + bTc0_v12_Cr + bTc0_v02_Cr        \r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v02_Cr]<4;4,1>:ub                           { NoDDChk }\r
+\r
+       // Reload f0.0 and f0.1 from MaskA and the word after it (MaskB, zeroed above)\r
+       mov (2)         f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       CALL(FILTER_UV, 1)      \r
+\r
+\r
+//BYPASS_4x4_DEBLOCK_V:\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Y_h.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Y_h.asm
new file mode 100644 (file)
index 0000000..45ab4df
--- /dev/null
@@ -0,0 +1,199 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter horizontal Y ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of Y.\r
+//\r
+//     It assumes the data for horizontal de-blocking is already transposed.  \r
+//\r
+//             Luma:\r
+//\r
+//             +-------+-------+-------+-------+               H0  Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+               H1 Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+               H2      Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+               H3 Edge\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBB:w\r
+#endif \r
+       \r
+\r
+//========== Luma deblocking ==========\r
+\r
+\r
+//---------- Deblock Y external top edge (H0)  ----------      \r
+\r
+       // Bypass deblocking if it is the top edge of the picture.  \r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterTopMbEdgeFlag:w           // Check for FilterTopMbEdgeFlag \r
+\r
+//     and.z.f0.1 (1)  null:uw         r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]:uw            0xFFFF:uw       // MaskA = 0? \r
+\r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 is added just before the call\r
+       shr (1) alpha2:w                r[ECM_AddrReg, bAlphaTop0_Y]:ub         2:w                     // alpha >> 2\r
+\r
+       //      p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>\r
+//     mov (1) P_AddrReg:w             PREV_MB_Y_BASE:w                { NoDDClr }\r
+       mov (1) P_AddrReg:w             TOP_MB_Y_BASE:w         { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_Y_BASE:w         { NoDDChk }\r
+       \r
+       // Get horizontal border edge control data\r
+       // alpha = bAlphaTop0_Y \r
+       // beta = bBetaTop0_Y\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaTop0_Y]<2;2,1>:ub                  { NoDDClr }             // 2 channels for alpha and beta\r
+       // Two words are copied: MaskA from wEdgeCntlMapA_ExtTopHorz0 plus the next word (presumably MaskB from wEdgeCntlMapB_ExtTopHorz0 - confirm layout)\r
+       mov (2) MaskA<1>:uw     r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw             { NoDDClr, NoDDChk }\r
+\r
+       // tc0 has bTc0_h03_0_Y | bTc0_h02_0_Y | bTc0_h01_0_Y | bTc0_h00_0_Y\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h00_0_Y]<4;4,1>:ub                  { NoDDChk }\r
+\r
+//    (f0.0)   jmpi    BYPASS_EXT_TOP_EDGE_Y   \r
+//     (f0.0.anyv)      jmpi   BYPASS_EXT_TOP_EDGE_Y\r
+       \r
+       add (1) alpha2:w                alpha2:w                2:w                                                             // alpha2 = (alpha >> 2) + 2  \r
+       // f0.0 was set by and.z above when the flag bit is 0; the inverted predicate -f0.0 is passed below (PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear)\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+//BYPASS_EXT_TOP_EDGE_Y:\r
+//------------------------------------------------------------------\r
+       // Same alpha, alpha2, beta and MaskB for all internal edges \r
+\r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 is added after MaskB is cleared\r
+       shr (1) alpha2:w                r[ECM_AddrReg, bAlphaInternal_Y]:ub             2:w                     // alpha >> 2\r
+\r
+       // alpha = bAlphaInternal_Y \r
+       // beta = bBetaInternal_Y\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaInternal_Y]<2;2,1>:ub              { NoDDClr }\r
+\r
+       // Set MaskB = 0 for all 3 int edges, so it always uses bS < 4 algorithm.\r
+       mov (1) MaskB:uw        0:w                                                                     { NoDDChk }\r
+\r
+       add (1) alpha2:w                alpha2:w                2:w                                                             // alpha2 = (alpha >> 2) + 2  \r
+       // The per-edge setup for H1, H2 and H3 below rewrites only P_AddrReg, Q_AddrReg, MaskA, tc0 and f0.0\r
+\r
+//---------- Deblock Y internal top edge (H1)  ----------\r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0  \r
+       and.z.f0.0 (1) null:w   r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+\r
+       //      p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             SRC_MB_Y_BASE:w                                 { NoDDClr }\r
+       mov (1) Q_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDChk }\r
+\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntTopHorz]:uw              { NoDDClr }\r
+       \r
+       // tc0 has bTc0_h13_Y + bTc0_h12_Y + bTc0_h11_Y + bTc0_h10_Y            \r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h10_Y]<4;4,1>:ub                    { NoDDChk }\r
+\r
+//    (f0.0)   jmpi    BYPASS_4x4_DEBLOCK_H\r
+//     (f0.0.anyv)      jmpi   BYPASS_4x4_DEBLOCK_H\r
+       // f0.0 was set by and.z above when the flag bit is 0; the inverted predicate -f0.0 is passed below (PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear)\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+//BYPASS_4x4_DEBLOCK_H:\r
+//------------------------------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal mid horizontal edge (H2) ----------\r
+\r
+       // Bypass deblocking if FilterInternal8x8EdgesFlag = 0  \r
+       and.z.f0.0 (1) null:w   r[ECM_AddrReg, BitFlags]:ub             FilterInternal8x8EdgesFlag:w            // Check for FilterInternal8x8EdgesFlag \r
+\r
+//     and.z.f0.1 (1)  null:uw         r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw              0xFFFF:uw       // MaskA = 0? \r
+\r
+       //      p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDChk }\r
+\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw      { NoDDClr }\r
+//     mov (1) MaskB:uw        0:w                                             // Set MaskB = 0, so it always uses bS < 4 algorithm.\r
+\r
+       // tc0 has bTc0_h23_Y + bTc0_h22_Y + bTc0_h21_Y + bTc0_h20_Y            \r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h20_Y]<4;4,1>:ub                    { NoDDChk }\r
+\r
+//    (f0.0)   jmpi    BYPASS_8x8_DEBLOCK_H\r
+//     (f0.0.anyv)      jmpi   BYPASS_8x8_DEBLOCK_H\r
+       // f0.0 was set by and.z above when the 8x8 flag bit is 0; the inverted predicate -f0.0 is passed below (PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear)\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+//BYPASS_8x8_DEBLOCK_H:\r
+//-----------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal bottom edge (H3) ----------     \r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0  \r
+       and.z.f0.0 (1) null:w   r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+\r
+//     and.z.f0.1 (1)  null:uw         r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw              0xFFFF:uw       // MaskA = 0? \r
+\r
+       //      p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDClr } \r
+       mov (1) Q_AddrReg:w             12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDChk } \r
+\r
+       // Only MaskA is loaded for this edge; the MaskB write below is commented out\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw      { NoDDClr }\r
+//     mov (1) MaskB:uw        0:w                                             // Set MaskB = 0, so it always uses bS < 4 algorithm.\r
+\r
+       // tc0 has bTc0_h33_Y + bTc0_h32_Y + bTc0_h31_Y + bTc0_h30_Y         \r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_h30_Y]<4;4,1>:ub                    { NoDDChk }\r
+\r
+//    (f0.0)   jmpi    BYPASS_4x4_DEBLOCK_H2\r
+//     (f0.0.anyv)      jmpi   BYPASS_4x4_DEBLOCK_H2\r
+       // f0.0 was set by and.z above when the flag bit is 0; the inverted predicate -f0.0 is passed below (PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear)\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+//BYPASS_4x4_DEBLOCK_H2:\r
+//-----------------------------------------------\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Y_v.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Filter_Y_v.asm
new file mode 100644 (file)
index 0000000..9d6bf0a
--- /dev/null
@@ -0,0 +1,203 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+////////// AVC ILDB filter vertical Y ///////////////////////////////////////////////////////\r
+//\r
+//     This filter code prepares the src data and control data for ILDB filtering on all vertical edges of Y.\r
+//\r
+//     It assumes the data for vertical de-blocking is already transposed.  \r
+//\r
+//             Luma:\r
+//\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             |               |               |               |               |\r
+//             +-------+-------+-------+-------+\r
+//\r
+//             V0              V1              V2              V3\r
+//             Edge    Edge    Edge    Edge\r
+//\r
+/////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xBBBB:w\r
+#endif \r
+       \r
+\r
+//========== Luma deblocking ==========\r
+\r
+\r
+//---------- Deblock Y external left edge (V0) ----------      \r
+\r
+       // Bypass deblocking if it is left edge of the picture.  \r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterLeftMbEdgeFlag:w          // Check for FilterLeftMbEdgeFlag \r
+\r
+//     and.z.f0.1 (1)  null:uw         r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]:uw           0xFFFF:uw       // MaskA = 0? \r
+\r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 is added just before the call\r
+       shr (1) alpha2:w                r[ECM_AddrReg, bAlphaLeft0_Y]:ub                2:w                     // alpha >> 2\r
+\r
+       //      p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>\r
+       //      p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1>\r
+       //      q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             PREV_MB_Y_BASE:w                { NoDDClr }\r
+       mov (1) Q_AddrReg:w             SRC_MB_Y_BASE:w                 { NoDDChk }\r
+       \r
+       // Get vertical border edge control data  \r
+       // alpha = bAlphaLeft0_Y \r
+       // beta = bBetaLeft0_Y\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaLeft0_Y]<2;2,1>:ub                 { NoDDClr }             // 2 channels for alpha and beta\r
+       // Two words are copied: MaskA from wEdgeCntlMapA_ExtLeftVert0 plus the next word (presumably MaskB from wEdgeCntlMapB_ExtLeftVert0 - confirm layout)\r
+       mov (2) MaskA<1>:uw     r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw    { NoDDClr, NoDDChk }\r
+       \r
+       // tc0 has bTc0_v30_0_Y | bTc0_v20_0_Y | bTc0_v10_0_Y | bTc0_v00_0_Y\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v00_0_Y]<4;4,1>:ub                  { NoDDChk }\r
+\r
+//     (f0.0)  jmpi    BYPASS_EXT_LEFT_EDGE_Y  \r
+//     (f0.0.anyv)      jmpi   BYPASS_EXT_LEFT_EDGE_Y\r
+               \r
+       add (1) alpha2:w                alpha2:w                2:w                                                             // alpha2 = (alpha >> 2) + 2  \r
+       // f0.0 was set by and.z above when the flag bit is 0; the inverted predicate -f0.0 is passed below (PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear)\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+\r
+//BYPASS_EXT_LEFT_EDGE_Y:\r
+//------------------------------------------------------------------\r
+       // Same alpha, alpha2, beta and MaskB for all internal edges \r
+\r
+       // Get (alpha >> 2) + 2: the shift is done here, the + 2 is added after MaskB is cleared\r
+       shr (1) alpha2:w                r[ECM_AddrReg, bAlphaInternal_Y]:ub             2:w                     // alpha >> 2\r
+\r
+       // alpha = bAlphaInternal_Y\r
+       // beta = bBetaInternal_Y\r
+       mov     (2)     alpha<1>:w      r[ECM_AddrReg, bAlphaInternal_Y]<2;2,1>:ub              { NoDDClr }\r
+\r
+       // Set MaskB = 0 for all 3 int edges, so it always uses bS < 4 algorithm.\r
+       mov (1) MaskB:uw        0:w                                                                                             { NoDDChk }\r
+\r
+       add (1) alpha2:w                alpha2:w                2:w                                                             // alpha2 = (alpha >> 2) + 2  \r
+\r
+\r
+//---------- Deblock Y internal left edge (V1) ----------\r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0  \r
+       and.z.f0.0  (1) null:w          r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // Check for FilterInternal4x4EdgesFlag \r
+\r
+//     and.z.f0.1 (1)  null:uw         r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw             0xFFFF:uw       // MaskA = 0? \r
+\r
+       //      p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>  \r
+       //      p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             SRC_MB_Y_BASE:w                                 { NoDDClr }\r
+       mov (1) Q_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDChk }\r
+       \r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw             { NoDDClr }\r
+\r
+       // tc0 has bTc0_v31_Y + bTc0_v21_Y + bTc0_v11_Y + bTc0_v01_Y    \r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v01_Y]<4;4,1>:ub                    { NoDDChk }\r
+\r
+//    (f0.0)   jmpi    BYPASS_4x4_DEBLOCK_V\r
+//     (f0.0.anyv)      jmpi   BYPASS_4x4_DEBLOCK_V\r
+       // f0.0 was set by and.z above when the flag bit is 0; the inverted predicate -f0.0 is passed below (PRED_CALL is defined elsewhere; presumably it calls only when f0.0 is clear)\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+//BYPASS_4x4_DEBLOCK_V:\r
+//------------------------------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal mid vert edge (V2) ----------\r
+\r
+       // Bypass deblocking if FilterInternal8x8EdgesFlag = 0  \r
+       and.z.f0.0      (1)     null:w  r[ECM_AddrReg, BitFlags]:ub             FilterInternal8x8EdgesFlag:w            // f0.0 = 1 when FilterInternal8x8EdgesFlag is 0\r
+\r
+//     and.z.f0.1 (1)  null:uw         r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw              0xFFFF:uw       // MaskA = 0? \r
+\r
+       //      p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1>  \r
+       //      p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1> \r
+       //      p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1> \r
+       //      p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1> \r
+       //      q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1>  \r
+       //      q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1> \r
+       //      q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1> \r
+       //      q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1> \r
+       mov (1) P_AddrReg:w             4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDClr }\r
+       mov (1) Q_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDChk }\r
+       // MaskA = edge control map for this edge; MaskB is not rewritten here (the mov below is commented out)\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw              { NoDDClr }\r
+//     mov (1) MaskB:uw        0:w                                             // Set MaskB = 0, so it always uses bS < 4 algorithm.\r
+\r
+       // tc0 has bTc0_v32_Y + bTc0_v22_Y + bTc0_v12_Y + bTc0_v02_Y (4 consecutive bytes from bTc0_v02_Y)\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v02_Y]<4;4,1>:ub                    { NoDDChk }\r
+\r
+//    (f0.0)   jmpi    BYPASS_8x8_DEBLOCK_V\r
+//     (f0.0.anyv)      jmpi   BYPASS_8x8_DEBLOCK_V\r
+       // PRED_CALL is defined outside this section; -f0.0 presumably inverts the predicate, so FILTER_Y runs only when the flag bit is set\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+//BYPASS_8x8_DEBLOCK_V:\r
+//-----------------------------------------------\r
+\r
+\r
+//---------- Deblock Y internal right edge (V3) ----------       \r
+\r
+       // Bypass deblocking if FilterInternal4x4EdgesFlag = 0  \r
+       and.z.f0.0      (1)     null:w  r[ECM_AddrReg, BitFlags]:ub             FilterInternal4x4EdgesFlag:w            // f0.0 = 1 when FilterInternal4x4EdgesFlag is 0\r
+\r
+//     and.z.f0.1 (1)  null:uw         r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw            0xFFFF:uw       // MaskA = 0? \r
+\r
+       //      p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> \r
+       //      p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>\r
+       //      p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>\r
+       //      p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>\r
+       //      q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> \r
+       //      q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>\r
+       //      q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>\r
+       //      q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>\r
+       mov (1) P_AddrReg:w             8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w           { NoDDClr }\r
+       mov (1) Q_AddrReg:w             12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDChk }\r
+       // MaskA = edge control map for this edge; MaskB is not rewritten here (the mov below is commented out)\r
+       mov (1) MaskA:uw        r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw    { NoDDClr }\r
+//     mov (1) MaskB:uw        0:w                                             // Set MaskB = 0, so it always uses bS < 4 algorithm.\r
+\r
+       // tc0 has bTc0_v33_Y + bTc0_v23_Y + bTc0_v13_Y + bTc0_v03_Y (4 consecutive bytes from bTc0_v03_Y)\r
+       mov (4) tc0<1>:ub       r[ECM_AddrReg, bTc0_v03_Y]<4;4,1>:ub                    { NoDDChk }\r
+\r
+//    (f0.0)   jmpi    BYPASS_4x4_DEBLOCK_V2\r
+//     (f0.0.anyv)      jmpi   BYPASS_4x4_DEBLOCK_V2\r
+       // PRED_CALL is defined outside this section; -f0.0 presumably inverts the predicate, so FILTER_Y runs only when the flag bit is set\r
+//     CALL(FILTER_Y, 1)\r
+       PRED_CALL(-f0.0, FILTER_Y, 1)\r
+\r
+//BYPASS_4x4_DEBLOCK_V2:\r
+//-----------------------------------------------\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_ForwardMsg.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_ForwardMsg.asm
new file mode 100644 (file)
index 0000000..96fe828
--- /dev/null
@@ -0,0 +1,57 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//========== Forward message to root thread through gateway ==========\r
+// Each child thread writes a byte into the root thread's GRF area at r50, as defined in the open-gateway message.\r
+// Debug builds only: tag EntrySignatureC with 0x7777 on entry to this code.\r
+#if defined(_DEBUG) \r
+mov            (1)             EntrySignatureC:w                       0x7777:w\r
+#endif\r
+\r
+// Init payload: clear all 8 dwords of GatewayPayload to 0\r
+mov (8)        GatewayPayload<1>:ud    0:w                                                             //{ NoDDClr } \r
+\r
+// Forward a message:\r
+// Offset = x relative to r50 (defined in open gateway), x = ORIX >> 4 [bit 28:16]\r
+// The offset field starts at bit 16, so a plain offset would need a shift left by 16\r
+\r
+// Shift 2 more bits (18 in total), i.e. offset = 4 * x (original note: "for byte to word offset")\r
+\r
+//shl  (1)             Offset_Length:ud                GateWayOffsetC:w                16:w            { NoDDClr, NoDDChk }\r
+shl    (1)             Offset_Length:ud                GateWayOffsetC:w                18:w            \r
+\r
+// Add 2 to the offset field (0x00020000 = 2 << 16), i.e. a further 2-byte offset\r
+add    (1)             Offset_Length:ud                        Offset_Length:ud                0x00020000:d    { NoDDClr }\r
+       \r
+// Length = 1 byte,    [bit 10:8 = 000]\r
+//000 xxxxxxxxxxxxx 00000 000 00000000 ==> 000x xxxx xxxx xxxx 0000 0000 0000 0000\r
+\r
+//mov (1)      DispatchID:ub                   r0.20:ub                // Dispatch ID\r
+\r
+// Copy the EU ID and thread ID that we received from the PARENT thread (word read from r0.6)\r
+mov (1)        EUID_TID:uw                     r0.6:uw                                                         { NoDDClr, NoDDChk }\r
+\r
+mov (1)        GatewayPayloadKey:uw    0x1212:uw                                                       { NoDDClr, NoDDChk }    // Key\r
+\r
+//mov  (4)             GatewayPayload<1>:ud    0:ud                                                            { NoDDClr, NoDDChk }    // Init payload low 4 dword\r
+\r
+// Write back one byte (value = 0xFF) to root thread GRF to indicate this child thread is finished\r
+// All lower 4 bytes must be assigned to the same byte value.\r
+mov    (4)             GatewayPayload<1>:ub    0xFFFF:uw                                                       { NoDDChk }\r
+\r
+// msg descriptor bit 15 set to '1' for notification\r
+#ifdef GW_DCN\r
+// For ILK, EOT bit should also be set to terminate the thread. This is to fix a timing related HW issue.\r
+//\r
+send (8)       null:ud                 m0                      GatewayPayload<8;8,1>:ud    MSG_GW_EOT  FWDMSGDSC+NOTIFYMSG\r
+#else\r
+send (8)       null:ud                 m0                      GatewayPayload<8;8,1>:ud    MSG_GW      FWDMSGDSC+NOTIFYMSG\r
+#endif // GW_DCN\r
+\r
+//========== Forward Msg Done ========================================\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_LumaThrdLimit.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_LumaThrdLimit.asm
new file mode 100644 (file)
index 0000000..714ee4d
--- /dev/null
@@ -0,0 +1,46 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//========== Forward message to root thread through gateway ==========\r
+\r
+// Chroma root kernel updates the luma thread limit.\r
+// Debug builds only: tag EntrySignatureC with 0x7788 on entry to this code.\r
+#if defined(_DEBUG) \r
+mov            (1)             EntrySignatureC:w                       0x7788:w\r
+#endif\r
+\r
+// Init payload: clear all 8 dwords of GatewayPayload to 0\r
+mov (8)        GatewayPayload<1>:ud    0:w                                                             { NoDDClr } \r
+\r
+// Forward a message:\r
+// The offset field [bit 28:16] is relative to r50 (defined in open gateway).\r
+// No shift is done here: Offset_Length is loaded directly from the constant THREAD_LIMIT_OFFSET (defined elsewhere).\r
+\r
+mov    (1)             Offset_Length:ud                THREAD_LIMIT_OFFSET:ud                          { NoDDClr, NoDDChk }\r
+\r
+// Length = 1 byte,    [bit 10:8 = 000]\r
+//000 xxxxxxxxxxxxx 00000 000 00000000 ==> 000x xxxx xxxx xxxx 0000 0000 0000 0000\r
+\r
+//mov (1)      DispatchID:ub                   r0.20:ub                // Dispatch ID\r
+\r
+//  Copy EUid and Thread ID that we received from the PARENT thread\r
+mov (1)        EUID_TID:uw                     r0.6:uw                                                         { NoDDClr, NoDDChk }\r
+\r
+mov (1)        GatewayPayloadKey:uw    0x1212:uw                                                       { NoDDChk }     // Key\r
+\r
+//mov  (4)             GatewayPayload<1>:ud    0:ud                                                            { NoDDClr, NoDDChk }    // Init payload low 4 dword\r
+\r
+// Write back one byte (value = MaxThreads - OutstandingThreads) to the root thread GRF; presumably the new luma thread limit.\r
+// All lower 4 bytes must be assigned to the same byte value (Temp1_B is presumably the low byte of Temp1_W - confirm).\r
+add    (1)             Temp1_W:w                               MaxThreads:uw   -OutstandingThreads:uw\r
+mov    (4)             GatewayPayload<1>:ub    Temp1_B:ub\r
+// Forward the message: destination is GatewayResponse, descriptor is plain FWDMSGDSC (no NOTIFYMSG bit)\r
+send (8)       GatewayResponse:ud              m0                      GatewayPayload<8;8,1>:ud    MSG_GW      FWDMSGDSC\r
+\r
+//========== Forward Msg Done ========================================\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Luma_Core.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Luma_Core.asm
new file mode 100644 (file)
index 0000000..edc8273
--- /dev/null
@@ -0,0 +1,419 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__AVC_ILDB_LUMA_CORE__)   // Make sure this file is only included once\r
+#define __AVC_ILDB_LUMA_CORE__\r
+\r
+////////// AVC ILDB Luma Core /////////////////////////////////////////////////////////////////////////////////\r
+//\r
+//     This core performs AVC LUMA ILDB filtering on one horizontal edge (16 pixels) of a MB.  \r
+//     If data is transposed, it can also de-block a vertical edge.\r
+//\r
+//     Before calling this subroutine, the caller needs to set the following parameters.\r
+//\r
+//     - EdgeCntlMap1                          //      Edge control map A\r
+//     - EdgeCntlMap2                          //      Edge control map B\r
+//     - P_AddrReg                                     //      Src and dest address register for P pixels\r
+//     - Q_AddrReg                                     //      Src and dest address register for Q pixels      \r
+//     - alpha                                         //  alpha corresponding to the edge to be filtered\r
+//     - beta                                          //  beta corresponding to the edge to be filtered\r
+//     - tc0                                           //      tc0  corresponding to the edge to be filtered\r
+//\r
+//\r
+//     +----+----+----+----+----+----+----+----+\r
+//     | p3 | p2 | P1 | p0 | q0 | q1 | q2 | q3 |\r
+//     +----+----+----+----+----+----+----+----+\r
+//\r
+//     p3 = r[P_AddrReg, 0]<16;16,1>  \r
+//     p2 = r[P_AddrReg, 16]<16;16,1> \r
+//     p1 = r[P_AddrReg, 32]<16;16,1> \r
+//     p0 = r[P_AddrReg, 48]<16;16,1> \r
+//     q0 = r[Q_AddrReg, 0]<16;16,1>  \r
+//     q1 = r[Q_AddrReg, 16]<16;16,1> \r
+//     q2 = r[Q_AddrReg, 32]<16;16,1> \r
+//     q3 = r[Q_AddrReg, 48]<16;16,1> \r
+//\r
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+// P/Q source regions: the same GRF locations are both read (here) and written (via the New* aliases below)\r
+// P0-P3 and Q0-Q3 should only be used while they have not yet been overwritten with new values\r
+#undef         P3\r
+#undef         P2\r
+#undef         P1\r
+#undef         P0\r
+#undef         Q0\r
+#undef         Q1\r
+#undef         Q2\r
+#undef         Q3\r
+// Source operands: 16 unsigned bytes each, at 16-byte steps from P_AddrReg / Q_AddrReg\r
+#define P3             r[P_AddrReg,  0]<16;16,1>:ub\r
+#define P2             r[P_AddrReg, 16]<16;16,1>:ub\r
+#define P1             r[P_AddrReg, 32]<16;16,1>:ub\r
+#define P0             r[P_AddrReg, 48]<16;16,1>:ub\r
+#define Q0             r[Q_AddrReg,  0]<16;16,1>:ub\r
+#define Q1             r[Q_AddrReg, 16]<16;16,1>:ub\r
+#define Q2             r[Q_AddrReg, 32]<16;16,1>:ub\r
+#define Q3             r[Q_AddrReg, 48]<16;16,1>:ub\r
+\r
+// Destination operands for the filtered pixels\r
+#undef         NewP2\r
+#undef         NewP1\r
+#undef         NewP0\r
+#undef         NewQ0\r
+#undef         NewQ1\r
+#undef         NewQ2\r
+// Same addresses as P2..Q2 above, written with stride <1>; no New* alias is defined for P3 or Q3\r
+#define NewP2  r[P_AddrReg, 16]<1>:ub\r
+#define NewP1  r[P_AddrReg, 32]<1>:ub\r
+#define NewP0  r[P_AddrReg, 48]<1>:ub\r
+#define NewQ0  r[Q_AddrReg,  0]<1>:ub\r
+#define NewQ1  r[Q_AddrReg, 16]<1>:ub\r
+#define NewQ2  r[Q_AddrReg, 32]<1>:ub\r
+\r
+// Filter one luma edge\r
+FILTER_Y:\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x1111:w\r
+#endif\r
+       //---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------\r
+       // bS is in MaskA\r
+\r
+       // Src copy of the p3, p2, p1, p0, q0, q1, q2, q3\r
+//     mov (16) p0123_W(0)<1>          r[P_AddrReg]<16;16,1>:uw\r
+//     mov (16) p0123_W(1)<1>          r[P_AddrReg, 32]<16;16,1>:uw\r
+//     mov (16) q0123_W(0)<1>          r[Q_AddrReg]<16;16,1>:uw\r
+//     mov (16) q0123_W(1)<1>          r[Q_AddrReg, 32]<16;16,1>:uw\r
+\r
+       mov (2) f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       add (16) q0_p0(0)<1>            Q0              -P0                             // q0-p0\r
+       add (16) TempRow0(0)<1>         P1              -P0                             // p1-p0\r
+       add (16) TempRow1(0)<1>         Q1              -Q0                             // q1-q0\r
+\r
+       // Build FilterSampleFlag\r
+       // abs(q0-p0) < alpha\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)q0_p0(0)                   alpha:w\r
+       // abs(p1-p0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow0(0)                beta:w\r
+       // abs(q1-q0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow1(0)                beta:w\r
+\r
+       //-----------------------------------------------------------------------------------------\r
+\r
+    (f0.0)     if      (16)            Y_ENDIF1\r
+               // For channels whose edge control map1 = 1 ---> perform de-blocking\r
+\r
+//             mov (1)         f0.1:uw         MaskB:uw        {NoMask}                // Now check for which algorithm to apply\r
+\r
+               // (abs)ap = |p2-p0|\r
+               add (16) ap(0)<1>               P2              -P0             // ap = p2-p0\r
+               // (abs)aq = |q2-q0|\r
+               add (16) aq(0)<1>               Q2              -Q0             // aq = q2-q0\r
+\r
+               // Make a copy of unmodified p0 and p1 for use in q0'and q1' calculation\r
+               mov (16) p0123_W(1)<1>          r[P_AddrReg, 32]<16;16,1>:uw            {NoMask}\r
+\r
+               (f0.1)  if      (16)            Y_ELSE2\r
+\r
+                       // For channels whose edge control map2 = 1 ---> bS = 4 algorithm\r
+\r
+                       // Compute q0', q1' and q2'\r
+                       //-----------------------------------------------------------------------------\r
+                       // bS = 4 Algorithm :                   \r
+                       //\r
+                       // gama = |p0-q0| < ((alpha >> 2) + 2) \r
+                       // deltap = (ap<beta) && gama;                  // deep filter flag\r
+                       //      if (deltap) {\r
+                       //              p0' = (        p2 +2*p1 +2*p0 +2*q0 + q1 + 4) >> 3; \r
+                       //              p1' = (        p2 +  p1 +  p0 +  q0      + 2) >> 2;\r
+                       //              p2' = (2*p3 +3*p2 +  p1 +  p0 +  q0      + 4) >> 3;\r
+                       //      } else {  \r
+                       //              p0' = (            2*p1 +  p0 +  q1      + 2) >> 2;\r
+                       //      }\r
+                       //-----------------------------------------------------------------------------\r
+\r
+                       // gama = |p0-q0| < ((alpha >> 2) + 2) = |p0-q0| < alpha2  \r
+                       cmp.l.f0.1 (16) null:w  (abs)q0_p0(0)   alpha2:w\r
+\r
+                       // Common P01 = p0 + p1\r
+                       add (16)        P0_plus_P1(0)<1>        P0                      P1      \r
+\r
+                       // Common Q01 = q0 + q1\r
+                       add (16)        Q0_plus_Q1(0)<1>        Q0                      Q1\r
+\r
+//                     mov (1) CTemp1_W:w              f0.1:uw                                         {NoMask}\r
+                       mov (1) f0.0:uw                 f0.1:uw                                         {NoMask}\r
+       \r
+                       // deltap = ((abs)ap < beta) && gama\r
+                       (f0.1) cmp.l.f0.1 (16) null:w   (abs)ap(0)              beta<0;1,0>:w                                                   // (abs)ap < beta ?\r
+\r
+                       // deltaq = ((abs)aq < beta) && gama\r
+                       (f0.0) cmp.l.f0.0 (16) null:w   (abs)aq(0)              beta<0;1,0>:w                                                   // (abs)aq < beta ?\r
+\r
+\r
+//                     mov (1) CTemp1_W:w              f0.0:uw                                         {NoMask}                                        // gama = |p0-q0| < ((alpha >> 2) + 2) for each channel \r
+//                     and (1)         f0.1:w          f0.1:uw         CTemp1_W:w              {NoMask}                                        // deltap = (ap<beta) && gama\r
+\r
+\r
+                       (f0.1)  if      (16)            Y_ELSE3                 // for channels its deltap = true\r
+\r
+                       add (16)        P2_plus_P3(0)<1>        P2              P3\r
+                       \r
+                       // A =  (p1 + p0) + q0 = P01 + q0\r
+                       add (16)        A(0)<1>                 P0_plus_P1(0)           Q0                                                      // A =  P01 + q0\r
+\r
+                       // Now acc0 = A\r
+\r
+                       // B =  p2 + (p1 + p0 + q0) + 4 = p2 + A + 4\r
+//                     add (16)        acc0.0<1>:w             P2                              4:w                                                             // p2 + 4 \r
+//                     add (16)        B(0)<1>                 acc0.0<16;16,1>:w               A(0)                                    // B = p2 + A + 4\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               4:w                                                             // p2 + 4 \r
+                       add (16)        B(0)<1>                 acc0.0<16;16,1>:w               P2                                      // B = p2 + A + 4\r
+                       \r
+                       // Now acc0 = B\r
+\r
+                       // p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3\r
+//                     mov     (16)    acc0.0<1>:w             B(0)\r
+                       mac (16)        acc0.0<1>:w             P2_plus_P3(0)           2:w             \r
+                       shr.sat (16) TempRow3B(0)<2>    acc0.0<16;16,1>:w               3:w\r
+                       \r
+                       // p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2\r
+                       add (16)        acc0.0<1>:w             B(0)                    -2:w\r
+                       shr.sat (16) TempRow1B(0)<2>    acc0.0<16;16,1>:w               2:w\r
+       \r
+                       // p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3\r
+                       add (16)        acc0.0<1>:w             Q1                              A(0)                                                    // B + A\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               B(0)                                                    // B + A + q1\r
+                       shr.sat (16) TempRow0B(0)<2>    acc0.0<16;16,1>:w               3:w                                                             // (B + A + q1) >> 3\r
+\r
+                       // p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3\r
+//                     mov     (16)    acc0.0<1>:w             B(0)\r
+//                     mac (16)        acc0.0<1>:w             P2_plus_P3(0)           2:w             \r
+//                     shr.sat (16) TempRow3B(0)<2>    acc0.0<16;16,1>:w               3:w\r
+\r
+                       mov (16)        NewP2           TempRow3B(0)                                            // p2'\r
+                       mov (16)        NewP1           TempRow1B(0)                                            // p1'                  \r
+                       mov (16)        NewP0           TempRow0B(0)                                            // p0'\r
+\r
+Y_ELSE3:\r
+                       else (16)               Y_ENDIF3                // for channels its deltap = false\r
+\r
+                       // p0' = (2*p1 + p0 + q1 + 2) >> 2 =  (p1 + P01 + q1 + 2) >> 2\r
+                       add (16)        acc0.0<1>:w             P1                      P0_plus_P1(0)                   // p1 + P01 (TempRow1(0) = P01)\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       Q1                              \r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       2:w                     // p1 + P01 + q1 + 2\r
+\r
+                       shr.sat (16) TempRow0B(0)<2>    acc0.0<16;16,1>:w               2:w     // >> 2\r
+                       mov (16)        NewP0           TempRow0B(0)                                            // p0'\r
+\r
+                       endif\r
+Y_ENDIF3:\r
+                       // Compute q0', q1' and q2'\r
+                       //-----------------------------------------------------------------------------\r
+                       // bS = 4 Algorithm (cont):                     \r
+                       //\r
+                       //      deltaq = (aq<beta) && gama;             // deep filter flag\r
+                       //      if (deltaq) {\r
+                       //              q0' = (        q2 +2*q1 +2*q0 +2*p0 + p1 + 4) >> 3; \r
+                       //              q1' = (        q2 +  q1 +  q0 +  p0      + 2) >> 2;\r
+                       //              q2' = (2*q3 +3*q2 +  q1 +  q0 +  p0      + 4) >> 3;\r
+                       //      } else {\r
+                       //              q0' = (            2*q1 +  q0 +  p1      + 2) >> 2;\r
+                       //      }\r
+                       \r
+                       // deltaq = ((abs)aq < beta) && gama\r
+//                     cmp.l.f0.1 (16) null:w  (abs)aq(0)              beta<0;1,0>:w                                                   // (abs)aq < beta ?\r
+\r
+                       // Common Q01 = q0 + q1\r
+//                     add (16)        Q0_plus_Q1(0)<1>        Q0                      Q1\r
+                       \r
+//                     and (1)         f0.1:w          f0.1:uw         CTemp1_W:w              {NoMask}                                // deltaq = ((abs)ap < beta) && gama\r
+\r
+                       (f0.0)  if      (16)            Y_ELSE4                 // for channels its deltaq = true\r
+                       \r
+                       add (16)        Q2_plus_Q3(0)<1>        Q2                      Q3\r
+\r
+                       // A =  (q1 + q0) + p0 = Q01 + p0\r
+                       add (16)        A(0)<1>                 Q0_plus_Q1(0)           p0(0)                                                   // A =  q1+q0 + p0\r
+\r
+                       // Acc0 = A\r
+\r
+                       // B =  q2 + q1 + q0 + p0 + 4 = q2 + A + 4\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               4:w                                                     // q2 + 4 \r
+                       add (16)        B(0)<1>                 acc0.0<16;16,1>:w               Q2                                                              // B = q2 + A + 4\r
+\r
+                       // Acc0 = B\r
+                       \r
+                       // q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3\r
+//                     mov (16)        acc0.0<1>:w             B(0)    \r
+                       mac (16)        acc0.0<1>:w             Q2_plus_Q3(0)   2:w\r
+                       shr.sat (16) TempRow3B(0)<2>    acc0.0<16;16,1>:w               3:w\r
+\r
+                       // q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2\r
+                       add (16)        acc0.0<1>:w             B(0)                    -2:w\r
+                       shr.sat (16) TempRow1B(0)<2>    acc0.0<16;16,1>:w       2:w\r
+                       \r
+                       // q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3\r
+                       add (16)        acc0.0<1>:w             p1(0)                                   A(0)\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               B(0)\r
+                       shr.sat (16) TempRow0B(0)<2>    acc0.0<16;16,1>:w       3:w\r
+                       \r
+                       mov (16)        NewQ2           TempRow3B(0)                                            // q2'\r
+                       mov (16)        NewQ1           TempRow1B(0)                                            // q1'\r
+                       mov (16)        NewQ0           TempRow0B(0)                                            // q0'\r
+\r
+Y_ELSE4:\r
+                       else (16)               Y_ENDIF4                // for channels its deltaq = false\r
+\r
+                       // q0' = (2*q1 + q0 + p1 + 2) >> 2 =  (q1 + Q01 + p1 + 2) >> 2\r
+                       // Use original p1 values in p1(0)\r
+                       add (16)        acc0.0<1>:w             p1(0)                   Q0_plus_Q1(0)                   // p1 + P01 (TempRow1(0) = P01)\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       Q1                              \r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       2:w                     // p1 + P01 + q1 + 2\r
+\r
+                       shr.sat (16)    TempRow0B(0)<2>         acc0.0<16;16,1>:w               2:w                                                             // >> 2\r
+                       mov (16)        NewQ0           TempRow0B(0)                                            // q0'\r
+\r
+                       endif\r
+Y_ENDIF4:\r
+\r
+                       \r
+                       // Done with bS = 4 algorithm\r
+                       \r
+Y_ELSE2: \r
+               else    (16)            Y_ENDIF2\r
+                       // For channels whose edge control map2 = 0 ---> bS < 4 algorithm\r
+\r
+                       //-----------------------------------------------------------------------------\r
+                       // bS < 4 Algorithm :\r
+                       // tc = tc0 + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)\r
+                       // delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))\r
+                       // p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)\r
+                       // q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)\r
+                       // if (|p2-p0|<Beta)\r
+                       //              p1' = p1 + Clip3(-tc0, tc0, (p2 + ((p0+q0+1)>>1) - (p1<<1)) >> 1 )\r
+                       // if (|q2-q0|<Beta)\r
+                       //              q1' = q1 + Clip3(-tc0, tc0, (q2 + ((p0+q0+1)>>1) - (q1<<1)) >> 1 )\r
+                       //-----------------------------------------------------------------------------\r
+                       \r
+                       // Expand tc0\r
+                       mov (16)        tc_exp(0)<1>    tc0<1;4,0>:ub   {NoMask}\r
+                       mov (16)        tc0_exp(0)<1>   tc0<1;4,0>:ub   {NoMask}                                        // tc0_exp = tc0, each tc0 is duplicated 4 times for 4 adjcent 4 pixels \r
+                                               \r
+                       // tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)                   \r
+//                     mov (16)        tc_exp(0)<1>            tc0_exp(0)                                                                      // tc = tc0_exp first\r
+                       \r
+\r
+                       cmp.l.f0.0 (16) null:w          (abs)ap(0)                      beta:w                                          // |p2-p0|< Beta ? ---> (abs)ap < Beta ?\r
+                       cmp.l.f0.1 (16) null:w          (abs)aq(0)                      beta:w                                          // |q2-q0|< Beta ? ---> (abs)aq < Beta ?\r
+                       \r
+                       //--- Use free cycles here ---\r
+                       // delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))\r
+                       // 4 * (q0-p0) + p1 - q1 + 4\r
+                       add (16) acc0<1>:w              P1                      4:w                                                     // p1 + 4\r
+                       mac (16) acc0<1>:w              q0_p0(0)        4:w                                                     // 4 * (q0-p0) + p1 + 4\r
+                       add (16) acc0<1>:w              acc0<16;16,1>:w         -Q1                                     // 4 * (q0-p0) + p1 - q1 + 4\r
+                       shr (16) TempRow0(0)<1> acc0<16;16,1>:w         3:w\r
+                                               \r
+                       // Continue on getting tc_exp\r
+                       (f0.0) add (16) tc_exp(0)<1>    tc_exp(0)       1:w                                                     // tc0_exp + (|p2-p0|<Beta ? 1 : 0)\r
+                       mov (2) CTemp1_W<1>:w           f0.0<2;2,1>:w                   {NoMask}                                        // Save |p2-p0|<Beta flag                       \r
+                       (f0.1) add (16) tc_exp(0)<1>    tc_exp(0)       1:w                                                     // tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)\r
+                       \r
+\r
+                       // Continue clipping against +/-tc to get delta\r
+                       cmp.g.f0.0      (16) null:w             TempRow0(0)             tc_exp(0)                                       // Clip if delta' > tc\r
+                       cmp.l.f0.1      (16) null:w             TempRow0(0)             -tc_exp(0)                                      // Clip if delta' < -tc\r
+\r
+                       //--- Use free cycles here ---\r
+                       // common = (p0+q0+1) >> 1        --->  TempRow2(0)\r
+                       // Same as avg of p0 and q0\r
+                       avg (16) TempRow2(0)<1>         P0                      Q0\r
+\r
+                       // Continue on cliping tc to get delta\r
+                       (f0.0) mov (16) TempRow0(0)<1>                          tc_exp(0)\r
+                       (f0.1) mov (16) TempRow0(0)<1>                          -tc_exp(0)\r
+\r
+                       //--- Use free cycles here ---\r
+                       mov (2) f0.0<1>:w               CTemp1_W<2;2,1>:w       {NoMask}                        // CTemp1_W = (|p2-p0|<Beta)\r
+                                                                                                                                                       // CTemp2_W = (|q2-q0|<Beta)            \r
+                       //-----------------------------------------------------------------------\r
+\r
+                       // p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)\r
+                       // q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)\r
+                       add.sat (16) TempRow1B(0)<2>            P0                      TempRow0(0)                                     // p0+delta\r
+                       add.sat (16) TempRow0B(0)<2>            Q0                      -TempRow0(0)                            // q0-delta\r
+                       mov (16) NewP0          TempRow1B(0)                                    // p0'\r
+                       mov (16) NewQ0          TempRow0B(0)                                    // q0'\r
+                       //-----------------------------------------------------------------------\r
+\r
+                       // Now compute p1' and q1'\r
+\r
+                       // if (|p2-p0|<Beta)\r
+//                     mov (1) f0.0:w          CTemp1_W:w                              {NoMask}                        // CTemp1_W = (|p2-p0|<Beta)\r
+                       (f0.0)  if      (16)            Y_ENDIF6\r
+               \r
+                       // p1' = p1 + Clip3(-tc0, tc0, adj)\r
+                       // adj = (p2 + common - (p1<<1)) >> 1 = (p2 + common - (p1*2)) >> 1\r
+                       add (16) acc0<1>:w      P2              TempRow2(0)                                                     // TempRow2(0) = common = (p0+q0+1) >> 1\r
+                       mac (16) acc0<1>:w      P1              -2:w\r
+                       shr (16) TempRow1(0)<1>         acc0<16;16,1>:w         1:w\r
+\r
+                       // tc clip to get tc_adj\r
+                       cmp.g.f0.0      (16) null:w             TempRow1(0)             tc0_exp(0)                                      // Clip if delta' > tc\r
+                       cmp.l.f0.1      (16) null:w             TempRow1(0)             -tc0_exp(0)                                     // Clip if delta' < -tc\r
+                       \r
+                       (f0.0) mov (16) TempRow1(0)<1>                          tc0_exp(0)\r
+                       (f0.1) mov (16) TempRow1(0)<1>                          -tc0_exp(0)\r
+\r
+                       //--- Use free cycles here ---\r
+                       mov (1) f0.1:w          CTemp2_W:w                              {NoMask}                        // CTemp2_W = (|q2-q0|<Beta)\r
+\r
+                       // p1' = p1 + tc_adj\r
+                       add.sat (16) TempRow1B(0)<2>            P1                      TempRow1(0)                                     // p1+tc_adj\r
+                       mov (16) NewP1                  TempRow1B(0)                            // p1'\r
+\r
+                       //------------------------------------------------------------------------\r
+Y_ENDIF6:\r
+                       endif\r
+                       \r
+                       // if (|q2-q0|<Beta)\r
+//                     mov (1) f0.1:w          CTemp2_W:w                              {NoMask}                        // CTemp2_W = (|q2-q0|<Beta)\r
+                       (f0.1)  if      (16)            Y_ENDIF7\r
+                                       \r
+                       // q1' = q1 + Clip3(-tc0, tc0, adj)\r
+                       // adj = (q2 + common - (q1<<1)) >> 1 \r
+                       // same as q2 + common - (q1 * 2)\r
+                       add (16) acc0<1>:w      Q2              TempRow2(0)\r
+                       mac (16) acc0<1>:w      Q1              -2:w\r
+                       shr (16) TempRow1(0)<1>         acc0<16;16,1>:w         1:w     \r
+\r
+                       // tc clip to get tc_adj\r
+                       cmp.g.f0.0      (16) null:w             TempRow1(0)             tc0_exp(0)                                      // Clip if delta' > tc\r
+                       cmp.l.f0.1      (16) null:w             TempRow1(0)             -tc0_exp(0)                                     // Clip if delta' < -tc\r
+\r
+                       (f0.0) mov (16) TempRow1(0)<1>                          tc0_exp(0)\r
+                       (f0.1) mov (16) TempRow1(0)<1>                          -tc0_exp(0)\r
+\r
+                       // q1' = q1 + tc_adj\r
+                       add.sat (16) TempRow1B(0)<2>            Q1                      TempRow1(0)                                     // q1+tc_adj\r
+                       mov (16) NewQ1                  TempRow1B(0)                            // q1'\r
+\r
+                       //------------------------------------------------------------------------                      \r
+Y_ENDIF7:\r
+                       endif\r
+\r
+               endif\r
+Y_ENDIF2:\r
+Y_ENDIF1:\r
+       endif\r
+\r
+RETURN\r
+\r
+#endif // !defined(__AVC_ILDB_LUMA_CORE__)\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Luma_Core_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Luma_Core_Mbaff.asm
new file mode 100644 (file)
index 0000000..0c3f752
--- /dev/null
@@ -0,0 +1,391 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__AVC_ILDB_LUMA_CORE_MBAFF__)     // Make sure this file is only included once\r
+#define __AVC_ILDB_LUMA_CORE_MBAFF__\r
+\r
+////////// AVC ILDB Luma Core Mbaff /////////////////////////////////////////////////////////////////////////////////\r
+//\r
+//     This core performs AVC LUMA ILDB filtering on one horizontal edge (16 pixels) of a MB.  \r
+//     If data is transposed, it can also de-block a vertical edge.\r
+//\r
+//     Before calling this subroutine, caller needs to set the following parameters.\r
+//\r
+//     - EdgeCntlMap1                          //      Edge control map A\r
+//     - EdgeCntlMap2                          //      Edge control map B\r
+//     - P_AddrReg                                     //      Src and dest address register for P pixels\r
+//     - Q_AddrReg                                     //      Src and dest address register for Q pixels      \r
+//     - alpha                                         //  alpha corresponding to the edge to be filtered\r
+//     - beta                                          //  beta corresponding to the edge to be filtered\r
+//     - tc0                                           //      tc0  corresponding to the edge to be filtered\r
+//\r
+//\r
+//     +----+----+----+----+----+----+----+----+\r
+//     | p3 | p2 | p1 | p0 | q0 | q1 | q2 | q3 |\r
+//     +----+----+----+----+----+----+----+----+\r
+//\r
+//     p3 = r[P_AddrReg, 0]<16;16,1>  \r
+//     p2 = r[P_AddrReg, 16]<16;16,1> \r
+//     p1 = r[P_AddrReg, 32]<16;16,1> \r
+//     p0 = r[P_AddrReg, 48]<16;16,1> \r
+//     q0 = r[Q_AddrReg, 0]<16;16,1>  \r
+//     q1 = r[Q_AddrReg, 16]<16;16,1> \r
+//     q2 = r[Q_AddrReg, 32]<16;16,1> \r
+//     q3 = r[Q_AddrReg, 48]<16;16,1> \r
+//\r
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+// The region is both src and dest\r
+// P0-P3 and Q0-Q3 should be only used if they have not been modified to new values  \r
+#undef         P3\r
+#undef         P2\r
+#undef         P1\r
+#undef         P0\r
+#undef         Q0\r
+#undef         Q1\r
+#undef         Q2\r
+#undef         Q3\r
+\r
+#define P3             r[P_AddrReg,  0]<16;16,1>:ub\r
+#define P2             r[P_AddrReg, 16]<16;16,1>:ub\r
+#define P1             r[P_AddrReg, 32]<16;16,1>:ub\r
+#define P0             r[P_AddrReg, 48]<16;16,1>:ub\r
+#define Q0             r[Q_AddrReg,  0]<16;16,1>:ub\r
+#define Q1             r[Q_AddrReg, 16]<16;16,1>:ub\r
+#define Q2             r[Q_AddrReg, 32]<16;16,1>:ub\r
+#define Q3             r[Q_AddrReg, 48]<16;16,1>:ub\r
+\r
+// New region as dest\r
+#undef         NewP2\r
+#undef         NewP1\r
+#undef         NewP0\r
+#undef         NewQ0\r
+#undef         NewQ1\r
+#undef         NewQ2\r
+\r
+#define NewP2  r[P_AddrReg, 16]<1>:ub\r
+#define NewP1  r[P_AddrReg, 32]<1>:ub\r
+#define NewP0  r[P_AddrReg, 48]<1>:ub\r
+#define NewQ0  r[Q_AddrReg,  0]<1>:ub\r
+#define NewQ1  r[Q_AddrReg, 16]<1>:ub\r
+#define NewQ2  r[Q_AddrReg, 32]<1>:ub\r
+\r
+\r
+\r
+// Filter one luma edge - mbaff. Reads MaskA/MaskB, Mbaff_ALPHA, Mbaff_ALPHA2, Mbaff_BETA, Mbaff_TC0; updates pixels via P_AddrReg/Q_AddrReg in place.\r
+FILTER_Y_MBAFF:\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x1111:w\r
+#endif\r
+       //---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------\r
+       // bS is in MaskA\r
+\r
+       // Src copy of the p3, p2, p1, p0, q0, q1, q2, q3\r
+//     mov (16) p0123_W(0)<1>          r[P_AddrReg]<16;16,1>:uw\r
+//     mov (16) p0123_W(1)<1>          r[P_AddrReg, 32]<16;16,1>:uw\r
+//     mov (16) q0123_W(0)<1>          r[Q_AddrReg]<16;16,1>:uw\r
+//     mov (16) q0123_W(1)<1>          r[Q_AddrReg, 32]<16;16,1>:uw\r
+\r
+       // Move MaskA and MaskB to flag regs\r
+       mov (2) f0.0<1>:uw              MaskA<2;2,1>:uw\r
+\r
+       add (16) q0_p0(0)<1>            Q0                      -P0                             // q0-p0\r
+       add (16) TempRow0(0)<1>         P1                      -P0                             // p1-p0\r
+       add (16) TempRow1(0)<1>         Q1                      -Q0                             // q1-q0\r
+\r
+       // abs(q0-p0) < alpha\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)q0_p0(0)           Mbaff_ALPHA(0)\r
+       // abs(p1-p0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow0(0)        Mbaff_BETA(0)\r
+       // abs(q1-q0) < Beta\r
+       (f0.0) cmp.l.f0.0 (16) null:w           (abs)TempRow1(0)        Mbaff_BETA(0)\r
+\r
+       //-----------------------------------------------------------------------------------------\r
+\r
+    (f0.0)     if      (16)            MBAFF_Y_ENDIF1\r
+               // For channels whose edge control map1 = 1 ---> perform de-blocking\r
+\r
+//             mov (1)         f0.1:uw         MaskB:uw        {NoMask}                // Now check for which algorithm to apply\r
+\r
+               // (abs)ap = |p2-p0|\r
+               add (16) ap(0)<1>               P2              -P0\r
+\r
+               // (abs)aq = |q2-q0|\r
+               add (16) aq(0)<1>               Q2              -Q0\r
+\r
+               // Make a copy of unmodified p0 and p1 for use in q0' and q1' calculation\r
+               mov (16) p0123_W(1)<1>          r[P_AddrReg, 32]<16;16,1>:uw            {NoMask}\r
+                       \r
+               (f0.1)  if      (16)            MBAFF_Y_ELSE2\r
+\r
+                       // For channels whose edge control map2 = 1 ---> bS = 4 algorithm\r
+\r
+                       // Compute q0', q1' and q2'\r
+                       //-----------------------------------------------------------------------------\r
+                       // bS = 4 Algorithm :                   \r
+                       //\r
+                       // gama = |p0-q0| < ((alpha >> 2) + 2) \r
+                       // deltap = (ap<beta) && gama;                  // deep filter flag\r
+                       //      if (deltap) {\r
+                       //              p0' = (        p2 +2*p1 +2*p0 +2*q0 + q1 + 4) >> 3; \r
+                       //              p1' = (        p2 +  p1 +  p0 +  q0      + 2) >> 2;\r
+                       //              p2' = (2*p3 +3*p2 +  p1 +  p0 +  q0      + 4) >> 3;\r
+                       //      } else {  \r
+                       //              p0' = (            2*p1 +  p0 +  q1      + 2) >> 2;\r
+                       //      }\r
+                       //-----------------------------------------------------------------------------\r
+\r
+                       // gama = |p0-q0| < ((alpha >> 2) + 2) = |p0-q0| < alpha2  \r
+                       cmp.l.f0.1 (16) null:w  (abs)q0_p0(0)   Mbaff_ALPHA2(0)\r
+\r
+                       // Common P01 = p0 + p1\r
+                       add (16)        P0_plus_P1(0)<1>        P0                      P1      \r
+\r
+                       // Common Q01 = q0 + q1\r
+                       add (16)        Q0_plus_Q1(0)<1>        Q0                      Q1\r
+\r
+                       mov (1) f0.0:uw                 f0.1:uw                                         {NoMask}\r
+\r
+                       // deltap = ((abs)ap < beta) && gama\r
+                       (f0.1) cmp.l.f0.1 (16) null:w   (abs)ap(0)              Mbaff_BETA(0)                                                   // (abs)ap < beta ?\r
+\r
+                       // deltaq = ((abs)aq < beta) && gama\r
+                       (f0.0) cmp.l.f0.0 (16) null:w   (abs)aq(0)              Mbaff_BETA(0)                                                   // (abs)aq < beta ?\r
+\r
+\r
+                       (f0.1)  if      (16)            MBAFF_Y_ELSE3                   // for channels whose deltap = true\r
+       \r
+                       add (16)        P2_plus_P3(0)<1>        P2              P3\r
+                       \r
+                       // A =  p1 + p0 + q0 = P01 + q0\r
+                       add (16)        A(0)<1>                 P0_plus_P1(0)           Q0                                                      // A =  P01 + q0\r
+\r
+                       // Now acc0 = A\r
+\r
+                       // B =  p2 + p1 + p0 + q0 + 4 = p2 + A + 4\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               4:w                                                             // A + 4 \r
+                       add (16)        B(0)<1>                 acc0.0<16;16,1>:w               P2                                      // B = p2 + A + 4\r
+\r
+                       // Now acc0 = B\r
+\r
+                       // p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3\r
+                       mac (16)        acc0.0<1>:w             P2_plus_P3(0)           2:w             \r
+                       shr.sat (16) TempRow3B(0)<2>    acc0.0<16;16,1>:w               3:w\r
+\r
+                       // p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2\r
+                       add (16)        acc0.0<1>:w             B(0)                    -2:w\r
+                       shr.sat (16) TempRow1B(0)<2>    acc0.0<16;16,1>:w               2:w\r
+       \r
+                       // p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3\r
+                       add (16)        acc0.0<1>:w             Q1                              A(0)                                                    // q1 + A\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               B(0)                                                    // q1 + A + B\r
+                       shr.sat (16) TempRow0B(0)<2>    acc0.0<16;16,1>:w               3:w                                                             // (B + A + q1) >> 3\r
+\r
+                       mov (16)        NewP2           TempRow3B(0)                                            // p2'\r
+                       mov (16)        NewP1           TempRow1B(0)                                            // p1'                  \r
+                       mov (16)        NewP0           TempRow0B(0)                                            // p0'\r
+\r
+MBAFF_Y_ELSE3:\r
+                       else (16)               MBAFF_Y_ENDIF3          // for channels whose deltap = false\r
+\r
+                       // p0' = (2*p1 + p0 + q1 + 2) >> 2 =  (p1 + P01 + q1 + 2) >> 2\r
+                       add (16)        acc0.0<1>:w             P1                      P0_plus_P1(0)                   // p1 + P01 (TempRow1(0) = P01)\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       Q1                              \r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       2:w                     // p1 + P01 + q1 + 2\r
+\r
+                       shr.sat (16) TempRow0B(0)<2>    acc0.0<16;16,1>:w               2:w     // >> 2\r
+                       mov (16)        NewP0           TempRow0B(0)                                            // p0'\r
+\r
+                       endif\r
+\r
+MBAFF_Y_ENDIF3:\r
+                       // Compute q0', q1' and q2'\r
+                       //-----------------------------------------------------------------------------\r
+                       // bS = 4 Algorithm (cont):                     \r
+                       //\r
+                       //      deltaq = (aq<beta) && gama;             // deep filter flag\r
+                       //      if (deltaq) {\r
+                       //              q0' = (        q2 +2*q1 +2*q0 +2*p0 + p1 + 4) >> 3; \r
+                       //              q1' = (        q2 +  q1 +  q0 +  p0      + 2) >> 2;\r
+                       //              q2' = (2*q3 +3*q2 +  q1 +  q0 +  p0      + 4) >> 3;\r
+                       //      } else {\r
+                       //              q0' = (            2*q1 +  q0 +  p1      + 2) >> 2;\r
+                       //      }\r
+                       \r
+                       (f0.0)  if      (16)            MBAFF_Y_ELSE4                   // for channels whose deltaq = true\r
+                       \r
+                       add (16)        Q2_plus_Q3(0)<1>        Q2                      Q3\r
+\r
+                       // A =  q1 + q0 + p0 = Q01 + p0\r
+                       add (16)        A(0)<1>                 Q0_plus_Q1(0)           p0(0)                                                   // A =  q1+q0 + p0\r
+\r
+                       // B =  q2 + q1 + q0 + p0 + 4 = q2 + A + 4\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               4:w                                                     // A + 4 \r
+                       add (16)        B(0)<1>                 acc0.0<16;16,1>:w               Q2                                                              // B = q2 + A + 4\r
+                       \r
+                       // Acc0 = B\r
+\r
+                       // q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3\r
+                       mac (16)        acc0.0<1>:w             Q2_plus_Q3(0)   2:w\r
+                       shr.sat (16) TempRow3B(0)<2>    acc0.0<16;16,1>:w               3:w\r
+\r
+                       // q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2\r
+                       add (16)        acc0.0<1>:w             B(0)                    -2:w\r
+                       shr.sat (16) TempRow1B(0)<2>    acc0.0<16;16,1>:w       2:w\r
+                       \r
+                       // q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3\r
+                       add (16)        acc0.0<1>:w             p1(0)                                   A(0)\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w               B(0)\r
+                       shr.sat (16) TempRow0B(0)<2>    acc0.0<16;16,1>:w       3:w\r
+                       \r
+                       mov (16)        NewQ2           TempRow3B(0)                                            // q2'\r
+                       mov (16)        NewQ1           TempRow1B(0)                                            // q1'\r
+                       mov (16)        NewQ0           TempRow0B(0)                                            // q0'\r
+\r
+MBAFF_Y_ELSE4:\r
+                       else (16)               MBAFF_Y_ENDIF4          // for channels whose deltaq = false\r
+\r
+                       // q0' = (2*q1 + q0 + p1 + 2) >> 2 =  (q1 + Q01 + p1 + 2) >> 2\r
+                       // Use original p1 values in p1(0)\r
+                       add (16)        acc0.0<1>:w             p1(0)                   Q0_plus_Q1(0)                   // p1 + Q01\r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       Q1                              \r
+                       add (16)        acc0.0<1>:w             acc0.0<16;16,1>:w       2:w                     // p1 + Q01 + q1 + 2\r
+\r
+                       shr.sat (16)    TempRow0B(0)<2>         acc0.0<16;16,1>:w               2:w                                                             // >> 2\r
+                       mov (16)        NewQ0           TempRow0B(0)                                            // q0'\r
+\r
+                       endif\r
+MBAFF_Y_ENDIF4:\r
+\r
+                       \r
+                       // Done with bS = 4 algorithm\r
+                       \r
+MBAFF_Y_ELSE2: \r
+               else    (16)            MBAFF_Y_ENDIF2\r
+                       // For channels whose edge control map2 = 0 ---> bS < 4 algorithm\r
+\r
+                       //-----------------------------------------------------------------------------\r
+                       // bS < 4 Algorithm :\r
+                       // tc = tc0 + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)\r
+                       // delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))\r
+                       // p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)\r
+                       // q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)\r
+                       // if (|p2-p0|<Beta)\r
+                       //              p1' = p1 + Clip3(-tc0, tc0, (p2 + ((p0+q0+1)>>1) - (p1<<1)) >> 1 )\r
+                       // if (|q2-q0|<Beta)\r
+                       //              q1' = q1 + Clip3(-tc0, tc0, (q2 + ((p0+q0+1)>>1) - (q1<<1)) >> 1 )\r
+                       //-----------------------------------------------------------------------------\r
+                       \r
+                       mov (16)        tc_exp(0)<1>            Mbaff_TC0(0)                                                            // tc = tc0_exp first\r
+                       \r
+                       cmp.l.f0.0 (16) null:w          (abs)ap(0)                      Mbaff_BETA(0)                           // |p2-p0|<Beta ?\r
+                       cmp.l.f0.1 (16) null:w          (abs)aq(0)                      Mbaff_BETA(0)                           // |q2-q0|<Beta ?\r
+                               \r
+                       //--- Use free cycles here ---\r
+                       // delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))\r
+                       // 4 * (q0-p0) + p1 - q1 + 4\r
+                       add (16) acc0<1>:w              P1                      4:w                                                     // p1 + 4\r
+                       mac (16) acc0<1>:w              q0_p0(0)        4:w                                                     // 4 * (q0-p0) + p1 + 4\r
+                       add (16) acc0<1>:w              acc0<16;16,1>:w         -Q1                                     // 4 * (q0-p0) + p1 - q1 + 4\r
+                       shr (16) TempRow0(0)<1> acc0<16;16,1>:w         3:w\r
+                                               \r
+                       // Continue on getting tc_exp\r
+                       (f0.0) add (16) tc_exp(0)<1>    tc_exp(0)       1:w                                                     // tc0_exp + (|p2-p0|<Beta ? 1 : 0)\r
+                       mov (2) CTemp1_W<1>:w           f0.0<2;2,1>:w                   {NoMask}                                        // Save both flag words: |p2-p0|<Beta (f0.0) and |q2-q0|<Beta (f0.1)                       \r
+                       (f0.1) add (16) tc_exp(0)<1>    tc_exp(0)       1:w                                                     // tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)\r
+                               \r
+                       // Continue on clipping tc to get delta\r
+                       cmp.g.f0.0      (16) null:w             TempRow0(0)             tc_exp(0)                                       // Clip if delta' > tc\r
+                       cmp.l.f0.1      (16) null:w             TempRow0(0)             -tc_exp(0)                                      // Clip if delta' < -tc\r
+\r
+                       //--- Use free cycles here ---\r
+                       // common = (p0+q0+1) >> 1        --->  TempRow2(0)\r
+                       // Same as avg of p0 and q0\r
+                       avg (16) TempRow2(0)<1>         P0                      Q0\r
+\r
+                       // Continue on clipping tc to get delta\r
+                       (f0.0) mov (16) TempRow0(0)<1>                          tc_exp(0)\r
+                       (f0.1) mov (16) TempRow0(0)<1>                          -tc_exp(0)\r
+\r
+                       //--- Use free cycles here ---\r
+                       mov (2) f0.0<1>:w               CTemp1_W<2;2,1>:w       {NoMask}                        // CTemp1_W = (|p2-p0|<Beta)\r
+                                                                                                                                                       // CTemp2_W = (|q2-q0|<Beta)            \r
+\r
+                       // p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)\r
+                       // q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)\r
+                       add.sat (16) TempRow1B(0)<2>            P0                      TempRow0(0)                                     // p0+delta\r
+                       add.sat (16) TempRow0B(0)<2>            Q0                      -TempRow0(0)                            // q0-delta\r
+                       \r
+                       mov (16) NewP0          TempRow1B(0)                                    // p0'\r
+                       mov (16) NewQ0          TempRow0B(0)                                    // q0'\r
+\r
+                       //-----------------------------------------------------------------------\r
+\r
+                       // Now compute p1' and q1'\r
+\r
+                       // if (|p2-p0|<Beta)\r
+                       (f0.0)  if      (16)            MBAFF_Y_ENDIF6\r
+\r
+                       // p1' = p1 + Clip3(-tc0, tc0, adj)\r
+                       // adj = (p2 + common - (p1<<1)) >> 1 = (p2 + common - (p1*2)) >> 1\r
+                       add (16) acc0<1>:w      P2              TempRow2(0)                                                     // TempRow2(0) = common = (p0+q0+1) >> 1\r
+                       mac (16) acc0<1>:w      P1              -2:w\r
+                       shr (16) TempRow1(0)<1>         acc0<16;16,1>:w         1:w\r
+\r
+                       // tc clip to get tc_adj\r
+                       cmp.g.f0.0      (16) null:w             TempRow1(0)             Mbaff_TC0(0)                                    // Clip if adj > tc0\r
+                       cmp.l.f0.1      (16) null:w             TempRow1(0)             -Mbaff_TC0(0)                                   // Clip if adj < -tc0\r
+                       \r
+                       (f0.0) mov (16) TempRow1(0)<1>                          Mbaff_TC0(0)\r
+                       (f0.1) mov (16) TempRow1(0)<1>                          -Mbaff_TC0(0)\r
+\r
+                       //--- Use free cycles here ---\r
+                       mov (1) f0.1:w          CTemp2_W:w                              {NoMask}                        // CTemp2_W = (|q2-q0|<Beta)\r
+\r
+                       // p1' = p1 + tc_adj\r
+                       add.sat (16) TempRow1B(0)<2>            P1                      TempRow1(0)                                     // p1+tc_adj\r
+                       mov (16) NewP1                  TempRow1B(0)                            // p1'\r
+                       //------------------------------------------------------------------------\r
+\r
+MBAFF_Y_ENDIF6:\r
+                       endif\r
+                       \r
+                       // if (|q2-q0|<Beta)\r
+                       (f0.1)  if      (16)            MBAFF_Y_ENDIF7\r
+\r
+                       // q1' = q1 + Clip3(-tc0, tc0, adj)\r
+                       // adj = (q2 + common - (q1<<1)) >> 1 \r
+                       // same as q2 + common - (q1 * 2)\r
+                       add (16) acc0<1>:w      Q2              TempRow2(0)\r
+                       mac (16) acc0<1>:w      Q1              -2:w\r
+                       shr (16) TempRow1(0)<1>         acc0<16;16,1>:w         1:w     \r
+                                       \r
+                       // tc clip to get tc_adj\r
+                       cmp.g.f0.0      (16) null:w             TempRow1(0)             Mbaff_TC0(0)                                    // Clip if adj > tc0\r
+                       cmp.l.f0.1      (16) null:w             TempRow1(0)             -Mbaff_TC0(0)                                   // Clip if adj < -tc0\r
+\r
+                       (f0.0) mov (16) TempRow1(0)<1>                          Mbaff_TC0(0)\r
+                       (f0.1) mov (16) TempRow1(0)<1>                          -Mbaff_TC0(0)\r
+                       \r
+                       // q1' = q1 + tc_adj\r
+                       add.sat (16) TempRow1B(0)<2>            Q1                      TempRow1(0)                                     // q1+tc_adj\r
+                       mov (16) NewQ1                  TempRow1B(0)                            // q1'\r
+                       \r
+                       //------------------------------------------------------------------------                      \r
+MBAFF_Y_ENDIF7:\r
+                       endif\r
+\r
+               endif\r
+MBAFF_Y_ENDIF2:\r
+MBAFF_Y_ENDIF1:\r
+       endif\r
+\r
+RETURN\r
+\r
+#endif // !defined(__AVC_ILDB_LUMA_CORE_MBAFF__)\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_OpenGateway.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_OpenGateway.asm
new file mode 100644 (file)
index 0000000..06d1933
--- /dev/null
@@ -0,0 +1,43 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//----- Open a Message Gateway -----\r
+// The parent thread is the recipient thread\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignature:w                        0x1111:w\r
+#endif\r
+\r
+mov (8)        GatewayPayload<1>:ud            r0.0<8;8,1>:ud                  // Init payload to r0\r
+\r
+// The gateway exposes a window of 16 GRFs that child threads can write to.\r
+// The active setting below uses register base r6; the r50 variant is kept commented out further down.\r
+// Reg base is at bit 28:21, Gateway size is at [bit 10:8]\r
+// r6: 6 = 00000110\r
+//000 00000110 0000000000 100 00000000 ==> 0000 0000 1100 0000 0000 0100 0000 0000\r
+mov (1)        RegBase_GatewaySize:ud  0x00C00400:ud   // Reg base (r6) + Gateway size (16 GRFs)\r
+\r
+\r
+//000 00110010 0000000000 100 00000000 ==> 0000 0110 0100 0000 0000 0100 0000 0000\r
+//mov (1)      RegBase_GatewaySize:ud  0x06400400:ud   // Reg base (r50 = 0x640 byte offset) + Gateway size (16 GRFs)\r
+\r
+//mov (1)      DispatchID:ub                   r0.20:ub                // Dispatch ID\r
+mov (1)        GatewayPayloadKey:uw    0x1212:uw               // Key=0x1212\r
+\r
+// Message descriptor\r
+// bit 31      EOD\r
+// 27:24       FFID = 0011b for msg gateway\r
+// 23:20       msg length = 1 MRF\r
+// 19:16       Response length = 0\r
+// 14          AckReg = 1\r
+// 1:0         SubFuncID = 00 for OpenGateway\r
+// Message descriptor: 0 000 0011 0001 0000 + 0 1 000000000000 00 ==> 0000 0011 0001 0000 0100 0000 0000 0000\r
+// Send message to gateway: the ack message is put into response GRF r49 ==> Good for debugging\r
+// NOTE(review): the payload is sent from m7 with descriptor OGWMSGDSC; both GatewayResponse and OGWMSGDSC are defined outside this file - confirm they match the bit layout above.\r
+send (8)       GatewayResponse:ud      m7        GatewayPayload<8;8,1>:ud    MSG_GW    OGWMSGDSC\r
+\r
+//----- End of Open a Message Gateway -----\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Field_UV.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Field_UV.asm
new file mode 100644 (file)
index 0000000..759e80a
--- /dev/null
@@ -0,0 +1,9 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#include "AVC_ILDB_Root_UV.asm"        // Field chroma root kernel: built entirely from the AVC_ILDB_Root_UV.asm source\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Field_Y.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Field_Y.asm
new file mode 100644 (file)
index 0000000..f88eede
--- /dev/null
@@ -0,0 +1,9 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#include "AVC_ILDB_Root_Y.asm" // Field luma root kernel: built entirely from the AVC_ILDB_Root_Y.asm source\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_UV.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_UV.asm
new file mode 100644 (file)
index 0000000..01109be
--- /dev/null
@@ -0,0 +1,140 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+// Kernel name: AVC_ILDB_Root_Mbaff_UV.asm\r
+//\r
+//  Root kernel serves as a scheduler for child threads.\r
+//\r
+//\r
+//     ***** Note *****\r
+//     The initial design bundles an MB pair into each thread and shares AVC_ILDB_Dep_Check.asm\r
+//     with the non-MBAFF kernels.\r
+//\r
+//     Optimization will be done later, putting top and bottom MBs on separate threads.\r
+//\r
+//\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 10/19/06 5:06p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  AVC_ILDB_ROOT_MBAFF_UV\r
+// ----------------------------------------------------\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_ROOT_MBAFF_UV\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_ROOT_UV):\r
+#endif\r
+\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+\r
+#if defined(_DEBUG) \r
+\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+// Init URB space for running on RTL, so that no unwritten URB entry is read.\r
+// The init loop below is currently commented out; only the debug signature is written.\r
+// Will remove it for production release.\r
+\r
+\r
+//mov (8) m1:ud                0x11111111:ud\r
+//mov (8) m2:ud                0x22222222:ud \r
+//mov (8) m3:ud                0x33333333:ud\r
+//mov (8) m4:ud                0x44444444:ud \r
+\r
+//mov (1)      Temp1_W:w       0:w\r
+\r
+//ILDB_INIT_URB:\r
+//mul (1)      URBOffset:w                             Temp1_W:w               4:w\r
+//shl (1) URBWriteMsgDescLow:uw        URBOffset:w             4:w             // Msg descriptor: URB write dest offset (9:4)\r
+//mov (1) URBWriteMsgDescHigh:uw       0x0650:uw                               // Msg descriptor: URB write 5 MRFs (m0 - m4)\r
+//#include "writeURB.asm"\r
+\r
+//add          (1)             Temp1_W:w       Temp1_W:w       1:w                             // Increase block count\r
+//cmp.l.f0.0 (1)       null            Temp1_W:w       MBsCntY:w               // Check the block count limit\r
+//(f0.0) jmpi          ILDB_INIT_URB                                                   // Loop back\r
+\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+mov            (1)             EntrySignature:w                        0xEFF0:w\r
+\r
+#endif\r
+//----------------------------------------------------------------------------------------------------------------\r
+\r
+// Set global variable\r
+mov (32)       ChildParam:uw                   0:uw                                                            // Reset local variables\r
+//mul  (1)             TotalBlocks:w                   MBsCntX:w               MBsCntY:w                       // Total # of MB pairs\r
+//add  (1)             GatewayApertureE:w              MBsCntY:w               GatewayApertureB:w      // Aperture End = aperture Head + BlockCntY\r
+\r
+\r
+// 2 URB entries per chroma (UV) child thread:\r
+// Entry 0 - Child thread R0Hdr\r
+// Entry 1 - input parameter to child kernel (child r1)\r
+\r
+// Drop any definition left over from a previously included kernel, as the luma root kernels do.\r
+#undef                 URB_ENTRIES_PER_MB\r
+#define        URB_ENTRIES_PER_MB      2\r
+\r
+// URB_ENTRIES_PER_MB in different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10\r
+mov (1) URB_EntriesPerMB_2:w           URB_ENTRIES_PER_MB-1:w\r
+shl (1) URB_EntriesPerMB_2:w           URB_EntriesPerMB_2:w    10:w\r
+\r
+#define        CHROMA_ROOT                                                                                                             // Compiling flag for chroma only\r
+//mul (1)              URBOffsetUVBase:w               MBsCntY:w               URB_ENTRIES_PER_MB:w    // Right after Y entries\r
+\r
+// URB base for UV kernels: 240 when DEV_CL is defined, 320 otherwise\r
+#if defined(DEV_CL)    \r
+       mov (1)         URBOffsetUVBase:w               240:w\r
+#else\r
+       mov (1)         URBOffsetUVBase:w               320:w\r
+#endif\r
+\r
+\r
+mov    (1)             ChildThreadsID:uw               3:uw                                    // Starting ChildThreadsID for chroma child threads (odd IDs)\r
+\r
+shr (1)                ThreadLimit:w           MaxThreads:w            1:w             // Initial thread limit = MaxThreads / 2 (50%)\r
+mul    (1)             TotalBlocks:w           MBsCntX:w               MBsCntY:w       // MBs to be processed count down from TotalBlocks\r
+\r
+//***** Init CT_R0Hdr fields that are common to all threads *************************\r
+mov (8)        CT_R0Hdr.0:ud                   r0.0<8;8,1>:ud                          // Init to root R0 header\r
+mov (1)        CT_R0Hdr.7:ud                   r0.6:ud                                         // Copy Parent Thread Cnt; JJ did the change on 06/20/2006\r
+mov (1) CT_R0Hdr.31:ub                 0:w                                                     // Reset the highest byte\r
+mov (1) CT_R0Hdr.3:ud                  0x00000000       \r
+mov (1) CT_R0Hdr.6:uw                  sr0.0:uw                                        // sr0.0: state reg contains general thread states, e.g. EUID/TID.\r
+\r
+//***** Init ChildParam fields that are common to all threads ***********************\r
+mov (8)        ChildParam<1>:ud        RootParam<8;8,1>:ud             // Copy all root parameters\r
+mov (4)                CurCol<1>:w                     0:w                                             // Reset CurCol, CurRow\r
+add    (2)             LastCol<1>:w            MBsCntX<2;2,1>:w                -1:w    // Get LastCol and LastRow\r
+\r
+mov (1) URBWriteMsgDesc:ud             MSG_LEN(2)+URBWMSGDSC:ud                        // URB write descriptor built with MSG_LEN(2)\r
+\r
+//===================================================================================\r
+\r
+#include "AVC_ILDB_OpenGateway.asm"            // Open root thread gateway for receiving notification \r
+\r
+#include "AVC_ILDB_Dep_Check.asm"      // Check dependency and spawn all MBs\r
+\r
+//#include "AVC_ILDB_UpdateThrdLimit.asm"      // Update thread limit in luma root thread via gateway\r
+\r
+#include "AVC_ILDB_CloseGateway.asm"   // Close root thread gateway \r
+\r
+// Chroma root EOT = child send EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+\r
+#undef         CHROMA_ROOT\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_Y.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_Y.asm
new file mode 100644 (file)
index 0000000..ae80a36
--- /dev/null
@@ -0,0 +1,140 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+// Kernel name: AVC_ILDB_Root_Mbaff_Y.asm\r
+//\r
+//  Root kernel serves as a scheduler for child threads.\r
+//\r
+//\r
+//     ***** Note *****\r
+//     The initial design bundles an MB pair into each thread and shares AVC_ILDB_Dep_Check.asm\r
+//     with the non-MBAFF kernels.\r
+//\r
+//     Optimization will be done later, putting top and bottom MBs on separate threads.\r
+//\r
+//\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 10/19/06 5:06p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  AVC_ILDB_ROOT_MBAFF_Y\r
+// ----------------------------------------------------\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_ROOT_MBAFF_Y\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_ROOT_Y):\r
+#endif\r
+\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+\r
+#if defined(_DEBUG) \r
+\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+// Init URB space for running on RTL, so that no unwritten URB entry is read.\r
+// The init loop below is currently commented out; only the debug signature is written.\r
+// Will remove it for production release.\r
+\r
+\r
+//mov (8) m1:ud                0x11111111:ud\r
+//mov (8) m2:ud                0x22222222:ud \r
+//mov (8) m3:ud                0x33333333:ud\r
+//mov (8) m4:ud                0x44444444:ud \r
+\r
+//mov (1)      Temp1_W:w       0:w\r
+\r
+//ILDB_INIT_URB:\r
+//mul (1)      URBOffset:w                             Temp1_W:w               4:w\r
+//shl (1) URBWriteMsgDescLow:uw        URBOffset:w             4:w             // Msg descriptor: URB write dest offset (9:4)\r
+//mov (1) URBWriteMsgDescHigh:uw       0x0650:uw                               // Msg descriptor: URB write 5 MRFs (m0 - m4)\r
+//#include "writeURB.asm"\r
+\r
+//add          (1)             Temp1_W:w       Temp1_W:w       1:w                             // Increase block count\r
+//cmp.l.f0.0 (1)       null            Temp1_W:w       MBsCntY:w               // Check the block count limit\r
+//(f0.0) jmpi          ILDB_INIT_URB                                                   // Loop back\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+mov            (1)             EntrySignature:w                        0xEFF0:w\r
+\r
+#endif\r
+//----------------------------------------------------------------------------------------------------------------\r
+\r
+// Set global variable\r
+mov (32)       ChildParam:uw                   0:uw                                                            // Reset local variables\r
+//mul  (1)             TotalBlocks:w                   MBsCntX:w               MBsCntY:w                       // Total # of MB pairs\r
+//add  (1)             GatewayApertureE:w              MBsCntY:w               GatewayApertureB:w      // Aperture End = aperture Head + BlockCntY\r
+\r
+\r
+// 2 URB entries for Y:\r
+// Entry 0 - Child thread R0Hdr\r
+// Entry 1 - input parameter to child kernel (child r1)\r
+\r
+#undef                 URB_ENTRIES_PER_MB\r
+#define        URB_ENTRIES_PER_MB              2\r
+\r
+// URB_ENTRIES_PER_MB in different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10\r
+mov (1)        URB_EntriesPerMB_2:w            URB_ENTRIES_PER_MB-1:w\r
+shl (1)        URB_EntriesPerMB_2:w            URB_EntriesPerMB_2:w    10:w\r
+\r
+mov    (1)             ChildThreadsID:uw               1:uw                                    // ChildThreadsID for chroma root\r
+\r
+shr (1)                ThreadLimit:w           MaxThreads:w            1:w             // Initial luma thread limit = MaxThreads / 2 (50%)\r
+mul    (1)             TotalBlocks:w           MBsCntX:w               MBsCntY:w       // MBs to be processed count down from TotalBlocks\r
+\r
+//***** Init CT_R0Hdr fields that are common to all threads *************************\r
+mov (8)                CT_R0Hdr.0:ud                   r0.0<8;8,1>:ud                          // Init to root R0 header\r
+mov (1)                CT_R0Hdr.7:ud                   r0.6:ud                                         // Copy Parent Thread Cnt; JJ did the change on 06/20/2006\r
+mov (1)        CT_R0Hdr.31:ub                  0:w                                                     // Reset the highest byte\r
+mov (1)        CT_R0Hdr.3:ud                   0x00000000       \r
+mov (1)        CT_R0Hdr.6:uw                   sr0.0:uw                                        // sr0.0: state reg contains general thread states, e.g. EUID/TID.\r
+\r
+//***** Init ChildParam fields that are common to all threads ***********************\r
+mov (8)        ChildParam<1>:ud        RootParam<8;8,1>:ud             // Copy all root parameters\r
+mov (4)                CurCol<1>:w                     0:w                                             // Reset CurCol, CurRow\r
+add    (2)             LastCol<1>:w            MBsCntX<2;2,1>:w                -1:w    // Get LastCol and LastRow\r
+\r
+mov (1)        URBWriteMsgDesc:ud              MSG_LEN(2)+URBWMSGDSC:ud                        // URB write descriptor built with MSG_LEN(2)\r
+\r
+//===================================================================================\r
+\r
+#include "AVC_ILDB_OpenGateway.asm"            // Open root thread gateway for receiving notification \r
+\r
+// The chroma root is spawned at the chroma URB offset: 240 when DEV_CL is defined, 320 otherwise\r
+#if defined(DEV_CL)    \r
+       mov     (1)             URBOffset:uw            240:uw  // Use chroma URB offset to spawn chroma root\r
+#else\r
+       mov     (1)             URBOffset:uw            320:uw  // Use chroma URB offset to spawn chroma root\r
+#endif\r
+\r
+#include "AVC_ILDB_SpawnChromaRoot.asm"        // Spawn chroma root\r
+\r
+mov    (1)             URBOffset:uw            0:uw    // Use luma URB offset to spawn luma child \r
+mov    (1)             ChildThreadsID:uw       2:uw    // Starting ChildThreadsID for luma child threads\r
+\r
+#include "AVC_ILDB_Dep_Check.asm"      // Check dependency and spawn all MBs\r
+\r
+// Wait for UV root thread to finish: spin while ThreadLimit < MaxThreads.\r
+// NOTE(review): ThreadLimit is not written inside this loop, so it is presumably updated from outside (e.g. through the gateway opened above) - confirm.\r
+ILDB_LABEL(WAIT_FOR_UV):\r
+cmp.l.f0.0 (1) null:w  ThreadLimit:w           MaxThreads:w\r
+(f0.0)         jmpi    ILDB_LABEL(WAIT_FOR_UV)\r
+\r
+#include "AVC_ILDB_CloseGateway.asm"   // Close root thread gateway \r
+\r
+END_THREAD                                                             // End of root thread\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_UV.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_UV.asm
new file mode 100644 (file)
index 0000000..cb8e146
--- /dev/null
@@ -0,0 +1,127 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: AVC_ILDB_Root_UV.asm\r
+//\r
+//  Root kernel serves as a scheduler for child threads\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 10/19/06 5:06p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  AVC_ILDB_ROOT_UV\r
+// ----------------------------------------------------\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_ROOT_UV\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_ROOT_UV):\r
+#endif\r
\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+#if defined(_DEBUG) \r
+mov            (1)             EntrySignature:w                        0xFF11:w\r
+#endif\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+#if defined(_DEBUG) \r
+\r
+// Init URB space for running on RTL, so that no unwritten URB entry is read.\r
+// Will remove it for production release.\r
+\r
+// Fill pattern for the message registers written by writeURB.asm\r
+mov (8) m1:ud          0x55555555:ud\r
+mov (8) m2:ud          0x66666666:ud \r
+mov (8) m3:ud          0x77777777:ud\r
+mov (8) m4:ud          0x88888888:ud \r
+\r
+mov (1)        Temp1_W:w       MBsCntY:w                               // Loop index starts at MBsCntY\r
+shl (1)        Temp2_W:w       MBsCntY:w               1:w             // Loop limit (exclusive) = 2 * MBsCntY\r
+\r
+ILDB_LABEL(ILDB_INIT_URB_UV):\r
+mul (1)        URBOffset:uw                    Temp1_W:uw              4:w             // Each thread uses 4 URB entries (1 r0 + 1 inline + 2 data)\r
+mov (1) URBWriteMsgDesc:ud             MSG_LEN(4)+URBWMSGDSC:ud                        // Msg descriptor: URB write built with MSG_LEN(4) (NOTE(review): older comment said msg length = 5 - confirm)\r
+#include "writeURB.asm"\r
+\r
+add            (1)             Temp1_W:w       Temp1_W:w       1:w                             // Increase block count\r
+cmp.l.f0.0 (1)         null            Temp1_W:w       Temp2_W:w               // Check the block count limit\r
+(f0.0) jmpi            ILDB_LABEL(ILDB_INIT_URB_UV)                    // Loop back\r
+\r
+mov            (1)             EntrySignature:w                        0xFFF0:w\r
+\r
+#endif\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+\r
+// Set global variable\r
+mov (32)       ChildParam:uw                   0:uw                                                            // Reset local variables, 2 GRFs\r
+//mul  (1)             TotalBlocks:w                   MBsCntX:w               MBsCntY:w                       // Total # of blocks\r
+//add  (1)             GatewayApertureE:w              MBsCntY:w               GatewayApertureB:w      // Aperture End = aperture Head + BlockCntY\r
+\r
+// 4 URB entries for UV:\r
+// Entry 0 - Child thread R0Hdr\r
+// Entry 1 - input parameter to child kernel (child r1)\r
+// Entry 2 - Prev MB data UV 2x8\r
+// Entry 3 - Unused\r
+\r
+// Drop any definition left over from a previously included kernel, as the luma root kernels do.\r
+#undef                 URB_ENTRIES_PER_MB\r
+#define        URB_ENTRIES_PER_MB      4\r
+\r
+// URB_ENTRIES_PER_MB in different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10\r
+mov (1)        URB_EntriesPerMB_2:w            URB_ENTRIES_PER_MB-1:w\r
+shl (1)        URB_EntriesPerMB_2:w            URB_EntriesPerMB_2:w    10:w\r
+\r
+#define        CHROMA_ROOT                                                                                                     // Compiling flag for chroma only\r
+\r
+// URB base for UV kernels: 240 when DEV_CL is defined, 320 otherwise\r
+#if defined(DEV_CL)    \r
+       mov (1)         URBOffsetUVBase:w               240:w\r
+#else\r
+       mov (1)         URBOffsetUVBase:w               320:w\r
+#endif\r
+\r
+\r
+mov    (1)             ChildThreadsID:uw               3:uw                                    // Starting ChildThreadsID for chroma child threads (odd IDs)\r
+\r
+shr (1)                ThreadLimit:w           MaxThreads:w            1:w             // Initial thread limit = MaxThreads / 2 (50%)\r
+mul    (1)             TotalBlocks:w           MBsCntX:w               MBsCntY:w       // MBs to be processed count down from TotalBlocks\r
+\r
+//***** Init CT_R0Hdr fields that are common to all threads *************************\r
+mov (8)                CT_R0Hdr.0:ud           r0.0<8;8,1>:ud                          // Init to root R0 header\r
+mov (1)        CT_R0Hdr.7:ud           r0.6:ud                                         // Copy Parent Thread Cnt; JJ did the change on 06/20/2006\r
+mov (1)        CT_R0Hdr.31:ub          0:w                                                     // Reset the highest byte\r
+mov (1)        CT_R0Hdr.3:ud           0x00000000       \r
+mov (1)        CT_R0Hdr.6:uw           sr0.0:uw                                        // sr0.0: state reg contains general thread states, e.g. EUID/TID.\r
+\r
+//***** Init ChildParam fields that are common to all threads ***********************\r
+mov (8)        ChildParam<1>:ud        RootParam<8;8,1>:ud             // Copy all root parameters\r
+mov (4)                CurCol<1>:w                     0:w                                             // Reset CurCol, CurRow, \r
+add    (2)             LastCol<1>:w            MBsCntX<2;2,1>:w                -1:w    // Get LastCol and LastRow\r
+\r
+mov (1)        URBWriteMsgDesc:ud              MSG_LEN(2)+URBWMSGDSC:ud                        // URB write descriptor built with MSG_LEN(2)\r
+\r
+//===================================================================================\r
+\r
+#include "AVC_ILDB_OpenGateway.asm"            // Open gateway for receiving notification \r
+\r
+#include "AVC_ILDB_Dep_Check.asm"              // Check dependency and spawn all chroma child threads\r
+\r
+//#include "AVC_ILDB_LumaThrdLimit.asm"        // Update thread limit in luma root thread via gateway\r
+\r
+#include "AVC_ILDB_CloseGateway.asm"   // Close root thread gateway \r
+\r
+// Chroma root EOT = child send EOT : Request type = 1\r
+       END_CHILD_THREAD\r
+       \r
+#undef         CHROMA_ROOT\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Y.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Y.asm
new file mode 100644 (file)
index 0000000..2274b93
--- /dev/null
@@ -0,0 +1,130 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: AVC_ILDB_Root_Y.asm\r
+//\r
+//  Root kernel serves as a scheduler for child threads\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 10/19/06 5:06p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  AVC_ILDB_ROOT_Y\r
+// ----------------------------------------------------\r
+#define AVC_ILDB\r
+\r
+.kernel AVC_ILDB_ROOT_Y\r
+#if defined(COMBINED_KERNEL)\r
+ILDB_LABEL(AVC_ILDB_ROOT_Y):\r
+#endif\r
+\r
+#include "setupVPKernel.asm"\r
+#include "AVC_ILDB.inc"\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+#if defined(_DEBUG) \r
+\r
+// Init URB space for running on RTL, so that no unwritten URB entry is read.\r
+// Will remove it for production release.\r
+\r
+// Fill pattern for the message registers written by writeURB.asm\r
+mov (8) m1:ud          0x11111111:ud\r
+mov (8) m2:ud          0x22222222:ud \r
+mov (8) m3:ud          0x33333333:ud\r
+mov (8) m4:ud          0x44444444:ud \r
+\r
+mov (1)        Temp1_W:w       0:w                                     // Loop index starts at 0 and runs up to MBsCntY (exclusive)\r
+\r
+ILDB_LABEL(ILDB_INIT_URB_Y):\r
+//mul (1)      Temp2_W:w                               Temp1_W:w               4:w             // URBOffset\r
+//shl (1) URBWriteMsgDescLow:uw        Temp2_W:w               4:w             // Msg descriptor: URB write dest offset (9:4)\r
+//mov (1) URBWriteMsgDescHigh:uw       0x0650:uw                               // Msg descriptor: URB write 5 MRFs (m0 - m4)\r
+\r
+//mul (1)      URBOffset:uw                    Temp1_W:uw              4:w             // Each thread uses 4 URB entries (1 r0 + 1 inline + 2 data)\r
+mul (1)        URBOffset:uw                    Temp1_W:uw              2:w             // Each thread uses 2 URB entries (1 r0 + 1 inline)\r
+mov (1) URBWriteMsgDesc:ud             MSG_LEN(2)+URBWMSGDSC:ud                        // Msg descriptor: URB write built with MSG_LEN(2) (NOTE(review): older comment said msg length = 3 - confirm)\r
+#include "writeURB.asm"\r
+\r
+add            (1)             Temp1_W:w       Temp1_W:w       1:w                             // Increase block count\r
+cmp.l.f0.0 (1)         null            Temp1_W:w       MBsCntY:w               // Check the block count limit\r
+(f0.0) jmpi            ILDB_LABEL(ILDB_INIT_URB_Y)                                                     // Loop back\r
+\r
+mov            (1)             EntrySignature:w                        0xFFF0:w\r
+\r
+#endif\r
+/////////////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+// Set global variable\r
+mov (32)       ChildParam:uw                   0:uw                                                            // Reset local variables, 2 GRFs\r
+//mul  (1)             TotalBlocks:w                   MBsCntX:w               MBsCntY:w                       // Total # of blocks\r
+//add  (1)             GatewayApertureE:w              MBsCntY:w               GatewayApertureB:w      // Aperture End = aperture Head + BlockCntY\r
+\r
+// 4 URB entries for Y:\r
+// Entry 0 - Child thread R0Hdr\r
+// Entry 1 - input parameter to child kernel (child r1)\r
+// Entry 2 - Prev MB data Y 4x16, col 1 and col 0\r
+// Entry 3 - Prev MB data Y 4x16, col 3 and col 2\r
+\r
+#undef                 URB_ENTRIES_PER_MB\r
+#define        URB_ENTRIES_PER_MB              4\r
+\r
+// URB_ENTRIES_PER_MB in different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10\r
+mov (1)        URB_EntriesPerMB_2:w            URB_ENTRIES_PER_MB-1:w\r
+shl (1)        URB_EntriesPerMB_2:w            URB_EntriesPerMB_2:w    10:w\r
+\r
+shr (1)                ThreadLimit:w           MaxThreads:w    1:w                     // Initial luma thread limit = MaxThreads / 2 (50%)\r
+mul    (1)             TotalBlocks:w           MBsCntX:w               MBsCntY:w       // MBs to be processed count down from TotalBlocks\r
+\r
+//***** Init CT_R0Hdr fields that are common to all threads *************************\r
+mov (8)                CT_R0Hdr.0:ud           r0.0<8;8,1>:ud                          // Init to root R0 header\r
+mov (1)        CT_R0Hdr.7:ud           r0.6:ud                                         // Copy Parent Thread Cnt; JJ did the change on 06/20/2006\r
+mov (1)        CT_R0Hdr.31:ub          0:w                                                     // Reset the highest byte\r
+mov (1)        CT_R0Hdr.3:ud           0x00000000       \r
+mov (1)        CT_R0Hdr.6:uw           sr0.0:uw                                        // sr0.0: state reg contains general thread states, e.g. EUID/TID.\r
+\r
+//***** Init ChildParam fields that are common to all threads ***********************\r
+mov (8)        ChildParam<1>:ud        RootParam<8;8,1>:ud             // Copy all root parameters\r
+mov (4)                CurCol<1>:w                     0:w                                             // Reset CurCol, CurRow, \r
+add    (2)             LastCol<1>:w            MBsCntX<2;2,1>:w                -1:w    // Get LastCol and LastRow\r
+       \r
+mov (1)        URBWriteMsgDesc:ud              MSG_LEN(2)+URBWMSGDSC:ud                        // URB write descriptor built with MSG_LEN(2)\r
+\r
+//===================================================================================\r
+\r
+#include "AVC_ILDB_OpenGateway.asm"            // Open gateway for receiving notification \r
+\r
+// The chroma root is spawned at the chroma URB offset: 240 when DEV_CL is defined, 320 otherwise\r
+#if defined(DEV_CL)    \r
+       mov     (1)             URBOffset:uw            240:uw  // Use chroma URB offset to spawn chroma root\r
+#else\r
+       mov     (1)             URBOffset:uw            320:uw  // Use chroma URB offset to spawn chroma root\r
+#endif\r
+\r
+// NOTE(review): unlike AVC_ILDB_Root_Mbaff_Y.asm, ChildThreadsID is not explicitly set to 1 before the chroma root is spawned - confirm AVC_ILDB_SpawnChromaRoot.asm does not rely on it.\r
+#include "AVC_ILDB_SpawnChromaRoot.asm"        // Spawn chroma root\r
+\r
+mov    (1)             URBOffset:uw            0:uw    // Use luma URB offset to spawn luma child \r
+mov    (1)             ChildThreadsID:uw       2:uw    // Starting ChildThreadsID for luma child threads\r
+\r
+#include "AVC_ILDB_Dep_Check.asm"              // Check dependency and spawn all luma child threads in parallel with chroma root\r
+\r
+\r
+// Wait for UV root thread to finish: spin while ThreadLimit < MaxThreads.\r
+// NOTE(review): ThreadLimit is not written inside this loop, so it is presumably updated from outside (e.g. through the gateway opened above) - confirm.\r
+ILDB_LABEL(WAIT_FOR_UV):\r
+cmp.l.f0.0 (1) null:w  ThreadLimit:w           MaxThreads:w\r
+(f0.0)         jmpi    ILDB_LABEL(WAIT_FOR_UV)\r
+\r
+\r
+#include "AVC_ILDB_CloseGateway.asm"   // Close root thread gateway \r
+\r
+END_THREAD                                                             // End of root thread\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Spawn.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Spawn.asm
new file mode 100644 (file)
index 0000000..5b5c91e
--- /dev/null
@@ -0,0 +1,22 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//=============== Spawn a child thread for a vertical child ===============\r
+// Computes the URB offset from the current row, rebuilds the child R0 header from the root r0,\r
+// points it at the child kernel's interface descriptor, then falls into AVC_ILDB_SpawnChild.asm.\r
+\r
+#if defined(_DEBUG) \r
+       mov     (1)             EntrySignature:w        0x6666:w\r
+#endif\r
+       \r
+       mul     (1)             URBOffset:uw            CurRow:uw               2:w // 5:w                      // URBOffset = CurRow * 2. The commented-out 5:w alternative stood for 5 URB entries per row (R0, child R0, 3 GRFs of data from left MB)\r
+\r
+       mov (8)         CT_R0Hdr.0:ud           r0.0<8;8,1>:ud                          // Init to root R0 header\r
+       \r
+       // R0.2: Interface Descriptor Ptr.  Add IDesc_Child_Offset to select the Interface Descriptor of the child kernel\r
+       // (original note: offset 16 for the next Interface Descriptor - the value of IDesc_Child_Offset is defined elsewhere)\r
+       add (1)         CT_R0Hdr.2:ud           r0.2:ud                 IDesc_Child_Offset:w\r
+       \r
+       #include "AVC_ILDB_SpawnChild.asm"\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_SpawnChild.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_SpawnChild.asm
new file mode 100644 (file)
index 0000000..0f6950c
--- /dev/null
@@ -0,0 +1,55 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//=============== Spawn a child thread for Luma or Chroma ===============\r
+\r
+       //----- Create child thread R0 header -----\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignature:w                        0xAAAA:w\r
+#endif\r
+\r
+       //***** Set CT_R0Hdr fields that change for every thread \r
+       \r
+       // Restore CT_R0Hdr.4:ud to r0.4:ud \r
+       mov (1) CT_R0Hdr.4:ud           r0.4:ud\r
+\r
+       // R0.2: Interface Descriptor Ptr.  Add a child offset for child kernel (disabled here; CT_R0Hdr.2 is left as-is)\r
+//     add (1) CT_R0Hdr.2:ud           r0.2:ud                 CHILD_OFFSET:w\r
+\r
+       // Assign a new Thread Count for this child\r
+       mov (1) CT_R0Hdr.6:ud           ChildThreadsID:uw\r
+\r
+       //----- Prepare URB for launching a child thread -----\r
+       mov (16) m2.0:w         ChildParam<16;16,1>:w\r
+\r
+       shr (1)  MRF0.0:uw      URBOffset:uw    1:w             // MRF0.0 = URBOffset >> 1\r
+\r
+       add     (1)     ChildThreadsID:uw               ChildThreadsID:uw       2:uw    // Luma child=even, chroma child=odd\r
+               \r
+       //--------------------------------------------------\r
+//     #include "writeURB.asm"\r
+       send  null:uw   MRF0     null:ud        URBWRITE        URBWriteMsgDesc:ud              // URB write    \r
+\r
+       //--------------------------------------------------\r
+       // Set URB handle for child thread launching:\r
+       // URB handle Length            (bit 15:10) - 0000 0000 0000 0000  yyyy yy00 0000 0000\r
+       // URB handle offset            (bit 9:0)       - 0000 0000 0000 0000  0000 00xx xxxx xxxx\r
+\r
+       or  (1) CT_R0Hdr.4:ud           URB_EntriesPerMB_2:w    URBOffset:uw\r
+       \r
+       // 2 URB entries:\r
+       // Entry 0 - CT_R0Hdr\r
+       // Entry 1 - input parameter to child kernel\r
+\r
+       //----- Spawn a child now -----\r
+       send (8) null:ud        CT_R0Hdr          null:ud    TS TSMSGDSC\r
+//     send (8) null:ud        CT_Spawn_Reg    null:ud    0x07100001\r
+\r
+\r
+       // Restore CT_R0Hdr.4:ud to r0.4:ud for next use\r
+//     mov (1) CT_R0Hdr.4:ud           r0.4:ud\r
diff --git a/i965_drv_video/shaders/h264/ildb/AVC_ILDB_SpawnChromaRoot.asm b/i965_drv_video/shaders/h264/ildb/AVC_ILDB_SpawnChromaRoot.asm
new file mode 100644 (file)
index 0000000..cd5e57e
--- /dev/null
@@ -0,0 +1,47 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//=============== Spawn a chroma root thread ===============\r
+\r
+       //----- Create chroma root thread R0 header -----\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignature:w                        0xAABA:w\r
+#endif\r
+\r
+\r
+\r
+       // Restore CT_R0Hdr.4:ud to r0.4:ud \r
+//     mov (1) CT_R0Hdr.4:ud           r0.4:ud\r
+\r
+       // R0.2: Interface Descriptor Ptr.  Add CHROMA_ROOT_OFFSET to select the chroma root kernel\r
+       add (1) CT_R0Hdr.2:ud           r0.2:ud                 CHROMA_ROOT_OFFSET:w\r
+\r
+       // Assign a new Thread Count for the chroma root thread\r
+       mov (1) CT_R0Hdr.6:ud           1:w             // ThreadID=1 for chroma root\r
+\r
+       //----- Copy luma root r1 for launching chroma root thread -----\r
+       mov (16) m2.0:w         RootParam<16;16,1>:w\r
+\r
+       #include "writeURB.asm"\r
+\r
+       //--------------------------------------------------\r
+       // Set URB handle for child thread launching:\r
+       // URB handle Length            (bit 15:10) - 0000 0000 0000 0000  yyyy yy00 0000 0000\r
+       // URB handle offset            (bit 9:0)       - 0000 0000 0000 0000  0000 00xx xxxx xxxx\r
+\r
+       or  (1) CT_R0Hdr.4:ud           URB_EntriesPerMB_2:w    URBOffset:uw\r
+       \r
+       // 2 URB entries:\r
+       // Entry 0 - CT_R0Hdr\r
+       // Entry 1 - input parameter to child kernel\r
+\r
+       //----- Spawn a child now -----\r
+       send (8) null:ud        CT_R0Hdr        null:ud    TS   TSMSGDSC\r
+\r
+       // Restore CT_R0Hdr.4:ud to r0.4:ud for next use \r
+       mov (1) CT_R0Hdr.4:ud           r0.4:ud\r
diff --git a/i965_drv_video/shaders/h264/ildb/Child_Undefs.inc b/i965_drv_video/shaders/h264/ildb/Child_Undefs.inc
new file mode 100644 (file)
index 0000000..fa3ade1
--- /dev/null
@@ -0,0 +1,24 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: Child_Undefs.inc\r
+//\r
+// Undefine global symbols for new process in child thread\r
+//\r
+\r
+#undef         P1      \r
+#undef         P2      \r
+#undef         P3      \r
+#undef         P4\r
+#undef         P5\r
+#undef         P6\r
+#undef         P7\r
+#undef         P8\r
+#undef         EDGECNTLMAP\r
+#undef         CLIP_NEGATIVE\r
+#undef         CLIP_DONE\r
diff --git a/i965_drv_video/shaders/h264/ildb/ILDB_header.inc b/i965_drv_video/shaders/h264/ildb/ILDB_header.inc
new file mode 100644 (file)
index 0000000..015bf28
--- /dev/null
@@ -0,0 +1,306 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__ILDB_HEADER__)  // Make sure this file is only included once\r
+#define __ILDB_HEADER__\r
+\r
+// Module name: ILDB_header.inc\r
+//\r
+\r
+.default_execution_size        (16)\r
+.default_register_type :ub\r
+\r
+#undef NULLREG\r
+#undef RETURN_REG\r
+#undef EOTMSGDSC\r
+#undef MSGSRC\r
+#undef END_THREAD\r
+#undef TSMSGDSC\r
+\r
+//  ----------- Common constant definitions ------------\r
+//\r
+//  Bit position constants (BITn = 1 << n)\r
+//\r
+#define BIT0   0x01\r
+#define BIT1   0x02\r
+#define BIT2   0x04\r
+#define BIT3   0x08\r
+#define BIT4   0x10\r
+#define BIT5   0x20\r
+#define BIT6   0x40\r
+#define BIT7   0x80\r
+#define BIT8   0x0100\r
+#define BIT9   0x0200\r
+#define BIT10  0x0400\r
+#define BIT11  0x0800\r
+#define BIT12  0x1000\r
+#define BIT13  0x2000\r
+#define BIT14  0x4000\r
+#define BIT15  0x8000\r
+#define BIT16  0x00010000\r
+#define BIT17  0x00020000\r
+#define BIT18  0x00040000\r
+#define BIT19  0x00080000\r
+#define BIT20  0x00100000\r
+#define BIT21  0x00200000\r
+#define BIT22  0x00400000\r
+#define BIT23  0x00800000\r
+#define BIT24  0x01000000\r
+#define BIT25  0x02000000\r
+#define BIT26  0x04000000\r
+#define BIT27  0x08000000\r
+#define BIT28  0x10000000\r
+#define BIT29  0x20000000\r
+#define BIT30  0x40000000\r
+#define BIT31  0x80000000\r
+\r
+// Common constants\r
+//\r
+#define        INST_SIZE       16      // Instruction size in bytes\r
+\r
+#define        GRFWIB  32              // GRF register width in bytes\r
+#define        GRFWIW  16              // GRF register width in words\r
+#define        GRFWID  8               // GRF register width in dwords\r
+\r
+#define TOP_FIELD      0\r
+#define BOTTOM_FIELD   1\r
+\r
+#define PREVIOUS_FRAME 0       // Previous frame\r
+#define CURRENT_FRAME  1       // Current frame\r
+#define NEXT_FRAME     2       // Next frame\r
+\r
+#define Y_ROW_WIDTH            16      // in bytes\r
+#define UV_ROW_WIDTH   8\r
+\r
+//  Useful macros\r
+//  CALL/PRED_CALL store ip plus an offset in RETURN_REG, then jmpi to subFunc (only the jmpi is predicated in PRED_CALL); RETURN moves RETURN_REG back into ip.\r
+#define REGION(Width,HStride) <Width*HStride;Width,HStride>    // Region definition when ExecSize = Width\r
+\r
+#define NULLREG                null<1>:d\r
+#define NULLREGW       null<1>:w\r
+\r
+#define RETURN_REG     r62             // Return pointer for all sub-routine calls (type DWORD)\r
+\r
+#define CALL(subFunc, skipInst)        add (1) RETURN_REG<1>:ud   ip:ud        1+skipInst*INST_SIZE \n\\r
+                               jmpi (1) subFunc\r
+\r
+#define        RETURN          mov (1) ip:ud   RETURN_REG<0;1,0>:ud            // Return to calling module\r
+\r
+#define PRED_CALL(flag, subFunc, skipInst)     add (1) RETURN_REG<1>:ud   ip:ud        1+skipInst*INST_SIZE \n\\r
+                       (flag)  jmpi (1) subFunc\r
+\r
+\r
+//  Definitions for surface states, GRF regions, and common data fields\r
+//\r
+//  Note: Each kernel needs to define a specific symbol before including this\r
+//  header file to ensure correct definitions.\r
+//\r
+\r
+\r
+\r
+#if defined(AVC_ILDB)  \r
+\r
+.reg_count_total    64\r
+.reg_count_payload  4\r
+\r
+    // Binding Table Index\r
+       #define BI_CNTRL_DATA           0                       // Control data map  \r
+    #define    BI_SRC_Y                        1\r
+    #define    BI_SRC_UV                       2\r
+    #define    BI_DEST_Y                       3\r
+    #define    BI_DEST_UV                      4\r
+       \r
+\r
+       //========== Left MB, 4x16 in r2 and r3 ==========\r
+       #define         PREV_MB_Y_BASE  64              //2*GRFWIB                                                                                              // Byte offset to r2\r
+       .declare    PREV_MB_YD  Base=r2         ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+       .declare    PREV_MB_YW  Base=r2         ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+    .declare    PREV_MB_YB  Base=r2    ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+    \r
+    #define            PREV_MB_U_BASE  64              //2*GRFWIB              // separate thread from Y                               // Byte offset to r2\r
+       .declare        PREV_MB_UD      Base=r2         ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+    .declare    PREV_MB_UW  Base=r2    ElementSize=2 SrcRegion=REGION(16,1) Type=uw\r
+    .declare    PREV_MB_UB  Base=r2    ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+    \r
+       #define         PREV_MB_V_BASE  65              //2*GRFWIB+1            // NV12                                                                 // Byte offset to r2.1\r
+    .declare    PREV_MB_VB     Base=r2.1       ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+\r
+       //========== Top MB, 16x4 in r4 and r5 ==========       \r
+       #define         TOP_MB_Y_BASE   128             //4*GRFWIB                                                                                              // Byte offset to r4\r
+       .declare    TOP_MB_YD   Base=r4         ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+       .declare    TOP_MB_YW   Base=r4         ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+    .declare    TOP_MB_YB      Base=r4         ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+    \r
+    #define            TOP_MB_U_BASE   128             //4*GRFWIB              // separate thread from Y                               // Byte offset to r4\r
+       .declare        TOP_MB_UD       Base=r4         ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+    .declare    TOP_MB_UW      Base=r4         ElementSize=2 SrcRegion=REGION(16,1) Type=uw\r
+    .declare    TOP_MB_UB      Base=r4         ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+    \r
+       #define         TOP_MB_V_BASE   129             //4*GRFWIB+1            // NV12                                                                 // Byte offset to r4.1\r
+    .declare    TOP_MB_VB  Base=r4.1   ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+\r
+    \r
+    //========== Current MB, 16x16 in r6-r13 ==========\r
+       #define         SRC_MB_Y_BASE   192     //6*GRFWIB                                                                                              // Byte offset to r6\r
+    .declare    SRC_YD         Base=r6         ElementSize=4 SrcRegion=REGION(8,1) Type=ud             // For read and write, 8 GRFs\r
+    .declare    SRC_YW         Base=r6         ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+    .declare    SRC_YB         Base=r6         ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 8 GRFs       \r
+\r
+       #define         SRC_MB_U_BASE   192             //6*GRFWIB              // separate thread from Y                               // Byte offset to r6\r
+    .declare    SRC_UD         Base=r6         ElementSize=4 SrcRegion=REGION(8,1) Type=ud             // For read and write, 2 GRFs\r
+    .declare    SRC_UW         Base=r6         ElementSize=2 SrcRegion=REGION(16,1) Type=uw    // For read and write, 4 GRFs\r
+    .declare    SRC_UB         Base=r6         ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 2 GRFs\r
+          \r
+       #define         SRC_MB_V_BASE   193             // 6*GRFWIB+1           // NV12                                                                 // Byte offset to r6.1\r
+    .declare    SRC_VD         Base=r6.1       ElementSize=4 SrcRegion=REGION(8,1) Type=ud             // For read and write, 2 GRFs\r
+    .declare    SRC_VW         Base=r6.1       ElementSize=2 SrcRegion=REGION(16,1) Type=uw    // For read and write, 4 GRFs\r
+    .declare    SRC_VB         Base=r6.1       ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 2 GRFs\r
+\r
+#else                          // No kernel specified: only the generic register counts and r1 offset/origin aliases below are defined.\r
+\r
+.reg_count_total    64\r
+.reg_count_payload  2\r
+\r
+       #define SRCAOFF r1.0:ud         // Offset into alpha data\r
+       #define SRCOFF  r1.1:ud         // Offset into source YUV data\r
+       #define ORIX    r1.4            // :w, H. origin of the destination block in pel\r
+       #define ORIY    r1.5            // :w, V. origin of the destination block in pel\r
+\r
+#endif\r
+\r
+//  ----------- Message Payload Header fields------------\r
+//\r
+#define IDP            r0.2:ud         // Interface Descriptor Pointer\r
+#define        BTP             r0.4:ud         // Binding Table Pointer\r
+\r
+//  ----------- Common Message Descriptor ------------\r
+//\r
+#ifdef DEV_ILK\r
+#define GW_DCN                         // Should be enabled only for ILK-B0 and beyond\r
+#define MSG_GW         0x03            // Message Gateway\r
+#define MSG_GW_EOT     0x23            // Message Gateway plus EOT bit set (For ILK only)\r
+#define DAPREAD                0x04            // Data Port Read Extended Message Descriptor,\r
+#define DAPWRITE       0x05            // Data Port Write Extended Message Descriptor,\r
+#define URBWRITE       0x06            // URB\r
+#define TS                     0x07            // Thread Spawner Extended Message Descriptor\r
+#define TS_EOT         0x27            // End of Thread Extended Message Descriptor\r
+\r
+#define EOTMSGDSC      0x02000000      // End of Thread Message Descriptor /w URB handle dereferenced (used by root kernel)\r
+#define CHILD_EOTMSGDSC        0x02000012      // End of Child Thread Message Descriptor w/o URB handle dereferenced\r
+\r
+// Data Port Message Descriptor\r
+#define DWBRMSGDSC_RC   0x02086000     // DWORD Block Read Message Descriptor, reading from render cache = 6.\r
+#define DWBRMSGDSC_RC_TF 0x02086600    // DWORD Block Read Message Descriptor, render cache, top field (DWBRMSGDSC_RC + ENMSGDSCTF).\r
+#define DWBRMSGDSC_RC_BF 0x02086700    // DWORD Block Read Message Descriptor, render cache, bottom field (DWBRMSGDSC_RC + ENMSGDSCBF).\r
+#define DWBRMSGDSC_SC   0x0208A000     // DWORD Block Read Message Descriptor, reading from sampler cache = A.\r
+#define DWBRMSGDSC_SC_TF 0x0208E600    // DWORD Block Read Message Descriptor, reading top field from field mode sampler cache. NOTE(review): _SC_TF/_SC_BF use 0xE in bits 15:12 while DWBRMSGDSC_SC and the pre-ILK variants use 0xA - confirm.\r
+#define DWBRMSGDSC_SC_BF 0x0208E700    // DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache.\r
+\r
+#define ILDBRMSGDSC             0x02085800     // AVC ILDB Control Data Read Msg Desc on Bearlake-C\r
+\r
+#define DWBWMSGDSC              0x02082000     // DWORD Block Write Message Descriptor\r
+#define DWBWMSGDSC_WC   0x0218A000     // DWORD Block Write Message Descriptor + write commit\r
+\r
+// URB Message Descriptor\r
+#define        URBWMSGDSC      0x02080000              // URB Write Message Descriptor\r
+\r
+// Thread Spawner Message Descriptor\r
+#define        TSMSGDSC        0x02000001\r
+\r
+// Message Gateway Message Descriptors\r
+#define OGWMSGDSC      0x02000000      // OpenGateway Message Descriptor\r
+#define CGWMSGDSC      0x02000001      // CloseGateway Message Descriptor\r
+#define FWDMSGDSC      0x02000002      // ForwardMsg Message Descriptor\r
+#define        NOTIFYMSG       0x00008000      // Send notification with ForwardMsg message\r
+\r
+#define        RESP_LEN(len)   0x100000*len\r
+#define MSG_LEN(len)   0x2000000*len\r
+\r
+#else  // Pre DEV_ILK\r
+\r
+#define MSG_GW\r
+#define MSG_GW_EOT\r
+#define DAPREAD\r
+#define DAPWRITE\r
+#define URBWRITE\r
+#define TS\r
+#define TS_EOT\r
+\r
+#define EOTMSGDSC      0x87100000              // End of Thread Message Descriptor /w URB handle dereferenced (used by root kernel) \r
+#define CHILD_EOTMSGDSC        0x87100012      // End of Child Thread Message Descriptor w/o URB handle dereferenced\r
+\r
+// Data Port Message Descriptor\r
+#define DWBRMSGDSC_RC   0x04106000     // DWORD Block Read Message Descriptor, reading from render cache = 6.\r
+#define DWBRMSGDSC_RC_TF 0x04106600    // DWORD Block Read Message Descriptor, render cache, top field (DWBRMSGDSC_RC + ENMSGDSCTF).\r
+#define DWBRMSGDSC_RC_BF 0x04106700    // DWORD Block Read Message Descriptor, render cache, bottom field (DWBRMSGDSC_RC + ENMSGDSCBF).\r
+#define DWBRMSGDSC_SC   0x0410A000     // DWORD Block Read Message Descriptor, reading from sampler cache = A.\r
+#define DWBRMSGDSC_SC_TF 0x0410A600    // DWORD Block Read Message Descriptor, reading top field from field mode sampler cache.\r
+#define DWBRMSGDSC_SC_BF 0x0410A700    // DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache.\r
+\r
+#define ILDBRMSGDSC             0x04105800     // AVC ILDB Control Data Read Msg Desc on Bearlake-C\r
+\r
+#define DWBWMSGDSC              0x05102000     // DWORD Block Write Message Descriptor\r
+#define DWBWMSGDSC_WC   0x0511A000     // DWORD Block Write Message Descriptor + write commit\r
+\r
+// URB Message Descriptor\r
+#define        URBWMSGDSC      0x06100000              // URB Write Message Descriptor\r
+\r
+// Thread Spawner Message Descriptor\r
+#define        TSMSGDSC        0x07100001\r
+\r
+// Message Gateway Message Descriptors\r
+#define OGWMSGDSC      0x03100000      // OpenGateway Message Descriptor\r
+#define CGWMSGDSC      0x03100001      // CloseGateway Message Descriptor\r
+#define FWDMSGDSC      0x03100002      // ForwardMsg Message Descriptor\r
+#define        NOTIFYMSG       0x00008000      // Send notification with ForwardMsg message\r
+\r
+#define        RESP_LEN(len)   0x10000*len\r
+#define MSG_LEN(len)   0x100000*len\r
+\r
+//     bits 15 - 0 = 01 011 000 00000000 = 0101 1000 0000 0000 = 5800\r
+// Render cache, AVC loop rd,                                                  \r
+#endif // DEV_ILK\r
+                               \r
+//     Enable frame/field selection in message descriptor\r
+#define ENMSGDSCFM     0x400           // Enable MSGDSC to select frame surface\r
+#define ENMSGDSCTF     0x600           // Enable MSGDSC to select top field surface\r
+#define ENMSGDSCBF     0x700           // Enable MSGDSC to select bottom field surface\r
+\r
+#define END_THREAD      send (8) NULLREG MSGHDR r0:ud TS_EOT   EOTMSGDSC       // End of (root) thread, using EOTMSGDSC\r
+#define END_CHILD_THREAD       send (8) NULLREG MSGHDR r0:ud TS_EOT    CHILD_EOTMSGDSC // End of child thread, using CHILD_EOTMSGDSC\r
+\r
+//  ----------- Message related register ------------\r
+//\r
+#define MSGHDR         m1              // Message Payload Header\r
+#define MSGHDRY                m1              // Message Payload Header register for Y data\r
+#define MSGHDRU                m2              // Message Payload Header register for U data\r
+#define MSGHDRV                m3              // Message Payload Header register for V data\r
+\r
+#define MSGHDRC                m1              // Message Payload Header register for CUR MB\r
+#define MSGHDRL                m2              // Message Payload Header register for LEFT MB\r
+#define MSGHDRT                m3              // Message Payload Header register for TOP MB\r
+\r
+#define MSGHDRYA       m4              // Second Message Payload Header register for Y data\r
+#define MSGSRC         r63             // Message source register\r
+#define MSGDSC         a0.0:ud         // Message Descriptor register (type DWORD)\r
+\r
+#define MH_ORI         MSGSRC.0        // DWORD block R/W message header block offset\r
+#define MH_ORIX                MSGSRC.0        // DWORD block R/W message header X offset\r
+#define MH_ORIY                MSGSRC.1        // DWORD block R/W message header Y offset\r
+#define MH_SIZE                MSGSRC.2        // DWORD block R/W message header block width & height\r
+\r
+\r
+//  M2 - M9 for message data payload\r
+.declare    MSGPAYLOADB        Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+.declare    MSGPAYLOADW        Base=m2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw\r
+.declare    MSGPAYLOADD        Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+.declare    MSGPAYLOADF        Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=f\r
+\r
+// End of ILDB_header.inc\r
+\r
+#endif // !defined(__ILDB_HEADER__)\r
diff --git a/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data.asm b/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data.asm
new file mode 100644 (file)
index 0000000..695ae4e
--- /dev/null
@@ -0,0 +1,80 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_ILDB_Cntrl_Data.asm\r
+//\r
+// This module loads AVC ILDB control data for one MB.  \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     CNTRL_DATA_D:   CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                       // 8 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_CNTRL_DATA:  Binding table index of control data surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+       // We need to get control data offset for the bottom MB in mbaff mode.\r
+       // That is, get f0.1=1 if MbaffFlag==1 && BotFieldFlag==1\r
+       and (1) CTemp1_W:uw             BitFields:uw    MbaffFlag+BotFieldFlag:uw       // Mute all other bits\r
+\r
+       and.nz.f0.0     (1)     null:w          BitFields:w             CntlDataExpFlag:w                       // Get CntlDataExpFlag\r
+\r
+       cmp.e.f0.1 (1) NULLREGW         CTemp1_W:uw     MbaffFlag+BotFieldFlag:uw       // Check mbaff and bot flags\r
+\r
+       (f0.0)  jmpi    ILDB_LABEL(READ_BLC_CNTL_DATA) \r
+\r
+       // On Crestline, MB control data in memory occupy 64 DWs (expanded).  \r
+//    mov (1)  MSGSRC.0<1>:ud  0:w                                             { NoDDClr }                             // Block origin X\r
+//    mov (1)  MSGSRC.1<1>:ud  CntrlDataOffsetY:ud             { NoDDClr, NoDDChk }    // Block origin Y\r
+//    mov (1)  MSGSRC.2<1>:ud  0x000F000F:ud                   { NoDDChk }                             // Block width and height (16x16=256 bytes)\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:uw                      { NoDDClr }                             // Block origin X,Y\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud                           { NoDDChk }                             // Block width and height (16x16=256 bytes)\r
+\r
+       (f0.1) add (1)  MSGSRC.1:ud             MSGSRC.1:ud             16:w    // +16 to for bottom MB in a pair\r
+\r
+    send (8) CNTRL_DATA_D(0)<1>        MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(8)+DWBRMSGDSC_SC+BI_CNTRL_DATA // Receive 8 GRFs; RESP_LEN(8) (not a literal 0x00080000) keeps the response-length field correct for both the DEV_ILK and pre-ILK descriptor layouts\r
+       jmpi    ILDB_LABEL(READ_CNTL_DATA_DONE)\r
+       \r
+       \r
+ILDB_LABEL(READ_BLC_CNTL_DATA):\r
+       // On Bearlake-C, MB control data in memory occupy 16 DWs. Data port returns 8 GRFs with expanded control data.\r
+\r
+       // Global offset\r
+       mov (1) MSGSRC.2:ud             CntrlDataOffsetY:ud     // CntrlDataOffsetY is the global offset\r
+\r
+       (f0.1) add (1) MSGSRC.2:ud              MSGSRC.2:ud             64:w    // +64 to the next MB control data (bot MB)\r
+\r
+    send (8) CNTRL_DATA_D(0)<1>        MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(8)+ILDBRMSGDSC+BI_CNTRL_DATA   // Receive 8 GRFs\r
+\r
+ILDB_LABEL(READ_CNTL_DATA_DONE):\r
+\r
+// End of load_ILDB_Cntrl_Data.asm\r
+\r
+\r
+\r
+\r
+// AVC ILDB control data message header format\r
+\r
+//DWord        Bit     Description\r
+//M0.7 31:0    Debug \r
+//M0.6 31:0    Debug\r
+//M0.5 31:8    Ignored\r
+//             7:0             Dispatch ID. // This ID is assigned by the fixed function unit and is a unique identifier for the thread.  It is used to free up resources used by the thread upon thread completion.\r
+//M0.4 31:0    Ignored\r
+//M0.3 31:0    Ignored\r
+//M0.2 31:0    Global Offset. Specifies the global byte offset into the buffer.\r
+                               //      This offset must be OWord aligned (bits 3:0 MBZ) Format = U32 Range = [0,FFFFFFF0h]\r
+//M0.1 31:0    Ignored\r
+//M0.0 31:0    Ignored\r
+\r
+\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_16DW.asm b/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_16DW.asm
new file mode 100644 (file)
index 0000000..4c91a29
--- /dev/null
@@ -0,0 +1,62 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_ILDB_Cntrl_Data_16DW.asm\r
+//\r
+// This module loads AVC ILDB 64DW control data for one MB on CTG. \r
+// Dataport expands from 16DW to 64DW.\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     CNTRL_DATA_D:   CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                       // 8 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_CNTRL_DATA:  Binding table index of control data surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+// On CTG, MB control data in memory occupy 16 DWs. Data port returns 8 GRFs with expanded control data.\r
+\r
+#if defined(_MBAFF) \r
+       // We need to get control data offset for the bottom MB in mbaff mode.\r
+       // That is, get f0.1=1 if BotFieldFlag==1 (only BotFieldFlag is tested; this path is built only when _MBAFF is defined)\r
+//     and (1) CTemp1_W:uw             BitFields:uw    MbaffFlag+BotFieldFlag:uw       // Mute all other bits\r
+//     cmp.e.f0.1 (1) NULLREGW         CTemp1_W:uw     MbaffFlag+BotFieldFlag:uw       // Check mbaff and bot flags\r
+               \r
+       and.ne.f0.1 (1) NULLREGW        BitFields:uw    BotFieldFlag:uw\r
+\r
+       // Global offset\r
+       mov (1) MSGSRC.2:ud             CntrlDataOffsetY:ud\r
+               \r
+       (f0.1) add (1) MSGSRC.2:ud              MSGSRC.2:ud             64:w    // +64 to the next MB control data (bot MB)\r
+#endif // _MBAFF.  NOTE(review): without _MBAFF nothing in this module writes MSGSRC.2 (the global offset) - presumably the including kernel sets it; confirm\r
+\r
+    send (8) CNTRL_DATA_D(0)<1>        MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(8)+ILDBRMSGDSC+BI_CNTRL_DATA   // Receive 8 GRFs\r
+\r
+// End of load_ILDB_Cntrl_Data_16DW.asm\r
+\r
+\r
+\r
+// AVC ILDB control data message header format\r
+\r
+//DWord        Bit     Description\r
+//M0.7 31:0    Debug \r
+//M0.6 31:0    Debug\r
+//M0.5 31:8    Ignored\r
+//             7:0             Dispatch ID. // This ID is assigned by the fixed function unit and is a unique identifier for the thread.  It is used to free up resources used by the thread upon thread completion.\r
+//M0.4 31:0    Ignored\r
+//M0.3 31:0    Ignored\r
+//M0.2 31:0    Global Offset. Specifies the global byte offset into the buffer.\r
+                               //      This offset must be OWord aligned (bits 3:0 MBZ) Format = U32 Range = [0,FFFFFFF0h]\r
+//M0.1 31:0    Ignored\r
+//M0.0 31:0    Ignored\r
+\r
+\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_22DW.asm b/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_22DW.asm
new file mode 100644 (file)
index 0000000..19e9a20
--- /dev/null
@@ -0,0 +1,36 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_ILDB_Cntrl_Data_22DW.asm\r
+//\r
+// ********** Apple only module **********\r
+//\r
+// This module loads AVC ILDB 22DW control data for one MB for CLN.\r
+// The reduced control data set is for progressive picture ONLY.\r
+//\r
+// Control data memory layout for each MB is 8x11 = 88 bytes.  \r
+// It occupies 3 GRFs after reading in.\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     CNTRL_DATA_D:   CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                       // 3 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_CNTRL_DATA:  Binding table index of control data surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+    mul (1)    MSGSRC.0<1>:ud  ORIX:uw                 8:uw            { NoDDClr }                             // Block origin X = ORIX * 8 (block width in bytes)\r
+    mul (1)    MSGSRC.1<1>:ud  ORIY:uw                 11:uw           { NoDDClr, NoDDChk }    // Block origin Y = ORIY * 11 (block height in rows)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000A0007:ud                           { NoDDChk }                             // Block width and height (8x11=88 bytes), i.e. (11-1)<<16 | (8-1)\r
+\r
+    send (8) CNTRL_DATA_D(0)<1>        MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(3)+DWBRMSGDSC_SC+BI_CNTRL_DATA // Receive 3 GRFs\r
+       \r
+// End of load_ILDB_Cntrl_Data_22DW.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_64DW.asm b/i965_drv_video/shaders/h264/ildb/Load_ILDB_Cntrl_Data_64DW.asm
new file mode 100644 (file)
index 0000000..b026afb
--- /dev/null
@@ -0,0 +1,42 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_ILDB_Cntrl_Data_64DW.asm\r
+//\r
+// This module loads AVC ILDB 64DW control data for one MB for CLN.  \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     CNTRL_DATA_D:   CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                       // 8 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_CNTRL_DATA:  Binding table index of control data surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+// On CLN, MB control data in memory occupies 64 DWs.\r
+\r
+#if defined(_MBAFF) \r
+       // We need to get control data offset for the bottom MB in mbaff mode.\r
+       // That is, set f0.1=1 if MbaffFlag==1 && BotFieldFlag==1\r
+       and (1) acc0.0:uw               BitFields:uw    MbaffFlag+BotFieldFlag:uw       // Mute all other bits\r
+       cmp.e.f0.1 (1) NULLREGW         acc0.0:uw       MbaffFlag+BotFieldFlag:uw       // Check mbaff and bot flags\r
+#endif         // _MBAFF (acc0.0 is the scratch word here; presumably it replaced CTemp1_W - confirm)\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:uw                      { NoDDClr }                             // Block origin X,Y\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud                           { NoDDChk }                             // Block width and height (16x16=256 bytes)\r
+\r
+#if defined(_MBAFF) \r
+       (f0.1) add (1) MSGSRC.1:ud      MSGSRC.1:ud             16:w    // +16 to the bottom MB control data (bot MB)\r
+#endif\r
+\r
+    send (8) CNTRL_DATA_D(0)<1>        MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(8)+DWBRMSGDSC_SC+BI_CNTRL_DATA // Receive 8 GRFs\r
+       \r
+// End of load_ILDB_Cntrl_Data_64DW.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/Makefile.am b/i965_drv_video/shaders/h264/ildb/Makefile.am
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/i965_drv_video/shaders/h264/ildb/Root_Undefs.inc b/i965_drv_video/shaders/h264/ildb/Root_Undefs.inc
new file mode 100644 (file)
index 0000000..c0ee016
--- /dev/null
@@ -0,0 +1,27 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: Root_Undefs.inc\r
+//\r
+// Undefines the global READ_BI/WRITE_BI and ILDB_H_*/ILDB_V_* symbols for a new process in the root thread\r
+//\r
+\r
+#undef                 READ_BI\r
+#undef                 WRITE_BI\r
+\r
+#undef         ILDB_H_INDEPENDENT\r
+#undef         ILDB_H_INDEPENDENT_CONT\r
+#undef         ILDB_H_DEPENDENT\r
+#undef         ILDB_H_DEPENDENT_SCAN\r
+#undef         ILDB_H_NO_DEPENDENT\r
+\r
+#undef         ILDB_V_INDEPENDENT\r
+#undef         ILDB_V_INDEPENDENT_CONT\r
+#undef         ILDB_V_DEPENDENT\r
+#undef         ILDB_V_DEPENDENT_SCAN\r
+#undef         ILDB_V_NO_DEPENDENT\r
diff --git a/i965_drv_video/shaders/h264/ildb/SetupVPKernel.asm b/i965_drv_video/shaders/h264/ildb/SetupVPKernel.asm
new file mode 100644 (file)
index 0000000..c7d9634
--- /dev/null
@@ -0,0 +1,24 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: SetupVPKernel.asm\r
+//\r
+// Initial setup for running video-processing kernels: seeds MSGSRC with a copy of r0\r
+//\r
+\r
+#include "ILDB_header.inc"\r
+\r
+//\r
+//  Now, begin source code....\r
+//\r
+\r
+.code\r
+\r
+    mov (8)    MSGSRC.0<1>:ud  r0.0<8;8,1>:ud  // Copy all 8 DWs of r0 into MSGSRC to initialize the message payload header\r
+\r
+// End of SetupVPKernel\r
diff --git a/i965_drv_video/shaders/h264/ildb/TransposeNV12_16x16.asm b/i965_drv_video/shaders/h264/ildb/TransposeNV12_16x16.asm
new file mode 100644 (file)
index 0000000..192a89f
--- /dev/null
@@ -0,0 +1,135 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+//     Module name: TransposeNV12_16x16.asm\r
+//     \r
+//     Transpose a 16x16 NV12 MB.  The output is also in NV12\r
+//\r
+//----------------------------------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region is :ub\r
+//     SRC_YB:                 SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 8 GRFs\r
+//     SRC_UW:                 SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw     // 4 GRFs\r
+//\r
+//  Temp buffer:\r
+//     BUF_B:                  BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub             // 8 GRFs\r
+//     BUF_W:                  BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw              // 4 GRFs\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDDA:w        // Debug builds only: write marker 0xDDDA to EntrySignatureC\r
+#endif\r
+\r
+\r
+// Transpose Y (16x16 bytes) in place in SRC_YB, using BUF_B as scratch\r
+\r
+// The first step: copy each 4-column strip of SRC_YB (4 bytes from each of 16 rows) into two GRFs of BUF_B\r
+mov (16)       BUF_B(0,0)<1>           SRC_YB(0,0)<16;4,1>\r
+mov (16)       BUF_B(0,16)<1>          SRC_YB(2,0)<16;4,1>\r
+mov (16)       BUF_B(1,0)<1>           SRC_YB(4,0)<16;4,1>\r
+mov (16)       BUF_B(1,16)<1>          SRC_YB(6,0)<16;4,1>\r
+\r
+mov (16)       BUF_B(2,0)<1>           SRC_YB(0,4)<16;4,1>\r
+mov (16)       BUF_B(2,16)<1>          SRC_YB(2,4)<16;4,1>\r
+mov (16)       BUF_B(3,0)<1>           SRC_YB(4,4)<16;4,1>\r
+mov (16)       BUF_B(3,16)<1>          SRC_YB(6,4)<16;4,1>\r
+\r
+mov (16)       BUF_B(4,0)<1>           SRC_YB(0,8)<16;4,1>\r
+mov (16)       BUF_B(4,16)<1>          SRC_YB(2,8)<16;4,1>\r
+mov (16)       BUF_B(5,0)<1>           SRC_YB(4,8)<16;4,1>\r
+mov (16)       BUF_B(5,16)<1>          SRC_YB(6,8)<16;4,1>\r
+\r
+mov (16)       BUF_B(6,0)<1>           SRC_YB(0,12)<16;4,1>\r
+mov (16)       BUF_B(6,16)<1>          SRC_YB(2,12)<16;4,1>\r
+mov (16)       BUF_B(7,0)<1>           SRC_YB(4,12)<16;4,1>\r
+mov (16)       BUF_B(7,16)<1>          SRC_YB(6,12)<16;4,1>\r
+\r
+// The second step: read every 4th byte of a strip (one source column, 16 bytes) and store it as one row of SRC_YB\r
+mov (16)       SRC_YB(0,0)<1>          BUF_B(0,0)<32;8,4>\r
+mov (16)       SRC_YB(0,16)<1>         BUF_B(0,1)<32;8,4>\r
+mov (16)       SRC_YB(1,0)<1>          BUF_B(0,2)<32;8,4>\r
+mov (16)       SRC_YB(1,16)<1>         BUF_B(0,3)<32;8,4>\r
+\r
+mov (16)       SRC_YB(2,0)<1>          BUF_B(2,0)<32;8,4>\r
+mov (16)       SRC_YB(2,16)<1>         BUF_B(2,1)<32;8,4>\r
+mov (16)       SRC_YB(3,0)<1>          BUF_B(2,2)<32;8,4>\r
+mov (16)       SRC_YB(3,16)<1>         BUF_B(2,3)<32;8,4>\r
+\r
+mov (16)       SRC_YB(4,0)<1>          BUF_B(4,0)<32;8,4>\r
+mov (16)       SRC_YB(4,16)<1>         BUF_B(4,1)<32;8,4>\r
+mov (16)       SRC_YB(5,0)<1>          BUF_B(4,2)<32;8,4>\r
+mov (16)       SRC_YB(5,16)<1>         BUF_B(4,3)<32;8,4>\r
+\r
+mov (16)       SRC_YB(6,0)<1>          BUF_B(6,0)<32;8,4>\r
+mov (16)       SRC_YB(6,16)<1>         BUF_B(6,1)<32;8,4>\r
+mov (16)       SRC_YB(7,0)<1>          BUF_B(6,2)<32;8,4>\r
+mov (16)       SRC_YB(7,16)<1>         BUF_B(6,3)<32;8,4>\r
+\r
+// Y is transposed; the result is back in SRC_YB.\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+// Src U and V are mixed in NV12 format. U on even bytes, V on odd bytes.\r
+// Transpose by treating UV pair as a word.\r
+\r
+\r
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//  First step                 (16)    <1>:w <==== <8;4,1>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |33 33 32 32 31 31 30 30 23 23 22 22 21 21 20 20 13 13 12 12 11 11 10 10 03 03 02 02 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 73 72 72 71 71 70 70 63 63 62 62 61 61 60 60 53 53 52 52 51 51 50 50 43 43 42 42 41 41 40 40|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |37 37 36 36 35 35 34 34 27 27 26 26 25 25 24 24 17 17 16 16 15 15 14 14 07 07 06 06 05 05 04 04|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 76 76 75 75 74 74 67 67 66 66 65 65 64 64 57 57 56 56 55 55 54 54 47 47 46 46 45 45 44 44|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+// Transpose UV (8x8 words), the first step: gather 4x4-word quadrants of SRC_UW into BUF_W\r
+mov (16)       BUF_W(0,0)<1>           SRC_UW(0,0)<8;4,1>\r
+mov (16)       BUF_W(1,0)<1>           SRC_UW(2,0)<8;4,1>\r
+mov (16)       BUF_W(2,0)<1>           SRC_UW(0,4)<8;4,1>\r
+mov (16)       BUF_W(3,0)<1>           SRC_UW(2,4)<8;4,1>\r
+\r
+\r
+//     Second step             (8)     <1>:w <=== <16;4,4>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 73 63 63 53 53 43 43 33 33 23 23 13 13 03 03 72 72 62 62 52 52 42 42 32 32 22 22 12 12 02 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |75 75 65 65 55 55 45 45 35 35 25 25 15 15 05 05 74 74 64 64 54 54 44 44 34 34 24 24 14 14 04 04|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+// Transpose UV (8x8 words), the second step: each mov (8) picks one source column and writes it back to SRC_UW as a row\r
+mov (8)                SRC_UW(0,0)<1>          BUF_W(0,0)<16;4,4>\r
+mov (8)                SRC_UW(0,8)<1>          BUF_W(0,1)<16;4,4>\r
+mov (8)                SRC_UW(1,0)<1>          BUF_W(0,2)<16;4,4>\r
+mov (8)                SRC_UW(1,8)<1>          BUF_W(0,3)<16;4,4>\r
+mov (8)                SRC_UW(2,0)<1>          BUF_W(2,0)<16;4,4>\r
+mov (8)                SRC_UW(2,8)<1>          BUF_W(2,1)<16;4,4>\r
+mov (8)                SRC_UW(3,0)<1>          BUF_W(2,2)<16;4,4>\r
+mov (8)                SRC_UW(3,8)<1>          BUF_W(2,3)<16;4,4>\r
+\r
+// U and V are now transposed; each UV pair was moved as one word, so the result is still interleaved.\r
diff --git a/i965_drv_video/shaders/h264/ildb/TransposeNV12_4x16.asm b/i965_drv_video/shaders/h264/ildb/TransposeNV12_4x16.asm
new file mode 100644 (file)
index 0000000..cb1dcbc
--- /dev/null
@@ -0,0 +1,94 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+//     Module name: TransposeNV12_4x16.asm\r
+//     \r
+//     Transpose a 4x16 internal planar to 16x4 internal planar block\r
+//\r
+//----------------------------------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region is :ub\r
+//     SRC_YB:                 SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 8 GRFs\r
+//     SRC_UW:                 SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw             // 4 GRFs\r
+//\r
+//  Temp buffer:\r
+//     BUF_B:                  BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub             // 8 GRFs\r
+//     BUF_W:                  BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw              // 4 GRFs\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDDB:w        // Debug builds only: write marker 0xDDDB to EntrySignatureC\r
+#endif\r
+\r
+// Transpose Y (4x16) right most 4 columns\r
+\r
+// The first step: gather 4 bytes from each of the 16 rows of SRC_YB into BUF_B(0) and BUF_B(1)\r
+mov (16)       BUF_B(0,0)<1>           SRC_YB(0,0)<16;4,1>             // Read 2 GRFs (4 bytes from each of 4 rows), write 16 bytes\r
+mov (16)       BUF_B(0,16)<1>          SRC_YB(2,0)<16;4,1>\r
+mov (16)       BUF_B(1,0)<1>           SRC_YB(4,0)<16;4,1>\r
+mov (16)       BUF_B(1,16)<1>          SRC_YB(6,0)<16;4,1>\r
+\r
+// The second step: read every 4th byte of BUF_B(0..1) so that each source column becomes one 16-byte row\r
+mov (16)       BUF_B(2,0)<1>           BUF_B(0,0)<32;8,4>              // Read 2 GRFs with stride 4, write 16 bytes\r
+mov (16)       BUF_B(2,16)<1>          BUF_B(0,1)<32;8,4>\r
+mov (16)       BUF_B(3,0)<1>           BUF_B(0,2)<32;8,4>\r
+mov (16)       BUF_B(3,16)<1>          BUF_B(0,3)<32;8,4>\r
+\r
+// Y is now transposed. The result is in BUF_B(2) and BUF_B(3).\r
+\r
+\r
+\r
+// Transpose UV (4x8),  right most 2 columns in word\r
+// Use BUF_W(0) as temp buf\r
+\r
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//  First step                 (8)     <1>:w <==== <8;2,1>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+mov (8)                BUF_W(0,0)<1>           SRC_UW(0,0)<8;2,1>              // Rows 0-3, word columns 0-1\r
+mov (8)                BUF_W(0,8)<1>           SRC_UW(2,0)<8;2,1>              // Rows 4-7, word columns 0-1\r
+\r
+//     Second step             (16) <1>:w <==== <1;8,2>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+mov (16)       BUF_W(1,0)<1>           BUF_W(0,0)<1;8,2>               // Even words -> first 8 words, odd words -> last 8 words\r
+\r
+// UV are now transposed.  The result is in BUF_W(1).\r
+\r
+\r
+\r
+//The first step\r
+//mov (16)     BUF_B(0,0)<1>           SRC_UW(0,0)<8;2,1>              // Read 2 rows, write 1 row\r
+// The second step\r
+//mov (8)              SRC_UB(4,0)<1>          BUF_B(0,0)<16;8,2>              // Read 1 row, write 1 row\r
+//mov (8)              SRC_UB(4,8)<1>          BUF_B(0,1)<16;8,2>              // Read 1 row, write 1 row\r
+\r
+// Transpose V (8x8),  right most 2 columns\r
+// The first step\r
+//mov (16)     BUF_B(0,0)<1>           SRC_VB(0,1)<8;2,1>              // Read 2 rows, write 1 row\r
+// The second step\r
+//mov (8)              SRC_UB(4,16)<1>         BUF_B(0,0)<16;8,2>              // Read 1 row, write 1 row\r
+//mov (8)              SRC_UB(4,24)<1>         BUF_B(0,1)<16;8,2>              // Read 1 row, write 1 row\r
+\r
+// U and V are now transposed.  the result is in BUF_B(4).\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_2x8.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_2x8.asm
new file mode 100644 (file)
index 0000000..967e587
--- /dev/null
@@ -0,0 +1,56 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+//     Module name: Transpose_Cur_UV_2x8.asm\r
+//     \r
+//     Transpose UV 2x8 to 8x2 block (2x8U + 2x8V in NV12)\r
+//\r
+//----------------------------------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region is :ub\r
+//     SRC_UW:                 SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw             // 4 GRFs\r
+//\r
+//  Temp buffer:\r
+//     LEFT_TEMP_W:            LEFT_TEMP_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw        // GRFs 0 and 1 are used here\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDDB:w        // Debug builds only: write marker 0xDDDB to EntrySignatureC\r
+#endif\r
+\r
+// Transpose UV (2x8): word columns 6 and 7 of the 8x8 block\r
+// Use LEFT_TEMP_W(0) as temp buf\r
+\r
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//  First step                 (8)     <1>:w <==== <8;2,1>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 76 76 67 67 66 66 57 57 56 56 47 47 46 46 37 37 36 36 27 27 26 26 17 17 16 16 07 07 06 06|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+mov (8)                LEFT_TEMP_W(0,0)<1>             SRC_UW(0,6)<8;2,1>              { NoDDClr }\r
+mov (8)                LEFT_TEMP_W(0,8)<1>             SRC_UW(2,6)<8;2,1>              { NoDDChk }\r
+\r
+//     Second step             (16) <1>:w <==== <1;8,2>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+mov (16)       LEFT_TEMP_W(1,0)<1>             LEFT_TEMP_W(0,0)<1;8,2>\r
+\r
+// UV are now transposed.  The result is in LEFT_TEMP_W(1).\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_8x8.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_8x8.asm
new file mode 100644 (file)
index 0000000..dbb7e65
--- /dev/null
@@ -0,0 +1,85 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+//     Module name: Transpose_Cur_UV_8x8.asm\r
+//     \r
+//     Transpose a 8x8 UV block. (8x8U + 8x8V)  The output is also in NV12\r
+//\r
+//----------------------------------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region is :ub\r
+//     SRC_UW:                 SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw     // 4 GRFs\r
+//\r
+//  Temp buffer:\r
+//     CUR_TEMP_W:             CUR_TEMP_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw         // 4 GRFs\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDDA:w        // Debug builds only: write marker 0xDDDA to EntrySignatureC\r
+#endif\r
+\r
+//////////////////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+// Src U and V are mixed in NV12 format. U on even bytes, V on odd bytes.\r
+// Transpose by treating UV pair as a word.\r
+\r
+\r
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//  First step                 (16)    <1>:w <==== <8;4,1>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |33 33 32 32 31 31 30 30 23 23 22 22 21 21 20 20 13 13 12 12 11 11 10 10 03 03 02 02 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 73 72 72 71 71 70 70 63 63 62 62 61 61 60 60 53 53 52 52 51 51 50 50 43 43 42 42 41 41 40 40|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |37 37 36 36 35 35 34 34 27 27 26 26 25 25 24 24 17 17 16 16 15 15 14 14 07 07 06 06 05 05 04 04|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 76 76 75 75 74 74 67 67 66 66 65 65 64 64 57 57 56 56 55 55 54 54 47 47 46 46 45 45 44 44|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+// Transpose UV (8x8 words), the first step: gather 4x4-word quadrants of SRC_UW into CUR_TEMP_W\r
+mov (16)       CUR_TEMP_W(0,0)<1>              SRC_UW(0,0)<8;4,1>\r
+mov (16)       CUR_TEMP_W(1,0)<1>              SRC_UW(2,0)<8;4,1>\r
+mov (16)       CUR_TEMP_W(2,0)<1>              SRC_UW(0,4)<8;4,1>\r
+mov (16)       CUR_TEMP_W(3,0)<1>              SRC_UW(2,4)<8;4,1>\r
+\r
+\r
+//     Second step             (8)     <1>:w <=== <16;4,4>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 73 63 63 53 53 43 43 33 33 23 23 13 13 03 03 72 72 62 62 52 52 42 42 32 32 22 22 12 12 02 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |75 75 65 65 55 55 45 45 35 35 25 25 15 15 05 05 74 74 64 64 54 54 44 44 34 34 24 24 14 14 04 04|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+// Transpose UV (8x8 words), the second step: each NoDDClr/NoDDChk pair writes the two 8-word halves of one SRC_UW GRF\r
+mov (8)                SRC_UW(0,0)<1>          CUR_TEMP_W(0,0)<16;4,4>         { NoDDClr }\r
+mov (8)                SRC_UW(0,8)<1>          CUR_TEMP_W(0,1)<16;4,4>         { NoDDChk }\r
+mov (8)                SRC_UW(1,0)<1>          CUR_TEMP_W(0,2)<16;4,4>         { NoDDClr }\r
+mov (8)                SRC_UW(1,8)<1>          CUR_TEMP_W(0,3)<16;4,4>         { NoDDChk }\r
+mov (8)                SRC_UW(2,0)<1>          CUR_TEMP_W(2,0)<16;4,4>         { NoDDClr }\r
+mov (8)                SRC_UW(2,8)<1>          CUR_TEMP_W(2,1)<16;4,4>         { NoDDChk }\r
+mov (8)                SRC_UW(3,0)<1>          CUR_TEMP_W(2,2)<16;4,4>         { NoDDClr }\r
+mov (8)                SRC_UW(3,8)<1>          CUR_TEMP_W(2,3)<16;4,4>         { NoDDChk }\r
+\r
+// U and V are now transposed; each UV pair was moved as one word, so the result is still interleaved.\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_Right_Most_2x8.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Cur_UV_Right_Most_2x8.asm
new file mode 100644 (file)
index 0000000..be7feba
--- /dev/null
@@ -0,0 +1,25 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     Transpose Cur MB Right Most 2x8 to 8x2\r
+//  Assume source is LEFT_TEMP_W(0), and destination is LEFT_TEMP_W(1)\r
+\r
+//     Input from dport for transpose: \r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//     Output of transpose:    (16) <1>:w <=== <1;8,2>:w   (same result as two (8) movs with <16;8,2>)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//     mov (8) LEFT_TEMP_W(1,0)<1>             LEFT_TEMP_W(0,0)<16;8,2>                { NoDDClr }\r
+//     mov (8) LEFT_TEMP_W(1,8)<1>             LEFT_TEMP_W(0,1)<16;8,2>                { NoDDChk }\r
+\r
+       mov (16)        LEFT_TEMP_W(1,0)<1>             LEFT_TEMP_W(0,0)<1;8,2>         // Even words -> first 8 words, odd words -> last 8 words\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_16x16.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_16x16.asm
new file mode 100644 (file)
index 0000000..8c20f74
--- /dev/null
@@ -0,0 +1,74 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+//     Module name: Transpose_Cur_Y_16x16.asm\r
+//     \r
+//     Transpose Y 16x16 block.\r
+//\r
+//----------------------------------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region is :ub\r
+//     SRC_YB:                 SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 8 GRFs\r
+//\r
+//  Temp buffer:\r
+//     CUR_TEMP_B:             CUR_TEMP_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub        // 8 GRFs\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDDA:w        // Debug builds only: write marker 0xDDDA to EntrySignatureC\r
+#endif\r
+\r
+\r
+// Transpose Y (16x16 bytes) in place in SRC_YB, using CUR_TEMP_B as scratch\r
+\r
+// The first step: copy each 4-column strip of SRC_YB into two GRFs of CUR_TEMP_B; each NoDDClr/NoDDChk pair fills the two 16-byte halves of one GRF\r
+mov (16)       CUR_TEMP_B(0,0)<1>              SRC_YB(0,0)<16;4,1>             { NoDDClr } \r
+mov (16)       CUR_TEMP_B(0,16)<1>             SRC_YB(2,0)<16;4,1>             { NoDDChk }\r
+mov (16)       CUR_TEMP_B(1,0)<1>              SRC_YB(4,0)<16;4,1>             { NoDDClr }\r
+mov (16)       CUR_TEMP_B(1,16)<1>             SRC_YB(6,0)<16;4,1>             { NoDDChk }\r
+\r
+mov (16)       CUR_TEMP_B(2,0)<1>              SRC_YB(0,4)<16;4,1>             { NoDDClr }\r
+mov (16)       CUR_TEMP_B(2,16)<1>             SRC_YB(2,4)<16;4,1>             { NoDDChk }\r
+mov (16)       CUR_TEMP_B(3,0)<1>              SRC_YB(4,4)<16;4,1>             { NoDDClr }\r
+mov (16)       CUR_TEMP_B(3,16)<1>             SRC_YB(6,4)<16;4,1>             { NoDDChk }\r
+\r
+mov (16)       CUR_TEMP_B(4,0)<1>              SRC_YB(0,8)<16;4,1>             { NoDDClr }\r
+mov (16)       CUR_TEMP_B(4,16)<1>             SRC_YB(2,8)<16;4,1>             { NoDDChk }\r
+mov (16)       CUR_TEMP_B(5,0)<1>              SRC_YB(4,8)<16;4,1>             { NoDDClr }\r
+mov (16)       CUR_TEMP_B(5,16)<1>             SRC_YB(6,8)<16;4,1>             { NoDDChk }\r
+\r
+mov (16)       CUR_TEMP_B(6,0)<1>              SRC_YB(0,12)<16;4,1>    { NoDDClr }\r
+mov (16)       CUR_TEMP_B(6,16)<1>             SRC_YB(2,12)<16;4,1>    { NoDDChk }\r
+mov (16)       CUR_TEMP_B(7,0)<1>              SRC_YB(4,12)<16;4,1>    { NoDDClr }\r
+mov (16)       CUR_TEMP_B(7,16)<1>             SRC_YB(6,12)<16;4,1>    { NoDDChk }\r
+\r
+// The second step: read every 4th byte of a strip (one source column, 16 bytes) and store it as one row of SRC_YB\r
+mov (16)       SRC_YB(0,0)<1>          CUR_TEMP_B(0,0)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(0,16)<1>         CUR_TEMP_B(0,1)<32;8,4>         { NoDDChk }\r
+mov (16)       SRC_YB(1,0)<1>          CUR_TEMP_B(0,2)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(1,16)<1>         CUR_TEMP_B(0,3)<32;8,4>         { NoDDChk }\r
+\r
+mov (16)       SRC_YB(2,0)<1>          CUR_TEMP_B(2,0)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(2,16)<1>         CUR_TEMP_B(2,1)<32;8,4>         { NoDDChk }\r
+mov (16)       SRC_YB(3,0)<1>          CUR_TEMP_B(2,2)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(3,16)<1>         CUR_TEMP_B(2,3)<32;8,4>         { NoDDChk }\r
+\r
+mov (16)       SRC_YB(4,0)<1>          CUR_TEMP_B(4,0)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(4,16)<1>         CUR_TEMP_B(4,1)<32;8,4>         { NoDDChk }\r
+mov (16)       SRC_YB(5,0)<1>          CUR_TEMP_B(4,2)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(5,16)<1>         CUR_TEMP_B(4,3)<32;8,4>         { NoDDChk }\r
+\r
+mov (16)       SRC_YB(6,0)<1>          CUR_TEMP_B(6,0)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(6,16)<1>         CUR_TEMP_B(6,1)<32;8,4>         { NoDDChk }\r
+mov (16)       SRC_YB(7,0)<1>          CUR_TEMP_B(6,2)<32;8,4>         { NoDDClr }\r
+mov (16)       SRC_YB(7,16)<1>         CUR_TEMP_B(6,3)<32;8,4>         { NoDDChk }\r
+\r
+// Y is transposed; the result is back in SRC_YB.\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_4x16.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_4x16.asm
new file mode 100644 (file)
index 0000000..70c0b1c
--- /dev/null
@@ -0,0 +1,75 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+//     Module name: Transpose_Cur_Y_4x16.asm\r
+//     \r
+//     Transpose a 4x16 internal planar to 16x4 internal planar block.\r
+//     The src block is 16x16.  Right most 4 columns are transposed.\r
+//\r
+//----------------------------------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region is :ub\r
+//     SRC_YB:                 SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 8 GRFs\r
+//\r
+//  Temp buffer:\r
+//     LEFT_TEMP_B:            LEFT_TEMP_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub               // 4 GRFs (2 intermediate + 2 result)\r
+//\r
+//////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDDB:w\r
+#endif\r
+\r
+// Transpose Y (4x16) right most 4 columns\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |5f 5e 5d 5c 5b 5a 59 58 57 56 55 54 53 52 51 50 4f 4e 4d 4c 4b 4a 49 48 47 46 45 44 43 42 41 40|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |7f 7e 7d 7c 7b 7a 79 78 77 76 75 74 73 72 71 70 6f 6e 6d 6c 6b 6a 69 68 67 66 65 64 63 62 61 60|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |9f 9e 9d 9c 9b 9a 99 98 97 96 95 94 93 92 91 90 8f 8e 8d 8c 8b 8a 89 88 87 86 85 84 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |bf be bd bc bb ba b9 b8 b7 b6 b5 b4 b3 b2 b1 b0 af ae ad ac ab aa a9 a8 a7 a6 a5 a4 a3 a2 a1 a0|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |df de dd dc db da d9 d8 d7 d6 d5 d4 d3 d2 d1 d0 cf ce cd cc cb ca c9 c8 c7 c6 c5 c4 c3 c2 c1 c0|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |ff fe fd fc fb fa f9 f8 f7 f6 f5 f4 f3 f2 f1 f0 ef ee ed ec eb ea e9 e8 e7 e6 e5 e4 e3 e2 e1 e0|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+// The first step\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |7f 7e 7d 7c 6f 6e 6d 6c 5f 5e 5d 5c 4f 4e 4d 4c 3f 3e 3d 3c 2f 2e 2d 2c 1f 1e 1d 1c 0f 0e 0d 0c|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |ff fe fd fc ef ee ed ec df de dd dc cf ce cd cc bf be bd bc af ae ad ac 9f 9e 9d 9c 8f 8e 8d 8c|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+// The second step\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |fd ed dd cd bd ad 9d 8d 7d 6d 5d 4d 3d 2d 1d 0d fc ec dc cc bc ac 9c 8c 7c 6c 5c 4c 3c 2c 1c 0c|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |ff ef df cf bf af 9f 8f 7f 6f 5f 4f 3f 2f 1f 0f fe ee de ce be ae 9e 8e 7e 6e 5e 4e 3e 2e 1e 0e|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+// The first step: gather the right-most 4 bytes (byte offsets 12..15) of each of the 16 rows into LEFT_TEMP_B(0) and LEFT_TEMP_B(1)\r
+mov (16)       LEFT_TEMP_B(0,0)<1>             SRC_YB(0,12)<16;4,1>            { NoDDClr }     \r
+mov (16)       LEFT_TEMP_B(0,16)<1>    SRC_YB(2,12)<16;4,1>            { NoDDChk }\r
+mov (16)       LEFT_TEMP_B(1,0)<1>             SRC_YB(4,12)<16;4,1>            { NoDDClr }\r
+mov (16)       LEFT_TEMP_B(1,16)<1>    SRC_YB(6,12)<16;4,1>            { NoDDChk }\r
+\r
+// The second step: <32;8,4> picks byte k of every 4-byte group, so each mov writes one 16-byte transposed row (k = 0..3)\r
+mov (16)       LEFT_TEMP_B(2,0)<1>             LEFT_TEMP_B(0,0)<32;8,4>                { NoDDClr }     \r
+mov (16)       LEFT_TEMP_B(2,16)<1>    LEFT_TEMP_B(0,1)<32;8,4>                { NoDDChk }\r
+mov (16)       LEFT_TEMP_B(3,0)<1>             LEFT_TEMP_B(0,2)<32;8,4>                { NoDDClr }\r
+mov (16)       LEFT_TEMP_B(3,16)<1>    LEFT_TEMP_B(0,3)<32;8,4>                { NoDDChk }\r
+\r
+// Y is now transposed. The result is in LEFT_TEMP_B(2) and LEFT_TEMP_B(3); LEFT_TEMP_B(0) and LEFT_TEMP_B(1) hold the intermediate data.\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_Right_Most_4x16.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Cur_Y_Right_Most_4x16.asm
new file mode 100644 (file)
index 0000000..c458f85
--- /dev/null
@@ -0,0 +1,31 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     Transpose cur Y right most 4x16 to 16x4\r
+//  Assume source is LEFT_TEMP_B(0), and destination is LEFT_TEMP_B(2)\r
+\r
+\r
+//     Input received from dport:\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//     Output of transpose:            <1>     <= <32;8,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+       // Transpose the 2 source GRFs LEFT_TEMP_B(0..1) into the 2 GRFs LEFT_TEMP_B(2..3): each mov takes byte k of every 4-byte group (<32;8,4>) and writes one 16-byte half GRF\r
+       mov (16)        LEFT_TEMP_B(2)<1>                       LEFT_TEMP_B(0, 0)<32;8,4>               { NoDDClr }\r
+       mov (16)        LEFT_TEMP_B(2, 16)<1>           LEFT_TEMP_B(0, 1)<32;8,4>               { NoDDChk }\r
+       mov (16)        LEFT_TEMP_B(3)<1>                       LEFT_TEMP_B(0, 2)<32;8,4>               { NoDDClr }\r
+       mov (16)        LEFT_TEMP_B(3, 16)<1>           LEFT_TEMP_B(0, 3)<32;8,4>               { NoDDChk }\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Left_UV_2x8.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Left_UV_2x8.asm
new file mode 100644 (file)
index 0000000..678456e
--- /dev/null
@@ -0,0 +1,28 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     Transpose left MB 2x8 to 8x2\r
+//  Assume source is LEFT_TEMP_W, and destination is PREV_MB_UW\r
+\r
+//     Input from dport for transpose: \r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//     Output of transpose:    <1>     <=== <16;8,2>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//     mov (8) PREV_MB_UW(0,0)<1>              BUF_W(0,0)<16;8,2>              { NoDDClr }\r
+//     mov (8) PREV_MB_UW(0,8)<1>              BUF_W(0,1)<16;8,2>              { NoDDChk }\r
+       \r
+//     mov (8) PREV_MB_UW(0,0)<1>              LEFT_TEMP_W(0,0)<16;8,2>                { NoDDClr }\r
+//     mov (8) PREV_MB_UW(0,8)<1>              LEFT_TEMP_W(0,1)<16;8,2>                { NoDDChk }\r
+// Single-instruction form of the commented-out mov (8) pairs above: with 16 channels, <1;8,2> reads the 8 even words as row 0, then steps 1 word and reads the 8 odd words as row 1.\r
+       mov (16)        PREV_MB_UW(0,0)<1>              LEFT_TEMP_W(0,0)<1;8,2>\r
diff --git a/i965_drv_video/shaders/h264/ildb/Transpose_Left_Y_4x16.asm b/i965_drv_video/shaders/h264/ildb/Transpose_Left_Y_4x16.asm
new file mode 100644 (file)
index 0000000..435996c
--- /dev/null
@@ -0,0 +1,31 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     Transpose left MB 4x16 to 16x4\r
+//  Assume source is LEFT_TEMP_B, and destination is PREV_MB_YB\r
+\r
+\r
+//     Input received from dport:\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//     Output of transpose:            <1>     <= <32;8,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+       // Transpose the 2 source GRFs LEFT_TEMP_B(0..1) into the 2 GRFs PREV_MB_YB(0..1): each mov takes byte k of every 4-byte group (<32;8,4>) and writes one 16-byte half GRF\r
+       mov (16)        PREV_MB_YB(0)<1>                        LEFT_TEMP_B(0, 0)<32;8,4>               { NoDDClr }\r
+       mov (16)        PREV_MB_YB(0, 16)<1>            LEFT_TEMP_B(0, 1)<32;8,4>               { NoDDChk }\r
+       mov (16)        PREV_MB_YB(1)<1>                        LEFT_TEMP_B(0, 2)<32;8,4>               { NoDDClr }\r
+       mov (16)        PREV_MB_YB(1, 16)<1>            LEFT_TEMP_B(0, 3)<32;8,4>               { NoDDChk }\r
diff --git a/i965_drv_video/shaders/h264/ildb/loadNV12_16x16T.asm b/i965_drv_video/shaders/h264/ildb/loadNV12_16x16T.asm
new file mode 100644 (file)
index 0000000..d5aa552
--- /dev/null
@@ -0,0 +1,53 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: loadNV12_16x16T.asm\r
+//\r
+// Load and transpose NV12 16x16 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 8 GRFs\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud   (U+V for NV12)    // 4 GRFs\r
+//\r
+//     Source region is :ub.  The same region as :ud region\r
+//     SRC_YB:                 SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub            // 8 GRFs\r
+//     SRC_UB:                 SRC_UB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub            // 2 GRFs\r
+//     SRC_VB:                 SRC_VB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub            // 2 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_SRC_Y:               Binding table index of Y surface\r
+//     BI_SRC_UV:              Binding table index of UV surface (NV12)\r
+//\r
+//     Temp buffer:\r
+//     BUF_B:                  BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD1:w        // Debug builds only: record signature 0xDDD1 in EntrySignatureC\r
+#endif\r
+\r
+       // Read Y: MSGSRC.0/.1 = block origin x/y, MSGSRC.2 = block size\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:w               // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud           // Block width and height (16x16), encoded as (height-1)<<16 | (width-1)\r
+    send (8) SRC_YD(0)<1>      MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(8)+DWBRMSGDSC_RC+BI_SRC_Y      // Read 8 GRFs\r
+\r
+       // Read U+V (reuses MSGSRC.0 = x from the Y read above)\r
+    asr (1)    MSGSRC.1:ud             MSGSRC.1:ud                     1:w                                             // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2<1>:ud  0x0007000F:ud           // NV12 U+V block width and height (16x8)\r
+    send (8) SRC_UD(0)<1>      MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(4)+DWBRMSGDSC_RC+BI_SRC_UV     // Read 4 GRFs\r
+\r
+       #include "TransposeNV12_16x16.asm"\r
+\r
+//     #include "Transpose_Y_16x16.asm"        \r
+//     #include "Transpose_NV12_UV_16x8.asm"   \r
+               \r
+// End of loadNV12_16x16T\r
diff --git a/i965_drv_video/shaders/h264/ildb/loadNV12_16x4.asm b/i965_drv_video/shaders/h264/ildb/loadNV12_16x4.asm
new file mode 100644 (file)
index 0000000..a2e7dfd
--- /dev/null
@@ -0,0 +1,54 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: loadNV12_16x4.asm\r
+//\r
+// Load NV12 16x4 block\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 3 GRFs (2 for Y, 1 for U+V)\r
+//\r
+//     Source region is :ub.  The same region as :ud region\r
+//     SRC_YB:                 SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub            // 2 GRFs\r
+//     SRC_UB:                 SRC_UB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub            // 0.5 GRF\r
+//     SRC_VB:                 SRC_VB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub            // 0.5 GRF\r
+//\r
+//     Binding table index: \r
+//     BI_SRC_Y:               Binding table index of Y surface\r
+//     BI_SRC_UV:              Binding table index of UV surface (NV12)\r
+//\r
+//     Temp buffer:\r
+//     BUF_D:                  BUF_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+//     BUF_B:                  BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD2:w        // Debug builds only: record signature 0xDDD2 in EntrySignatureC\r
+#endif\r
+\r
+       // Read Y into PREV_MB_YD: MSGSRC.0/.1 = block origin x/y, MSGSRC.2 = block size\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX<2;2,1>:w           // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x0003000F:ud           // Block width and height (16x4), encoded as (height-1)<<16 | (width-1)\r
+    send (8) PREV_MB_YD(0)<1>  MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(2)+DWBRMSGDSC_RC+BI_SRC_Y      // Read 2 GRFs\r
+\r
+       // Read U+V (reuses MSGSRC.0 = x from the Y read above)\r
+    asr (1)    MSGSRC.1:ud             MSGSRC.1:ud                     1:w                                             // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2<1>:ud  0x0001000F:ud           // NV12 U+V block width and height (16x2)\r
+\r
+       // Load NV12 U+V to a temp buf (BUF_D)\r
+       send (8) BUF_D(0)<1>    MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(1)+DWBRMSGDSC_RC+BI_SRC_UV     // Read 1 GRF\r
+\r
+       // Convert NV12 U+V to internal planar U and V and place them right after Y (currently disabled: both movs below are commented out, so U+V stays interleaved in BUF_D).\r
+//     mov (16)        SRC_UB(0,0)<1>          BUF_B(0,0)<32;16,2>\r
+//     mov (16)        SRC_VB(0,0)<1>          BUF_B(0,1)<32;16,2>     \r
+       \r
+// End of loadNV12_16x4.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Cur_UV_8x8T.asm b/i965_drv_video/shaders/h264/ildb/load_Cur_UV_8x8T.asm
new file mode 100644 (file)
index 0000000..25cb96c
--- /dev/null
@@ -0,0 +1,65 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Cur_UV_8x8T.asm\r
+//\r
+// Load and transpose UV 8x8 block (NV12: 8x8U and 8x8V mixed)\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud   (U+V for NV12)    // 4 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_SRC_UV:              Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD1:w        // Debug builds only: record signature 0xDDD1 in EntrySignatureC\r
+#endif\r
+\r
+       // Read U+V blk: build the block origin/size in MSGSRC and the descriptor in MSGDSC, then issue the single send at the end of this file\r
+#if defined(_PROGRESSIVE) \r
+    mov (1)    MSGSRC.0:ud             ORIX_CUR:w                              { NoDDClr }             // Block origin\r
+    asr (1)    MSGSRC.1:ud             ORIY_CUR:w              1:w             { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0007000F:ud                   { NoDDChk }                     // NV12 U+V block width and height (16x8 bytes), encoded as (height-1)<<16 | (width-1)\r
+\r
+    //send (8) SRC_UD(0)<1>    MSGHDRU         MSGSRC<8;8,1>:ud        DWBRMSGDSC_SC+0x00040000+BI_SRC_UV\r
+    mov (1)    MSGDSC  RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+//    cmp.z.f0.0 (1)  NULLREGW         PicTypeC:w      0:w                                             // Get pic type flag\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // Get bottom field flag\r
+       // f0.1 is used below to pick the bottom-field or top-field descriptor\r
+\r
+    mov (1)    MSGSRC.0:ud             ORIX_CUR:w                              { NoDDClr }             // Block origin\r
+    asr (1)    MSGSRC.1:ud             ORIY_CUR:w              1:w             { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0007000F:ud                   { NoDDChk }                     // NV12 U+V block width and height (16x8 bytes)\r
+\r
+    // Set message descriptor\r
+\r
+    // Frame picture (disabled)\r
+//    (f0.0) mov (1)   MSGDSC  RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud                  // Read 4 GRFs from SRC_UV\r
+//     (f0.0) jmpi             load_UV_8x8T\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(4)+DWBRMSGDSC_SC_BF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(4)+DWBRMSGDSC_SC_TF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV top field\r
+\r
+//load_UV_8x8T:\r
+\r
+#endif\r
+\r
+    send (8) SRC_UD(0)<1>      MSGHDRU         MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Read into SRC_UD using the MSGSRC/MSGDSC prepared above\r
+\r
+//     #include "Transpose_Cur_UV_8x8.asm"\r
+\r
+// End of load_Cur_UV_8x8T\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Cur_UV_8x8T_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/load_Cur_UV_8x8T_Mbaff.asm
new file mode 100644 (file)
index 0000000..82b7d9e
--- /dev/null
@@ -0,0 +1,62 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Cur_UV_8x8T_Mbaff.asm\r
+//\r
+// Load and transpose UV 8x8 block (NV12: 8x8U and 8x8V mixed)\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud   (U+V for NV12)    // 4 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_SRC_UV:              Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD1:w        // Debug builds only: record signature 0xDDD1 in EntrySignatureC\r
+#endif\r
+    // f0.0 is set when FieldModeCurrentMbFlag is clear in BitFlags; it selects the frame-picture path of the if/else below\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                \r
+\r
+    and.nz.f0.1 (1)    NULLREGW        BitFields:w     BotFieldFlag:w                                  // Get bottom field flag\r
+\r
+       // Read U+V\r
+    mov (1)    MSGSRC.0:ud             ORIX_CUR:w                                              { NoDDClr }             // Block origin\r
+    asr (1)    MSGSRC.1:ud             ORIY_CUR:w                      1:w                     { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0007000F:ud                                   { NoDDChk }                     // NV12 U+V block width and height (16x8 bytes), encoded as (height-1)<<16 | (width-1)\r
+\r
+    // Set message descriptor and adjust the block origin y for the access mode\r
+\r
+       (f0.0)  if      (1)             ILDB_LABEL(ELSE_UV_8X8T)\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud                  // Read 4 GRFs from SRC_UV\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d      MSGSRC.1:d              8:w             // Add vertical offset 8 for bot MB in MBAFF mode\r
+    \r
+ILDB_LABEL(ELSE_UV_8X8T): \r
+       else    (1)             ILDB_LABEL(ENDIF_UV_8X8T)\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(4)+DWBRMSGDSC_SC_BF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(4)+DWBRMSGDSC_SC_TF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ILDB_LABEL(ENDIF_UV_8X8T):\r
+\r
+    send (8) SRC_UD(0)<1>      MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Read into SRC_UD using the MSGSRC/MSGDSC prepared above\r
+\r
+//     #include "Transpose_Cur_UV_8x8.asm"\r
+\r
+// End of load_Cur_UV_8x8T_Mbaff\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Cur_UV_Right_Most_2x8.asm b/i965_drv_video/shaders/h264/ildb/load_Cur_UV_Right_Most_2x8.asm
new file mode 100644 (file)
index 0000000..426a518
--- /dev/null
@@ -0,0 +1,61 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Cur_UV_Right_Most_2x8.asm\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD0:w        // Debug builds only: record signature 0xDDD0 in EntrySignatureC\r
+#endif\r
+\r
+#if defined(_PROGRESSIVE) \r
+       // Read U+V, (UV MB size = 16x8): right-most 4 bytes of each of the 8 rows, read through BI_DEST_UV into LEFT_TEMP_D\r
+    add (1)    MSGSRC.0:ud             ORIX_CUR:w                      12:w                    { NoDDClr }             // Block origin x, moved right 12 bytes\r
+    asr (1)    MSGSRC.1:ud             ORIY_CUR:w                      1:w                             { NoDDClr, NoDDChk }            // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud                                           { NoDDChk }             // NV12 U+V block width and height (4x8), encoded as (height-1)<<16 | (width-1)\r
+       send (8) LEFT_TEMP_D(0)<1>      MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV    \r
+#endif\r
+\r
+#if defined(_FIELD) || defined(_MBAFF)\r
+\r
+    // f0.0 is set when FieldModeCurrentMbFlag is clear in BitFlags; it selects the frame-picture path of the if/else below\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                \r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w                          // Get bottom field flag\r
+\r
+       // Read U+V\r
+    add (1)    MSGSRC.0:ud             ORIX_CUR:w                      12:w                            { NoDDClr }             // Block origin x, moved right 12 bytes\r
+    asr (1)    MSGSRC.1:ud             ORIY_CUR:w                      1:w                             { NoDDClr, NoDDChk }            // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud                                           { NoDDChk }             // NV12 U+V block width and height (4x8)\r
+\r
+       // Load NV12 U+V \r
+       \r
+    // Set message descriptor\r
+\r
+       (f0.0)  if      (1)             ILDB_LABEL(ELSE_Y_2x8T)\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud                 // Read 1 GRF from DEST_UV\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              8:w             // Add vertical offset 8 for bot MB in MBAFF mode\r
+\r
+ILDB_LABEL(ELSE_Y_2x8T): \r
+       else    (1)             ILDB_LABEL(ENDIF_Y_2x8T)\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field\r
+\r
+       endif\r
+ILDB_LABEL(ENDIF_Y_2x8T):\r
+\r
+       // Read 1 GRF from DEST surface as the above MB has been deblocked.\r
+//     send (8) BUF_D(0)<1>    MSGHDRU MSGSRC<8;8,1>:ud        MSGDSC  \r
+       send (8) LEFT_TEMP_D(0)<1>      MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  \r
+\r
+#endif\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Cur_Y_16x16T.asm b/i965_drv_video/shaders/h264/ildb/load_Cur_Y_16x16T.asm
new file mode 100644 (file)
index 0000000..d70b101
--- /dev/null
@@ -0,0 +1,63 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Cur_Y_16x16T.asm\r
+//\r
+// Load and transpose Y 16x16 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 8 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_SRC_Y:               Binding table index of Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD1:w        // Debug builds only: record signature 0xDDD1 in EntrySignatureC\r
+#endif\r
+       // Read Y: build the block origin/size in MSGSRC and the descriptor in MSGDSC, then issue the single send at the end of this file\r
+       \r
+#if defined(_PROGRESSIVE) \r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:w               { NoDDClr }                     // Block origin\r
+       mov (1) MSGSRC.2<1>:ud  0x000F000F:ud                   { NoDDChk }                     // Block width and height (16x16), encoded as (height-1)<<16 | (width-1)\r
+\r
+    //send (8) SRC_YD(0)<1>    MSGHDRC         MSGSRC<8;8,1>:ud        DWBRMSGDSC_SMPLR+0x00080000+BI_SRC_Y\r
+       mov (1) MSGDSC  RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud           // Read 8 GRFs from SRC_Y\r
+#endif\r
+\r
+    \r
+#if defined(_FIELD)\r
+//    cmp.z.f0.0 (1)  NULLREGW         PicTypeC:w      0:w                                             // Get pic type flag\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // Get bottom field flag\r
+       // f0.1 is used below to pick the bottom-field or top-field descriptor\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:w               { NoDDClr }                     // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud                   { NoDDChk }                     // Block width and height (16x16)\r
+    \r
+    // Set message descriptor\r
+    // Frame picture (disabled)\r
+//     (f0.0) mov (1)  MSGDSC  RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud                   // Read 8 GRFs from SRC_Y\r
+//     (f0.0) jmpi             load_Y_16x16T\r
+\r
+       // Non frame picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(8)+DWBRMSGDSC_SC_BF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(8)+DWBRMSGDSC_SC_TF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y top field\r
+\r
+//load_Y_16x16T:\r
+\r
+#endif\r
+\r
+    send (8) SRC_YD(0)<1>      MSGHDRC         MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Read into SRC_YD using the MSGSRC/MSGDSC prepared above\r
+       \r
+//     #include "Transpose_Cur_Y_16x16.asm"\r
+\r
+// End of load_Cur_Y_16x16T\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Cur_Y_16x16T_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/load_Cur_Y_16x16T_Mbaff.asm
new file mode 100644 (file)
index 0000000..f9c4745
--- /dev/null
@@ -0,0 +1,62 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Cur_Y_16x16T_Mbaff.asm\r
+//\r
+// Load and transpose Y 16x16 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 8 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_SRC_Y:               Binding table index of Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD1:w        // Debug builds only: record signature 0xDDD1 in EntrySignatureC\r
+#endif\r
+\r
+    // f0.0 is set when FieldModeCurrentMbFlag is clear in BitFlags; it selects the frame-picture path of the if/else below\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                \r
+       \r
+    and.nz.f0.1 (1)    NULLREGW        BitFields:w     BotFieldFlag:w          // Get bottom field flag\r
+\r
+       // Read Y\r
+    mov (2)    MSGSRC.0<1>:d   ORIX_CUR<2;2,1>:w               { NoDDClr }             // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud                   { NoDDChk }             // Block width and height (16x16), encoded as (height-1)<<16 | (width-1)\r
+    \r
+    // Set message descriptor and adjust the block origin y for the access mode\r
+    \r
+       (f0.0)  if      (1)             ILDB_LABEL(ELSE_Y_16x16T)\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud                   // Read 8 GRFs from SRC_Y\r
+    \r
+       (f0.1) add (1)  MSGSRC.1:d      MSGSRC.1:d              16:w            // Add vertical offset 16 for bot MB in MBAFF mode\r
+    \r
+ILDB_LABEL(ELSE_Y_16x16T): \r
+       else    (1)             ILDB_LABEL(ENDIF_Y_16x16T)\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(8)+DWBRMSGDSC_SC_BF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(8)+DWBRMSGDSC_SC_TF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ILDB_LABEL(ENDIF_Y_16x16T):\r
+\r
+    send (8) SRC_YD(0)<1>      MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Read into SRC_YD using the MSGSRC/MSGDSC prepared above\r
+\r
+//     #include "Transpose_Cur_Y_16x16.asm"\r
+\r
+// End of load_Cur_Y_16x16T_Mbaff\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Cur_Y_Right_Most_4x16.asm b/i965_drv_video/shaders/h264/ildb/load_Cur_Y_Right_Most_4x16.asm
new file mode 100644 (file)
index 0000000..cd25ace
--- /dev/null
@@ -0,0 +1,85 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Cur_Y_Right_Most_4x16.asm\r
+//\r
+// Load luma cur MB right most 4x16 into LEFT_TEMP_B\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD0:w\r
+#endif\r
+\r
+\r
+#if defined(_PROGRESSIVE) \r
+       // Read Y: right-most 4 columns (x offset 12) of the current MB, 16 rows\r
+    add (1)    MSGSRC.0<1>:ud  ORIX_CUR:w              12:w    { NoDDClr }                             // Block origin x, move right 12 bytes\r
+    mov (1)    MSGSRC.1<1>:ud  ORIY_CUR:w                              { NoDDClr, NoDDChk }    // Block origin y\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   { NoDDChk }                             // Block width and height (4x16)\r
+    send (8) LEFT_TEMP_D(0)<1> MSGHDRL         MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y     // Read 2 GRFs from DEST_Y\r
+#endif\r
+\r
+\r
+#if defined(_FIELD) || defined(_MBAFF)\r
+\r
+    // FieldModeCurrentMbFlag selects frame vs. field access for the current MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 = flag bit clear (frame path below)\r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w  // Get bottom field flag\r
+\r
+       // Read Y: right-most 4 columns (x offset 12) of the current MB, 16 rows\r
+    add (1)    MSGSRC.0<1>:ud  ORIX_CUR:w              12:w    { NoDDClr }                             // Block origin x, move right 12 bytes\r
+    mov (1)    MSGSRC.1<1>:ud  ORIY_CUR:w                              { NoDDClr, NoDDChk }    // Block origin y\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   { NoDDChk }                             // Block width and height (4x16)\r
+    \r
+    // Set message descriptor, etc.\r
+    \r
+       (f0.0)  if      (1)             ILDB_LABEL(ELSE_Y_4x16T)\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud                  // Read 2 GRFs from DEST_Y\r
+    \r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              16:w            // Add vertical offset 16 for bot MB in MBAFF mode\r
+    \r
+ILDB_LABEL(ELSE_Y_4x16T): \r
+       else    (1)             ILDB_LABEL(ENDIF_Y_4x16T)\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field\r
+\r
+       endif\r
+ILDB_LABEL(ENDIF_Y_4x16T):\r
+\r
+//    send (8) BUF_D(0)<1>     MSGHDRY MSGSRC<8;8,1>:ud        MSGDSC\r
+    send (8) LEFT_TEMP_D(0)<1> MSGHDRL         MSGSRC<8;8,1>:ud        DAPREAD MSGDSC\r
+#endif\r
+\r
+//     Transpose 4x16 to 16x4\r
+\r
+//     Input received from dport:\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//     Output of transpose:            <1>     <= <32;8,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+/*\r
+       // Transpose the data, also occupy 2 GRFs\r
+       mov (16)        PREV_MB_YB(0)<1>                        BUF_B(0, 0)<32;8,4>             { NoDDClr }\r
+       mov (16)        PREV_MB_YB(0, 16)<1>            BUF_B(0, 1)<32;8,4>             { NoDDChk }\r
+       mov (16)        PREV_MB_YB(1)<1>                        BUF_B(0, 2)<32;8,4>             { NoDDClr }\r
+       mov (16)        PREV_MB_YB(1, 16)<1>            BUF_B(0, 3)<32;8,4>             { NoDDChk }\r
+*/\r
+// End of load_Cur_Y_Right_Most_4x16.asm\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Left_UV_2x8T.asm b/i965_drv_video/shaders/h264/ildb/load_Left_UV_2x8T.asm
new file mode 100644 (file)
index 0000000..a5f622c
--- /dev/null
@@ -0,0 +1,76 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module Name: Load_Left_UV_2X8T.Asm\r
+//\r
+// Load left MB NV12 U+V block (4 bytes wide x 8 rows) into LEFT_TEMP_D\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source Region Is :UB\r
+//     BUF_D:                  BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD\r
+\r
+//     Binding Table Index: \r
+//     BI_DEST_UV:             Binding Table Index Of Deblocked (DEST) UV Surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD0:w\r
+#endif\r
+\r
+#if defined(_PROGRESSIVE) \r
+       // Read U+V of the left MB from the DEST surface\r
+    mov (1)    MSGSRC.0:ud             ORIX_LEFT:w                                                     { NoDDClr }             // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_LEFT:w                     1:w                             { NoDDClr, NoDDChk }            // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud                                           { NoDDChk }             // NV12 U+V block width and height (4x8)\r
+\r
+       send (8) LEFT_TEMP_D(0)<1>      MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV    // Read 1 GRF from DEST_UV\r
+#endif\r
+\r
+#if defined(_FIELD) || defined(_MBAFF)\r
+\r
+    // FieldModeCurrentMbFlag determines how to access left MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 = flag bit clear (frame path below)\r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w                          // Get bottom field flag\r
+\r
+       // Read U+V of the left MB from the DEST surface\r
+    mov (1)    MSGSRC.0:ud             ORIX_LEFT:w                                                     { NoDDClr }             // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_LEFT:w                     1:w                             { NoDDClr, NoDDChk }            // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud                                           { NoDDChk }             // NV12 U+V block width and height (4x8)\r
+\r
+       // Load NV12 U+V \r
+       \r
+    // Set message descriptor\r
+\r
+       (f0.0)  if      (1)             ILDB_LABEL(ELSE_Y_2x8T)\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud                 // Read 1 GRF from DEST_UV\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              8:w             // Add vertical offset 8 for bot MB in MBAFF mode\r
+\r
+ILDB_LABEL(ELSE_Y_2x8T): \r
+       else    (1)             ILDB_LABEL(ENDIF_Y_2x8T)\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field\r
+\r
+       endif\r
+ILDB_LABEL(ENDIF_Y_2x8T):\r
+\r
+       // Read 1 GRF from DEST surface as the left MB has been deblocked.\r
+//     send (8) BUF_D(0)<1>    MSGHDRU MSGSRC<8;8,1>:ud        MSGDSC  \r
+       send (8) LEFT_TEMP_D(0)<1>      MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  \r
+\r
+#endif\r
+\r
+// End of load_Left_UV_2x8T.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Left_UV_2x8T_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/load_Left_UV_2x8T_Mbaff.asm
new file mode 100644 (file)
index 0000000..fefda4f
--- /dev/null
@@ -0,0 +1,79 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module Name: load_Left_UV_2x8T_Mbaff.asm\r
+//\r
+// Load left MB NV12 U+V block (4 bytes wide x 8 rows) into LEFT_TEMP_D, MBAFF variant\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source Region Is :UB\r
+//     BUF_D:                  BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD\r
+\r
+//     Binding Table Index: \r
+//     BI_DEST_UV:             Binding Table Index Of Deblocked (DEST) UV Surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD0:w\r
+#endif\r
+\r
+    // FieldModeCurrentMbFlag determines how to access left MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 = flag bit clear (frame path below)\r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w                          // Get bottom field flag\r
+\r
+       // Read U+V of the left MB from the DEST surface\r
+    mov (1)    MSGSRC.0:ud             ORIX_LEFT:w                                                     { NoDDClr }             // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_LEFT:w                     1:w                             { NoDDClr, NoDDChk }            // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud                                           { NoDDChk }             // NV12 U+V block width and height (4x8)\r
+\r
+       // Load NV12 U+V \r
+       \r
+    // Set message descriptor\r
+\r
+       (f0.0)  if      (1)             ILDB_LABEL(ELSE_Y_2x8T)\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud                 // Read 1 GRF from DEST_UV\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              8:w             // Add vertical offset 8 for bot MB in MBAFF mode\r
+\r
+ILDB_LABEL(ELSE_Y_2x8T): \r
+       else    (1)             ILDB_LABEL(ENDIF_Y_2x8T)\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ILDB_LABEL(ENDIF_Y_2x8T):\r
+\r
+       // Read 1 GRF from DEST surface as the left MB has been deblocked.\r
+//     send (8) BUF_D(0)<1>    MSGHDRU MSGSRC<8;8,1>:ud        MSGDSC  \r
+       send (8) LEFT_TEMP_D(0)<1>      MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  \r
+\r
+\r
+//     Input from dport for transpose: \r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//     Output of transpose:    <1>     <=== <16;8,2>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+/*\r
+       mov (8) PREV_MB_UW(0,0)<1>              BUF_W(0,0)<16;8,2>              { NoDDClr }\r
+       mov (8) PREV_MB_UW(0,8)<1>              BUF_W(0,1)<16;8,2>              { NoDDChk }\r
+*/\r
+// End of load_Left_UV_2x8T_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Left_Y_4x16T.asm b/i965_drv_video/shaders/h264/ildb/load_Left_Y_4x16T.asm
new file mode 100644 (file)
index 0000000..ab454f1
--- /dev/null
@@ -0,0 +1,96 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Left_Y_4x16T.asm\r
+//\r
+// Load luma left MB 4x16 and transpose 4x16 to 16x4.\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     PREV_MB_YD:             PREV_MB_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                 // 2 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of deblocked (DEST) Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD0:w\r
+#endif\r
+\r
+\r
+#if defined(_PROGRESSIVE) \r
+       // Read Y: 4x16 block of the left MB from the DEST surface\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_LEFT<2;2,1>:w              { NoDDClr }             // Block origin (x and y copied together)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   { NoDDChk }             // Block width and height (4x16)\r
+    \r
+//    mov (1)  MSGDSC  DWBRMSGDSC_RC+0x00020000+BI_DEST_Y:ud                   // Read 2 GRFs from DEST_Y\r
+    send (8) LEFT_TEMP_D(0)<1> MSGHDRL         MSGSRC<8;8,1>:ud        DAPREAD RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y     // Read 2 GRFs from DEST_Y\r
+#endif\r
+\r
+\r
+#if defined(_FIELD) || defined(_MBAFF)\r
+\r
+    // FieldModeCurrentMbFlag determines how to access left MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 = flag bit clear (frame path below)\r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w  // Get bottom field flag\r
+\r
+       // Read Y: 4x16 block of the left MB from the DEST surface\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_LEFT<2;2,1>:w              { NoDDClr }             // Block origin (x and y copied together)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   { NoDDChk }             // Block width and height (4x16)\r
+    \r
+    // Set message descriptor, etc.\r
+    \r
+       (f0.0)  if      (1)             ILDB_LABEL(ELSE_Y_4x16T)\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud                  // Read 2 GRFs from DEST_Y\r
+    \r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              16:w            // Add vertical offset 16 for bot MB in MBAFF mode\r
+    \r
+ILDB_LABEL(ELSE_Y_4x16T): \r
+       else    (1)             ILDB_LABEL(ENDIF_Y_4x16T)\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field\r
+\r
+       endif\r
+ILDB_LABEL(ENDIF_Y_4x16T):\r
+\r
+//    send (8) BUF_D(0)<1>     MSGHDRY MSGSRC<8;8,1>:ud        MSGDSC\r
+    send (8) LEFT_TEMP_D(0)<1> MSGHDRL         MSGSRC<8;8,1>:ud        DAPREAD MSGDSC\r
+#endif\r
+\r
+//     Transpose 4x16 to 16x4\r
+\r
+//     Input received from dport:\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//     Output of transpose:            <1>     <= <32;8,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+/*\r
+       // Transpose the data, also occupy 2 GRFs\r
+       mov (16)        PREV_MB_YB(0)<1>                        BUF_B(0, 0)<32;8,4>             { NoDDClr }\r
+       mov (16)        PREV_MB_YB(0, 16)<1>            BUF_B(0, 1)<32;8,4>             { NoDDChk }\r
+       mov (16)        PREV_MB_YB(1)<1>                        BUF_B(0, 2)<32;8,4>             { NoDDClr }\r
+       mov (16)        PREV_MB_YB(1, 16)<1>            BUF_B(0, 3)<32;8,4>             { NoDDChk }\r
+*/\r
+// End of load_Left_Y_4x16T.asm\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Left_Y_4x16T_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/load_Left_Y_4x16T_Mbaff.asm
new file mode 100644 (file)
index 0000000..95f73a3
--- /dev/null
@@ -0,0 +1,84 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Left_Y_4x16T_Mbaff.asm\r
+//\r
+// Load luma left MB 4x16 and transpose 4x16 to 16x4.\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     PREV_MB_YD:             PREV_MB_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                 // 2 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of deblocked (DEST) Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD0:w\r
+#endif\r
+\r
+    // FieldModeCurrentMbFlag determines how to access left MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 = flag bit clear (frame path below)\r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w  // Get bottom field flag\r
+\r
+       // Read Y: 4x16 block of the left MB from the DEST surface\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_LEFT<2;2,1>:w              { NoDDClr }             // Block origin (x and y copied together)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   { NoDDChk }             // Block width and height (4x16)\r
+    \r
+    // Set message descriptor, etc.\r
+    \r
+       (f0.0)  if      (1)             ELSE_Y_4x16T\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud                  // Read 2 GRFs from DEST_Y\r
+    \r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              16:w            // Add vertical offset 16 for bot MB in MBAFF mode\r
+    \r
+ELSE_Y_4x16T: \r
+       else    (1)             ENDIF_Y_4x16T\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ENDIF_Y_4x16T:\r
+\r
+//    send (8) BUF_D(0)<1>     MSGHDRY MSGSRC<8;8,1>:ud        MSGDSC\r
+    send (8) LEFT_TEMP_D(0)<1> MSGHDRL         MSGSRC<8;8,1>:ud        DAPREAD MSGDSC\r
+\r
+\r
+//     Transpose 4x16 to 16x4\r
+\r
+//     Input received from dport:\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+//     Output of transpose:            <1>     <= <32;8,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+/*\r
+       // Transpose the data, also occupy 2 GRFs\r
+       mov (16)        PREV_MB_YB(0)<1>                        BUF_B(0, 0)<32;8,4>             { NoDDClr }\r
+       mov (16)        PREV_MB_YB(0, 16)<1>            BUF_B(0, 1)<32;8,4>             { NoDDChk }\r
+       mov (16)        PREV_MB_YB(1)<1>                        BUF_B(0, 2)<32;8,4>             { NoDDClr }\r
+       mov (16)        PREV_MB_YB(1, 16)<1>            BUF_B(0, 3)<32;8,4>             { NoDDChk }\r
+*/\r
+// End of load_Left_Y_4x16T_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Top_UV_8x2.asm b/i965_drv_video/shaders/h264/ildb/load_Top_UV_8x2.asm
new file mode 100644 (file)
index 0000000..844291f
--- /dev/null
@@ -0,0 +1,70 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module Name: Load_Top_UV_8X2.Asm\r
+//\r
+// Load UV 8X2 Block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source Region Is :UB\r
+//     BUF_D:                  BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD\r
+\r
+//     Binding Table Index: \r
+//     BI_DEST_UV:             Binding Table Index Of Deblocked (DEST) UV Surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD2:w\r
+#endif\r
+\r
+#if defined(_PROGRESSIVE) \r
+       // Read U+V of the above MB from the DEST surface\r
+    mov (1)    MSGSRC.0:ud             ORIX_TOP:w                                              { NoDDClr }                     // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_TOP:w                      1:w                     { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0001000F:ud                                   { NoDDChk }                     // NV12 U+V block width and height (16x2)\r
+\r
+       // Only the descriptor is set here; the shared send at the end of this file does the read.\r
+       //send (8) TOP_MB_UD(0)<1>      MSGHDRU         MSGSRC<8;8,1>:ud        RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV    \r
+       mov (1) MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud // Read 1 GRF from DEST_UV\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+\r
+//    cmp.z.f0.0 (1)  NULLREGW PicTypeC:w      0:w                                                     // Get pic type flag\r
+    and.nz.f0.1 (1)  NULLREGW  BitFields:w     BotFieldFlag:w                  // Get bottom field flag\r
+       // f0.1 is used later in this file to pick the bottom/top field descriptor\r
+\r
+       // Read U+V of the above MB from the DEST surface\r
+    mov (1)    MSGSRC.0:ud             ORIX_TOP:w                                              { NoDDClr }                     // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_TOP:w                      1:w                     { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0001000F:ud                                   { NoDDChk }                     // NV12 U+V block width and height (16x2)\r
+\r
+       // Load NV12 U+V \r
+       \r
+    // Set message descriptor\r
+    // Frame picture\r
+//    (f0.0) mov (1)   MSGDSC  DWBRMSGDSC_RC+0x00010000+BI_DEST_UV:ud                  // Read 1 GRF from SRC_UV\r
+//     (f0.0) jmpi             Load_Top_UV_8x2\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field\r
+\r
+//Load_Top_UV_8x2:\r
+\r
+       // Read 1 GRF from DEST surface as the above MB has been deblocked.\r
+//     send (8) PREV_MB_UD(0)<1>       MSGHDRU         MSGSRC<8;8,1>:ud        MSGDSC  \r
+\r
+#endif\r
+\r
+       send (8) TOP_MB_UD(0)<1>        MSGHDRU         MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  \r
+               \r
+// End of load_Top_UV_8x2.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Top_UV_8x2_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/load_Top_UV_8x2_Mbaff.asm
new file mode 100644 (file)
index 0000000..d60aa4e
--- /dev/null
@@ -0,0 +1,79 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module Name: load_Top_UV_8x2_Mbaff.asm\r
+//\r
+// Load UV 8X2 Block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source Region Is :UB\r
+//     BUF_D:                  BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD\r
+\r
+//     Binding Table Index: \r
+//     BI_DEST_UV:             Binding Table Index Of Deblocked (DEST) UV Surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD2:w\r
+#endif\r
+\r
+    // FieldModeCurrentMbFlag determines how to access above MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 = flag bit clear (frame path below)\r
+\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w  // Get bottom field flag\r
+\r
+       // Read U+V of the above MB from the DEST surface\r
+    mov (1)    MSGSRC.0:ud             ORIX_TOP:w                                              { NoDDClr }                     // Block origin x\r
+    asr (1)    MSGSRC.1:d              ORIY_TOP:w                      1:w                     { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0001000F:ud                                   { NoDDChk }                     // NV12 U+V block width and height (16x2)\r
+\r
+       // Load NV12 U+V \r
+       \r
+    // Set message descriptor\r
+    \r
+       (f0.0)  if      (1)             ELSE_UV_8X2\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud                 // Read 1 GRF from DEST_UV\r
+\r
+       // Add vertical offset 8 for bot MB in MBAFF mode\r
+       (f0.1) add (1)  MSGSRC.1:d      MSGSRC.1:d              8:w             \r
+    \r
+       // Dual field mode setup (f0.1 is reused here; the bottom-field flag is no longer needed on this path)\r
+       and.z.f0.1 (1) NULLREGW         DualFieldMode:w         1:w             // f0.1 = DualFieldMode bit 0 clear\r
+       (f0.1) jmpi NOT_DUAL_FIELD_UV\r
+\r
+    add (1)    MSGSRC.1:d              MSGSRC.1:d              -2:w                    { NoDDClr }                     // Load 4 lines instead of 2\r
+       mov (1) MSGSRC.2:ud             0x0003000F:ud                                   { NoDDChk }                     // New block width and height (16x4)\r
+\r
+       add (1) MSGDSC                  MSGDSC                  RESP_LEN(1):ud  // 1 more GRF to receive\r
+\r
+NOT_DUAL_FIELD_UV:    \r
+    \r
+ELSE_UV_8X2: \r
+       else    (1)             ENDIF_UV_8X2\r
+\r
+       // Field picture\r
+       asr (1) MSGSRC.1:d              ORIY_CUR:w              2:w                     // asr 1: NV12 U+V block origin y = half of Y comp\r
+                                                                                                               // asr 1: Reduce y by half in field access mode\r
+       \r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field\r
+\r
+       add (1) MSGSRC.1:d              MSGSRC.1:d              -2:w                            // for last 2 rows of above MB\r
+\r
+       endif\r
+ENDIF_UV_8X2:\r
+\r
+       // Read from DEST surface as the above MB has been deblocked (response length comes from MSGDSC).\r
+       send (8) PREV_MB_UD(0)<1>       MSGHDRU MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  \r
+\r
+// End of load_Top_UV_8x2_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Top_Y_16x4.asm b/i965_drv_video/shaders/h264/ildb/load_Top_Y_16x4.asm
new file mode 100644 (file)
index 0000000..7590d62
--- /dev/null
@@ -0,0 +1,70 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module Name: load_Top_Y_16x4.asm\r
+//\r
+// Load Y 16X4 Block of the above MB to TOP_MB_YD\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols Need To Be Defined Before Including This Module\r
+//\r
+//     Source Region In :Ud\r
+//     Src_YD:                 Src_Yd Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud                     // 3 Grfs (2 For Y, 1 For U+V)\r
+//\r
+//     Source Region Is :Ub.  The Same Region As :Ud Region\r
+//     Src_YB:                 Src_Yb Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub            // 2 Grfs\r
+//\r
+//     Binding Table Index: \r
+//     BI_DEST_Y:              Binding Table Index Of Deblocked (DEST) Y Surface\r
+//\r
+//     Temp Buffer:\r
+//     Buf_D:                  Buf_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud\r
+//     Buf_B:                  Buf_B Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD2:w\r
+#endif\r
+\r
+#if defined(_PROGRESSIVE) \r
+       // Read Y: 16x4 block of the above MB from the DEST surface\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_TOP<2;2,1>:w               { NoDDClr }             // Block origin (x and y copied together)\r
+    mov (1)    MSGSRC.2<1>:ud  0x0003000F:ud                   { NoDDChk }             // Block width and height (16x4)\r
+\r
+    mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud                  // Read 2 GRFs from DEST_Y\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+\r
+//    cmp.z.f0.0 (1)  NULLREGW         PicTypeC:w      0:w                                             // Get pic type flag\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // Get bottom field flag\r
+       // f0.1 is used later in this file to pick the bottom/top field descriptor\r
+       \r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_TOP<2;2,1>:w               { NoDDClr }             // Block origin (x and y copied together)\r
+    mov (1)    MSGSRC.2<1>:ud  0x0003000F:ud                   { NoDDChk }             // Block width and height (16x4)\r
+   \r
+    // Set message descriptor\r
+\r
+    // Frame picture\r
+//     (f0.0) mov (1)  MSGDSC  DWBRMSGDSC_RC+0x00020000+BI_DEST_Y:ud                   // Read 2 GRFs from SRC_Y\r
+//     (f0.0) jmpi             load_Y_16x4\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field\r
+\r
+//load_Y_16x4:\r
+    // Read 2 GRFs from DEST surface, as the above MB has been deblocked\r
+//    send (8) PREV_MB_YD(0)<1>        MSGHDRY         MSGSRC<8;8,1>:ud        MSGDSC\r
+    \r
+#endif\r
+    \r
+    send (8) TOP_MB_YD(0)<1>   MSGHDRT         MSGSRC<8;8,1>:ud        DAPREAD MSGDSC\r
+           \r
+// End of load_Top_Y_16x4.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/load_Top_Y_16x4_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/load_Top_Y_16x4_Mbaff.asm
new file mode 100644 (file)
index 0000000..0237882
--- /dev/null
@@ -0,0 +1,81 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: load_Top_Y_16x4_Mbaff.asm\r
+//\r
+// Load Y 16X4 Block to PREV_MB_YD\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols Need To Be Defined Before Including This Module\r
+//\r
+//     Source Region In :Ud\r
+//     Src_YD:                 Src_Yd Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud                     // 3 Grfs (2 For Y, 1 For U+V)\r
+//\r
+//     Source Region Is :Ub.  The Same Region As :Ud Region\r
+//     Src_YB:                 Src_Yb Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub            // 2 Grfs\r
+//\r
+//     Binding Table Index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//\r
+//     Temp Buffer:\r
+//     Buf_D:                  Buf_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud\r
+//     Buf_B:                  Buf_B Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD2:w\r
+#endif\r
+    // FieldModeCurrentMbFlag determines how to access above MB: f0.0 is set when the flag bit is clear (frame branch below)\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                \r
+\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w   BotFieldFlag:w\r
+\r
+       // Read Y: set up block origin and block size in the message source\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_TOP<2;2,1>:w               { NoDDClr }             // Block origin (x and y taken from ORIX_TOP onwards)\r
+    mov (1)    MSGSRC.2<1>:ud  0x0003000F:ud                   { NoDDChk }             // Block width and height (16x4)\r
+   \r
+    // Set message descriptor: (f0.0) takes the frame branch, otherwise the field branch after "else"\r
+\r
+       (f0.0)  if      (1)             ELSE_Y_16x4\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud                  // Read 2 GRFs from DEST_Y (descriptor uses BI_DEST_Y)\r
+\r
+       // Add vertical offset 16 for bot MB in MBAFF mode (applied only when f0.1, i.e. BotFieldFlag, is set)\r
+       (f0.1) add (1)  MSGSRC.1:d      MSGSRC.1:d              16:w            \r
+       \r
+       // Dual field mode setup: f0.1 is overwritten here; when DualFieldMode bit 0 is clear, jump over the 16x8 setup\r
+       and.z.f0.1 (1) NULLREGW         DualFieldMode:w         1:w\r
+       (f0.1) jmpi NOT_DUAL_FIELD\r
+\r
+    add (1)    MSGSRC.1:d              MSGSRC.1:d              -4:w    { NoDDClr }             // Load 8 lines in above MB: move origin y up by 4 more rows\r
+       mov (1) MSGSRC.2:ud             0x0007000F:ud                   { NoDDChk }             // New block width and height (16x8)\r
+       \r
+       add (1) MSGDSC                  MSGDSC                  RESP_LEN(2):ud  // 2 more GRFs to receive (RESP_LEN(2) is added to the descriptor)\r
+\r
+NOT_DUAL_FIELD:\r
+\r
+ELSE_Y_16x4: \r
+       else    (1)             ENDIF_Y_16x4\r
+\r
+       asr (1) MSGSRC.1:d              ORIY_CUR:w              1:w             // Reduce y by half in field access mode (recomputed from ORIY_CUR)\r
+\r
+       // Field picture: f0.1 (BotFieldFlag) selects the bottom-field or top-field read descriptor\r
+    (f0.1) mov (1)     MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field\r
+\r
+       add (1) MSGSRC.1:d              MSGSRC.1:d              -4:w    // for last 4 rows of above MB\r
+\r
+       endif\r
+ENDIF_Y_16x4:\r
+        \r
+    // Read 2 GRFs (4 GRFs when the dual field path above was taken) from DEST surface, as the above MB has been deblocked\r
+    send (8) PREV_MB_YD(0)<1>  MSGHDRY MSGSRC<8;8,1>:ud        DAPREAD MSGDSC\r
+\r
+// End of load_Top_Y_16x4_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/saveNV12_16x16.asm b/i965_drv_video/shaders/h264/ildb/saveNV12_16x16.asm
new file mode 100644 (file)
index 0000000..1cbe27b
--- /dev/null
@@ -0,0 +1,53 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: saveNV12_16x16.asm\r
+//\r
+// Save a NV12 16x16 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 8 GRFs\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 4 GRF\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD4:w\r
+#endif\r
+\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:w               // Block origin (x and y taken from ORIX_CUR onwards)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud           // Block width and height (16x16)\r
+\r
+       // Pack Y: copy SRC_YD(0..7) into the message payload, two GRFs per mov (16)\r
+       mov     (16)    MSGPAYLOADD(0)<1>               SRC_YD(0)               // Compressed inst\r
+       mov (16)        MSGPAYLOADD(2)<1>               SRC_YD(2)\r
+       mov (16)        MSGPAYLOADD(4)<1>               SRC_YD(4)\r
+       mov (16)        MSGPAYLOADD(6)<1>               SRC_YD(6)\r
+    \r
+    send (8)   NULLREG MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y         // Write 8 GRFs of Y through BI_DEST_Y\r
+\r
+\r
+\r
+    asr (1)    MSGSRC.1:ud             MSGSRC.1:ud                     1:w                                             // NV12 U+V block origin y = half of Y comp (origin x is reused unchanged)\r
+    mov (1)    MSGSRC.2<1>:ud  0x0007000F:ud                                                           // NV12 U+V block width and height (16x8)\r
+\r
+       mov (16)        MSGPAYLOADD(0)<1>               SRC_UD(0)               // Compressed inst; the payload registers used for Y are reused for U+V\r
+       mov (16)        MSGPAYLOADD(2)<1>               SRC_UD(2)\r
+\r
+    send (8)   NULLREG MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV                // Write 4 GRFs of U+V through BI_DEST_UV\r
+\r
+\r
+// End of saveNV12_16x16.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/saveNV12_16x4.asm b/i965_drv_video/shaders/h264/ildb/saveNV12_16x4.asm
new file mode 100644 (file)
index 0000000..3a99995
--- /dev/null
@@ -0,0 +1,50 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: saveNV12_16x4.asm\r
+//\r
+// Save a NV12 16x4 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 2 GRFs\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 1 GRF\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD5:w\r
+#endif\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_TOP<2;2,1>:w                                                       // Block origin (x and y taken from ORIX_TOP onwards)\r
+    mov (1)    MSGSRC.2<1>:ud  0x0003000F:ud                                                           // Block width and height (16x4)\r
+\r
+       // Pack Y: a single mov (16) copies the Y source into the message payload\r
+       mov     (16)    MSGPAYLOADD(0)<1>               SRC_YD(0)                                               // Compressed inst\r
+    \r
+    send (8)   NULLREG MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y         // Write 2 GRFs of Y through BI_DEST_Y\r
+\r
+\r
+    asr (1)    MSGSRC.1:ud             MSGSRC.1:ud                     1:w                                             // NV12 U+V block origin y = half of Y comp (origin x is reused unchanged)\r
+    mov (1)    MSGSRC.2<1>:ud  0x0001000F:ud                                                           // NV12 U+V block width and height (16x2)\r
+\r
+       // Pack U and V: SRC_UD is copied as-is by the mov (8) below; the byte-interleaving movs are disabled\r
+//     mov (16)        MSGPAYLOADB(0,0)<2>             SRC_UB(0,0)\r
+//     mov (16)        MSGPAYLOADB(0,1)<2>             SRC_VB(0,0)\r
+       \r
+       mov (8) MSGPAYLOADD(0,0)<1>             SRC_UD(0)       \r
+       \r
+    send (8)   NULLREG MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV        // Write 1 GRF of U+V through BI_DEST_UV\r
+\r
+// End of saveNV12_16x4.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/saveNV12_16x4T.asm b/i965_drv_video/shaders/h264/ildb/saveNV12_16x4T.asm
new file mode 100644 (file)
index 0000000..66085d1
--- /dev/null
@@ -0,0 +1,113 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: saveNV12_16x4T.asm\r
+//\r
+// Transpose 16x4 to 4x16 YNV12 data and write to memory \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Left MB region:\r
+//     PREV_MB_YB:             Base=rxx        ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+//     PREV_MB_UW:     Base=ryy        ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//     Temp buffer:\r
+//     BUF_B:                  BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+//     BUF_W:                  BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+//\r
+//\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD6:w\r
+#endif\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_LEFT<2;2,1>:w              // Block origin (x and y taken from ORIX_LEFT onwards)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   // Block width and height (4x16)\r
+    \r
+// Transpose Y in two region-gather steps, save the result to MRFs\r
+\r
+//     16x4 Y src in GRF (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//  First step         (16)    <1>     <=== <16;4,1>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+       // The first step: gather 4-byte groups at a 16-byte stride (<16;4,1>) from PREV_MB_YB into BUF_B\r
+       mov (16)        BUF_B(0,0)<1>                   PREV_MB_YB(0,0)<16;4,1>\r
+       mov (16)        BUF_B(0,16)<1>                  PREV_MB_YB(0,4)<16;4,1>\r
+       mov (16)        BUF_B(1,0)<1>                   PREV_MB_YB(0,8)<16;4,1>\r
+       mov (16)        BUF_B(1,16)<1>                  PREV_MB_YB(0,12)<16;4,1>\r
+\r
+//\r
+//  Second step                (16)    <1>     <=== <1;4,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+       // The second step: read BUF_B with region <1;4,4> into the message payload (the <32;8,4> variant below is disabled)\r
+//     mov     (16)    MSGPAYLOADB(0,0)<1>             BUF_B(0,0)<32;8,4>                      // Read 2 rows, write 1 row\r
+//     mov (16)        MSGPAYLOADB(0,16)<1>    BUF_B(0,1)<32;8,4>\r
+//     mov (16)        MSGPAYLOADB(1,0)<1>             BUF_B(0,2)<32;8,4>\r
+//     mov (16)        MSGPAYLOADB(1,16)<1>    BUF_B(0,3)<32;8,4>\r
+\r
+       mov     (16)    MSGPAYLOADB(0,0)<1>             BUF_B(0,0)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(0,16)<1>    BUF_B(0,16)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(1,0)<1>             BUF_B(1,0)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(1,16)<1>    BUF_B(1,16)<1;4,4>\r
+\r
+//  Transposed Y in 4x16 is ready for writing to dataport.\r
+//\r
+    send (8)   NULLREG MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y                         // Write 2 GRFs of Y through BI_DEST_Y\r
+\r
+\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////////////////////\r
+\r
+       // Transpose U/V, save them to MRFs in NV12 format\r
+    asr (1)    MSGSRC.1:ud             MSGSRC.1:ud                     1:w                                             // NV12 U+V block origin y = half of Y comp (origin x is reused unchanged)\r
+    mov (1)    MSGSRC.2<1>:ud  0x00070003:ud                                                           // NV12 U+V block width and height (4x8)\r
+\r
+\r
+//     16x2 UV src in GRF (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//     First step              (8)             <1>     <=== <8;4,1>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+       mov (8) BUF_W(0,0)<1>           PREV_MB_UW(0,0)<8;4,1>\r
+       mov (8) BUF_W(0,8)<1>           PREV_MB_UW(0,4)<8;4,1>\r
+\r
+//     Second step             (8)             <1>     <=== <1;2,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+       mov (8) MSGPAYLOADW(0,0)<1>             BUF_W(0,0)<1;2,4>\r
+       mov (8) MSGPAYLOADW(0,8)<1>             BUF_W(0,8)<1;2,4>\r
+\r
+//  Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
\r
+    send (8)   NULLREG MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV                // Write 1 GRF of U+V through BI_DEST_UV\r
+    \r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Cur_UV_8x8.asm b/i965_drv_video/shaders/h264/ildb/save_Cur_UV_8x8.asm
new file mode 100644 (file)
index 0000000..39d4b09
--- /dev/null
@@ -0,0 +1,53 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Cur_UV_8x8.asm\r
+//\r
+// Save UV 8x8 block (8x8U + 8x8V in NV12)\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 4 GRF\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD4:w\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // Get bottom field flag: f0.1 is set when the BotFieldFlag bit of BitFields is set\r
+#endif\r
+\r
+    mov (1)    MSGSRC.0:ud             ORIX_CUR:w                                      { NoDDClr }     // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_CUR:w                      1:w             { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0007000F:ud                           { NoDDChk }             // NV12 U+V block width and height (16x8)\r
+\r
+       mov (16)        MSGPAYLOADD(0)<1>               SRC_UD(0)               // Compressed inst; SRC_UD(0..3) is copied into the message payload\r
+       mov (16)        MSGPAYLOADD(2)<1>               SRC_UD(2)\r
+       \r
+#if defined(_PROGRESSIVE) \r
+       mov (1)         MSGDSC  MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud\r
+//    send (8) NULLREG         MSGHDR          MSGSRC<8;8,1>:ud        DWBWMSGDSC+0x00400000+BI_DEST_UV\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+       // Field picture: f0.1 selects the bottom-field or top-field write descriptor\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV top field\r
+\r
+#endif\r
+\r
+    send (8)   null:ud         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+// End of save_Cur_UV_8x8.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Cur_UV_8x8_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/save_Cur_UV_8x8_Mbaff.asm
new file mode 100644 (file)
index 0000000..6f469ea
--- /dev/null
@@ -0,0 +1,62 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Cur_UV_8x8_Mbaff.asm\r
+//\r
+// Save UV 8x8 block (8x8U + 8x8V in NV12)\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 4 GRF\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD4:w\r
+#endif\r
+\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                \r
+\r
+    and.nz.f0.1 (1)    NULLREGW        BitFields:w     BotFieldFlag:w\r
+\r
+    mov (1)    MSGSRC.0:ud             ORIX_CUR:w                                      { NoDDClr }     // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_CUR:w                      1:w             { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0007000F:ud                           { NoDDChk }             // NV12 U+V block width and height (16x8)\r
+\r
+       mov (16)        MSGPAYLOADD(0)<1>               SRC_UD(0)               // Compressed inst; SRC_UD(0..3) is copied into the message payload\r
+       mov (16)        MSGPAYLOADD(2)<1>               SRC_UD(2)\r
+\r
+    // Set message descriptor: f0.0 (FieldModeCurrentMbFlag bit clear) takes the frame branch, otherwise the field branch\r
+    \r
+       (f0.0)  if      (1)             ELSE_UV_8X8\r
+    \r
+    // Frame picture\r
+    mov (1)    MSGDSC  MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud                     // Write 4 GRFs to DEST_UV\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d      MSGSRC.1:d              8:w             // Add vertical offset 8 for bot MB in MBAFF mode (only when f0.1, i.e. BotFieldFlag, is set)\r
+\r
+ELSE_UV_8X8: \r
+       else    (1)             ENDIF_UV_8X8\r
+\r
+       // Field picture: f0.1 selects the bottom-field or top-field write descriptor\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ENDIF_UV_8X8:\r
+    \r
+    send (8)   null:ud         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+// End of save_Cur_UV_8x8_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Cur_Y_16x16.asm b/i965_drv_video/shaders/h264/ildb/save_Cur_Y_16x16.asm
new file mode 100644 (file)
index 0000000..fac22cd
--- /dev/null
@@ -0,0 +1,56 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Cur_Y_16x16.asm\r
+//\r
+// Save a Y 16x16 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 8 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD4:w\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // Get bottom field flag: f0.1 is set when the BotFieldFlag bit of BitFields is set\r
+#endif\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:w       { NoDDClr }             // Block origin (x and y taken from ORIX_CUR onwards)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud           { NoDDChk }             // Block width and height (16x16)\r
+\r
+       // Pack Y: copy SRC_YD(0..7) into the message payload, two GRFs per mov (16)\r
+       mov     (16)    MSGPAYLOADD(0)<1>               SRC_YD(0)               // Compressed inst\r
+       mov (16)        MSGPAYLOADD(2)<1>               SRC_YD(2)\r
+       mov (16)        MSGPAYLOADD(4)<1>               SRC_YD(4)\r
+       mov (16)        MSGPAYLOADD(6)<1>               SRC_YD(6)\r
+    \r
+\r
+#if defined(_PROGRESSIVE) \r
+       mov (1) MSGDSC  MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y:ud      \r
+//    send (8) NULLREG         MSGHDR          MSGSRC<8;8,1>:ud        DWBWMSGDSC+0x00800000+BI_DEST_Y\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+       // Field picture: f0.1 selects the bottom-field or top-field write descriptor\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y top field\r
+\r
+#endif\r
+\r
+    send (8)   null:ud         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+       \r
+// End of save_Cur_Y_16x16.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Cur_Y_16x16_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/save_Cur_Y_16x16_Mbaff.asm
new file mode 100644 (file)
index 0000000..6ab78dc
--- /dev/null
@@ -0,0 +1,64 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Cur_Y_16x16_Mbaff.asm\r
+//\r
+// Save a Y 16x16 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 8 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD4:w\r
+#endif\r
+\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                \r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_CUR<2;2,1>:w               { NoDDClr }             // Block origin (x and y taken from ORIX_CUR onwards)\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F000F:ud                   { NoDDChk }             // Block width and height (16x16; this constant is never changed to 12x16 in this module)\r
+\r
+       // Pack Y: copy SRC_YD(0..7) into the message payload, two GRFs per mov (16)\r
+       mov     (16)    MSGPAYLOADD(0)<1>               SRC_YD(0)               // Compressed inst\r
+       mov (16)        MSGPAYLOADD(2)<1>               SRC_YD(2)\r
+       mov (16)        MSGPAYLOADD(4)<1>               SRC_YD(4)\r
+       mov (16)        MSGPAYLOADD(6)<1>               SRC_YD(6)\r
+    \r
+    // Set message descriptor: f0.0 (FieldModeCurrentMbFlag bit clear) takes the frame branch, otherwise the field branch\r
+\r
+       (f0.0)  if      (1)             ELSE_Y_16x16\r
+    \r
+    // Frame picture\r
+    mov (1)    MSGDSC  MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y:ud                      // Write 8 GRFs to DEST_Y\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              16:w            // Add vertical offset 16 for bot MB in MBAFF mode (only when f0.1, i.e. BotFieldFlag, is set)\r
+\r
+ELSE_Y_16x16: \r
+       else    (1)             ENDIF_Y_16x16\r
+\r
+       // Field picture: f0.1 selects the bottom-field or top-field write descriptor\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ENDIF_Y_16x16:\r
+    \r
+    send (8)   null:ud         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+// End of save_Cur_Y_16x16_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Left_UV_8x2T.asm b/i965_drv_video/shaders/h264/ildb/save_Left_UV_8x2T.asm
new file mode 100644 (file)
index 0000000..172002e
--- /dev/null
@@ -0,0 +1,72 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Left_UV_8x2T.asm\r
+//\r
+// Transpose 8x2 to 2x8 UV data and write to memory \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Left MB region:\r
+//     PREV_MB_UW:     Base=ryy        ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+\r
+//     Binding table index: \r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//     Temp buffer:\r
+//     LEFT_TEMP_W:            LEFT_TEMP_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+//\r
+//\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD6:w\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+    and.nz.f0.1 (1)  NULLREGW  BitFields:w     BotFieldFlag:w                  // Get bottom field flag: f0.1 is set when the BotFieldFlag bit of BitFields is set\r
+#endif\r
+\r
+       // Transpose U/V, save them to MRFs in NV12 format\r
+    mov (1)    MSGSRC.0:ud             ORIX_LEFT:w                                             { NoDDClr }                     // Block origin x\r
+    asr (1)    MSGSRC.1:ud             ORIY_LEFT:w                     1:w                     { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud                                   { NoDDChk }                     // NV12 U+V block width and height (4x8)\r
+\r
+\r
+//     16x2 UV src in GRF (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//     First step              (8)             <1>     <=== <8;4,1>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+       mov (8) LEFT_TEMP_W(0,0)<1>             PREV_MB_UW(0,0)<8;4,1>          { NoDDClr }\r
+       mov (8) LEFT_TEMP_W(0,8)<1>             PREV_MB_UW(0,4)<8;4,1>          { NoDDChk }\r
+\r
+//     Second step             (8)             <1>     <=== <1;2,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+       mov (8) MSGPAYLOADW(0,0)<1>             LEFT_TEMP_W(0,0)<1;2,4>\r
+       mov (8) MSGPAYLOADW(0,8)<1>             LEFT_TEMP_W(0,8)<1;2,4>\r
+\r
+//  Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
\r
+#if defined(_PROGRESSIVE) \r
+       mov (1) MSGDSC  MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV:ud\r
+//    send (8) NULLREG         MSGHDR          MSGSRC<8;8,1>:ud        DWBWMSGDSC+0x00100000+BI_DEST_UV\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+       // Field picture: f0.1 selects the bottom-field or top-field write descriptor\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field\r
+\r
+#endif\r
+    send (8)   null:ud         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Left_UV_8x2T_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/save_Left_UV_8x2T_Mbaff.asm
new file mode 100644 (file)
index 0000000..f98b311
--- /dev/null
@@ -0,0 +1,82 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Left_UV_8x2T_Mbaff.asm\r
+//\r
+// Transpose 8x2 to 2x8 UV data and write to memory \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Left MB region:\r
+//     PREV_MB_UW:     Base=ryy        ElementSize=2 SrcRegion=REGION(8,1) Type=uw\r
+\r
+//     Binding table index: \r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//     Temp buffer:\r
+//     BUF_W:                  BUF_W Base=rxx ElementSize=1 SrcRegion=REGION(8,1) Type=uw\r
+//\r
+//\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD6:w\r
+#endif\r
+\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 set when FieldModeCurrentMbFlag is clear (selects the frame path below)\r
+   \r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w          // f0.1 set when BotFieldFlag is set\r
+\r
+       // Transpose U/V, save them to MRFs in NV12 format\r
+    mov (1)    MSGSRC.0:ud             ORIX_LEFT:w                                     { NoDDClr }                     // Block origin\r
+    asr (1)    MSGSRC.1:ud             ORIY_LEFT:w                     1:w             { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud                           { NoDDChk }                             // NV12 U+V block width and height (4x8)\r
+\r
+\r
+//     16x2 UV src in GRF (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//     First step              (8)             <1>     <=== <8;4,1>:w\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+       mov (8) BUF_W(0,0)<1>           PREV_MB_UW(0,0)<8;4,1>  { NoDDClr }\r
+       mov (8) BUF_W(0,8)<1>           PREV_MB_UW(0,4)<8;4,1>  { NoDDChk }\r
+\r
+//     Second step             (8)             <1>     <=== <1;2,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+       mov (8) MSGPAYLOADW(0,0)<1>             BUF_W(0,0)<1;2,4>\r
+       mov (8) MSGPAYLOADW(0,8)<1>             BUF_W(0,8)<1;2,4>\r
+\r
+//  Transposed U+V in NV12 in 4x8 is ready for writing to dataport.\r
\r
+    // Set message descriptor (frame or field variant, chosen by f0.0)\r
+    \r
+       (f0.0)  if      (1)             ELSE_UV_8X2T\r
+    \r
+    // Frame picture\r
+    mov (1)    MSGDSC  MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV:ud                     // Write 1 GRF to DEST_UV\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              8:w             // Add vertical offset 8 for bot MB in MBAFF mode\r
+\r
+ELSE_UV_8X2T: \r
+       else    (1)             ENDIF_UV_8X2T\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ENDIF_UV_8X2T:\r
+\r
+    send (8)   null:ud         MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Left_Y_16x4T.asm b/i965_drv_video/shaders/h264/ildb/save_Left_Y_16x4T.asm
new file mode 100644 (file)
index 0000000..84d81e9
--- /dev/null
@@ -0,0 +1,89 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Left_Y_16x4T.asm\r
+//\r
+// Transpose 16x4 to 4x16 Y data and write to memory \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Left MB region:\r
+//     PREV_MB_YB:             Base=rxx        ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//\r
+//     Temp buffer:\r
+//     BUF_B:                  BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+//\r
+//\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD6:w\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // f0.1 set when BotFieldFlag is set (bottom field)\r
+#endif\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_LEFT<2;2,1>:w              { NoDDClr }             // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   { NoDDChk }             // Block width and height (4x16)\r
+    \r
+// Transpose Y, save them to MRFs\r
+\r
+//     16x4 Y src in GRF (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//  First step         (16)    <1>     <=== <16;4,1>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+       // The first step\r
+       mov (16)        LEFT_TEMP_B(0,0)<1>                     PREV_MB_YB(0,0)<16;4,1>         { NoDDClr }     \r
+       mov (16)        LEFT_TEMP_B(0,16)<1>            PREV_MB_YB(0,4)<16;4,1>         { NoDDChk }\r
+       mov (16)        LEFT_TEMP_B(1,0)<1>                     PREV_MB_YB(0,8)<16;4,1>         { NoDDClr }\r
+       mov (16)        LEFT_TEMP_B(1,16)<1>            PREV_MB_YB(0,12)<16;4,1>        { NoDDChk }\r
+\r
+//\r
+//  Second step                (16)    <1>     <=== <1;4,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+       // The second step\r
+       mov     (16)    MSGPAYLOADB(0,0)<1>             LEFT_TEMP_B(0,0)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(0,16)<1>    LEFT_TEMP_B(0,16)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(1,0)<1>             LEFT_TEMP_B(1,0)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(1,16)<1>    LEFT_TEMP_B(1,16)<1;4,4>\r
+\r
+//  Transposed Y in 4x16 is ready for writing to dataport.\r
+\r
+\r
+#if defined(_PROGRESSIVE) \r
+       mov (1) MSGDSC  MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y:ud\r
+//    send (8) NULLREG         MSGHDR          MSGSRC<8;8,1>:ud        DWBWMSGDSC+0x00200000+BI_DEST_Y\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field\r
+\r
+#endif\r
+\r
+    send (8)   null:ud         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Left_Y_16x4T_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/save_Left_Y_16x4T_Mbaff.asm
new file mode 100644 (file)
index 0000000..977ad4f
--- /dev/null
@@ -0,0 +1,101 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Left_Y_16x4T_Mbaff.asm\r
+//\r
+// Transpose 16x4 to 4x16 Y data and write to memory \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Left MB region:\r
+//     PREV_MB_YB:             Base=rxx        ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//\r
+//     Temp buffer:\r
+//     BUF_B:                  BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+//\r
+//\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD6:w\r
+#endif\r
+\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 set when FieldModeCurrentMbFlag is clear (selects the frame path below)\r
+\r
+    and.nz.f0.1 (1)    NULLREGW                BitFields:w     BotFieldFlag:w          // f0.1 set when BotFieldFlag is set\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_LEFT<2;2,1>:w              { NoDDClr }             // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x000F0003:ud                   { NoDDChk }             // Block width and height (4x16)\r
+    \r
+// Transpose Y, save them to MRFs\r
+\r
+//     16x4 Y src in GRF (each pix is specified as yx)\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+//  First step         (16)    <1>     <=== <16;4,1>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+\r
+       // The first step\r
+       mov (16)        BUF_B(0,0)<1>                   PREV_MB_YB(0,0)<16;4,1>         { NoDDClr }\r
+       mov (16)        BUF_B(0,16)<1>                  PREV_MB_YB(0,4)<16;4,1>         { NoDDChk }\r
+       mov (16)        BUF_B(1,0)<1>                   PREV_MB_YB(0,8)<16;4,1>         { NoDDClr }\r
+       mov (16)        BUF_B(1,16)<1>                  PREV_MB_YB(0,12)<16;4,1>        { NoDDChk }     \r
+\r
+//\r
+//  Second step                (16)    <1>     <=== <1;4,4>\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//     |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|\r
+//     +-----------------------+-----------------------+-----------------------+-----------------------+\r
+//\r
+       // The second step\r
+       mov     (16)    MSGPAYLOADB(0,0)<1>             BUF_B(0,0)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(0,16)<1>    BUF_B(0,16)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(1,0)<1>             BUF_B(1,0)<1;4,4>\r
+       mov (16)        MSGPAYLOADB(1,16)<1>    BUF_B(1,16)<1;4,4>\r
+\r
+//  Transposed Y in 4x16 is ready for writing to dataport.\r
+\r
+       //***** Left MB is loaded the same as indicated by FieldModeCurrentMbFlag.\r
+\r
+    // Set message descriptor (frame or field variant, chosen by f0.0)\r
\r
+       (f0.0)  if      (1)             ELSE_Y_16x4T\r
+    \r
+    // Frame picture\r
+    mov (1)    MSGDSC  MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y:ud                      // Write 2 GRFs to DEST_Y\r
+\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              16:w            // Add vertical offset 16 for bot MB in MBAFF mode\r
+\r
+ELSE_Y_16x4T: \r
+       else    (1)             ENDIF_Y_16x4T\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field\r
+\r
+       asr (1) MSGSRC.1:d              MSGSRC.1:d              1:w                                     // Reduce y by half in field access mode\r
+\r
+       endif\r
+ENDIF_Y_16x4T:\r
+\r
+    send (8)   null:ud         MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+\r
+\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Top_UV_8x2.asm b/i965_drv_video/shaders/h264/ildb/save_Top_UV_8x2.asm
new file mode 100644 (file)
index 0000000..5263c35
--- /dev/null
@@ -0,0 +1,52 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Top_UV_8x2.asm\r
+//\r
+// Save UV 8x2 block (8x2U + 8x2V in NV12)\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 1 GRF\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD5:w\r
+#endif\r
+       \r
+#if defined(_FIELD)\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // f0.1 set when BotFieldFlag is set (bottom field)\r
+#endif\r
+\r
+    mov (1)    MSGSRC.0:ud             ORIX_TOP:w                                      { NoDDClr }                             // Block origin\r
+    asr (1)    MSGSRC.1:ud             ORIY_TOP:w                      1:w             { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0001000F:ud                           { NoDDChk }                             // NV12 U+V block width and height (16x2)\r
+\r
+       mov (8) MSGPAYLOADD(0,0)<1>             TOP_MB_UD(0)    \r
+       \r
+\r
+#if defined(_PROGRESSIVE) \r
+       mov (1) MSGDSC  MSG_LEN(1)+DWBWMSGDSC_WC+BI_DEST_UV:ud\r
+//    send (8) NULLREG         MSGHDR          MSGSRC<8;8,1>:ud        DWBWMSGDSC+0x00100000+BI_DEST_UV\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field\r
+\r
+#endif\r
+\r
+    send (8)   WritebackResponse(0)<1>         MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+// End of save_Top_UV_8x2.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Top_UV_8x2_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/save_Top_UV_8x2_Mbaff.asm
new file mode 100644 (file)
index 0000000..ef2ba84
--- /dev/null
@@ -0,0 +1,69 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Top_UV_8x2_Mbaff.asm\r
+//\r
+// Save UV 8x2 block (8x2U + 8x2V in NV12)\r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_UD:                 SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 1 GRF\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_UV:             Binding table index of UV surface (NV12)\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD5:w\r
+#endif\r
+       and.z.f0.1 (8) NULLREGW         DualFieldMode<0;1,0>:w          1:w             // f0.1 set when bit 0 of DualFieldMode is clear (not dual field mode)\r
+\r
+    // FieldModeCurrentMbFlag determines how to access above MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 set when FieldModeCurrentMbFlag is clear (selects the frame path below)\r
+    \r
+       // Pack U and V\r
+    mov (1)    MSGSRC.0:ud             ORIX_TOP:w                                      { NoDDClr }                     // Block origin\r
+    asr (1)    MSGSRC.1:ud             ORIY_TOP:w                      1:w             { NoDDClr, NoDDChk }    // NV12 U+V block origin y = half of Y comp\r
+    mov (1)    MSGSRC.2:ud             0x0001000F:ud                           { NoDDChk }                             // NV12 U+V block width and height (16x2)\r
+\r
+       // Select payload: PREV_MB_UD(0) normally, PREV_MB_UD(1) in dual field mode\r
+       (f0.1) mov (8)  MSGPAYLOADD(0)<1>               PREV_MB_UD(0)\r
+    (-f0.1) mov (8) MSGPAYLOADD(0)<1>          PREV_MB_UD(1)   // for dual field mode, write last 2 rows\r
+       \r
+    // Set message descriptor\r
+\r
+    and.nz.f0.1 (1) NULLREGW           BitFields:w   BotFieldFlag:w            // f0.1 is reused: now set when BotFieldFlag is set\r
+    \r
+       (f0.0)  if      (1)             ELSE_UV_8X2_SAVE\r
+\r
+    // Frame picture\r
+    mov (1)    MSGDSC  MSG_LEN(1)+DWBWMSGDSC_WC+BI_DEST_UV:ud                  // Write 1 GRF to DEST_UV\r
+\r
+       // Add vertical offset 8 for bot MB in MBAFF mode\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              8:w             \r
+\r
+ELSE_UV_8X2_SAVE: \r
+       else    (1)             ENDIF_UV_8X2_SAVE\r
+\r
+       asr (1) MSGSRC.1:d              ORIY_CUR:w              2:w                     // asr 1: NV12 U+V block origin y = half of Y comp\r
+                                                                                                               // asr 1: Reduce y by half in field access mode\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field\r
+\r
+       add (1) MSGSRC.1:d              MSGSRC.1:d              -2:w            // for last 2 rows of above MB\r
+\r
+       endif\r
+ENDIF_UV_8X2_SAVE:\r
+\r
+    send (8)   WritebackResponse(0)<1>         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+// End of save_Top_UV_8x2_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Top_Y_16x4.asm b/i965_drv_video/shaders/h264/ildb/save_Top_Y_16x4.asm
new file mode 100644 (file)
index 0000000..8889087
--- /dev/null
@@ -0,0 +1,52 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Top_Y_16x4.asm\r
+//\r
+// Save a Y 16x4 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 2 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD5:w\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+    and.nz.f0.1 (1) NULLREGW   BitFields:w     BotFieldFlag:w                  // f0.1 set when BotFieldFlag is set (bottom field)\r
+#endif\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_TOP<2;2,1>:w                       { NoDDClr }             // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x0003000F:ud                           { NoDDChk }             // Block width and height (16x4)\r
+\r
+       // Pack Y    \r
+       mov     (16)    MSGPAYLOADD(0)<1>               TOP_MB_YD(0)                                    // Compressed inst: copies 16 dwords (the 2 GRFs written below)\r
+    \r
+\r
+#if defined(_PROGRESSIVE) \r
+       mov (1) MSGDSC  MSG_LEN(2)+DWBWMSGDSC_WC+BI_DEST_Y:ud\r
+//    send (8) NULLREG         MSGHDR          MSGSRC<8;8,1>:ud        DWBWMSGDSC+0x00200000+BI_DEST_Y\r
+#endif\r
+\r
+#if defined(_FIELD)\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field\r
+\r
+#endif\r
+\r
+    send (8)   WritebackResponse(0)<1>         MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+// End of save_Top_Y_16x4.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/save_Top_Y_16x4_Mbaff.asm b/i965_drv_video/shaders/h264/ildb/save_Top_Y_16x4_Mbaff.asm
new file mode 100644 (file)
index 0000000..d8bb9a7
--- /dev/null
@@ -0,0 +1,69 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_Top_Y_16x4_Mbaff.asm\r
+//\r
+// Save a Y 16x4 block \r
+//\r
+//----------------------------------------------------------------\r
+//  Symbols need to be defined before including this module\r
+//\r
+//     Source region in :ud\r
+//     SRC_YD:                 SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud                     // 2 GRFs\r
+//\r
+//     Binding table index: \r
+//     BI_DEST_Y:              Binding table index of Y surface\r
+//\r
+//----------------------------------------------------------------\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0xDDD5:w\r
+#endif\r
+\r
+       and.z.f0.1 (16) NULLREGW                DualFieldMode<0;1,0>:w          1:w             // f0.1 set when bit 0 of DualFieldMode is clear (not dual field mode)\r
+\r
+    // FieldModeCurrentMbFlag determines how to access above MB\r
+       and.z.f0.0 (1)  null:w          r[ECM_AddrReg, BitFlags]:ub             FieldModeCurrentMbFlag:w                // f0.0 set when FieldModeCurrentMbFlag is clear (selects the frame path below)\r
+\r
+    mov (2)    MSGSRC.0<1>:ud  ORIX_TOP<2;2,1>:w               { NoDDClr }                     // Block origin\r
+    mov (1)    MSGSRC.2<1>:ud  0x0003000F:ud                   { NoDDChk }                     // Block width and height (16x4)\r
+\r
+       // Pack Y\r
+       // Select payload: PREV_MB_YD(0) normally, PREV_MB_YD(2) in dual field mode\r
+       (f0.1) mov      (16) MSGPAYLOADD(0)<1>          PREV_MB_YD(0)                           // Compressed inst\r
+    (-f0.1)  mov (16) MSGPAYLOADD(0)<1>                PREV_MB_YD(2)                           // for dual field mode, write last 4 rows\r
+    \r
+    // Set message descriptor\r
+\r
+    and.nz.f0.1 (1) NULLREGW           BitFields:w   BotFieldFlag:w            // f0.1 is reused: now set when BotFieldFlag is set\r
+\r
+       (f0.0)  if      (1)             ELSE_Y_16x4_SAVE\r
+    \r
+    // Frame picture\r
+    mov (1)    MSGDSC  MSG_LEN(2)+DWBWMSGDSC_WC+BI_DEST_Y:ud                   // Write 2 GRFs to DEST_Y\r
+\r
+       // Add vertical offset 16 for bot MB in MBAFF mode\r
+       (f0.1) add (1)  MSGSRC.1:d              MSGSRC.1:d              16:w            \r
+\r
+ELSE_Y_16x4_SAVE: \r
+       else    (1)             ENDIF_Y_16x4_SAVE\r
+\r
+       asr (1) MSGSRC.1:d              ORIY_CUR:w              1:w                                     // Reduce y by half in field access mode\r
+\r
+       // Field picture\r
+    (f0.1) mov (1)     MSGDSC  MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field\r
+    (-f0.1) mov (1)    MSGDSC  MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field\r
+\r
+       add (1) MSGSRC.1:d              MSGSRC.1:d              -4:w    // for last 4 rows of above MB\r
+\r
+       endif\r
+ENDIF_Y_16x4_SAVE:\r
+    \r
+    send (8)   WritebackResponse(0)<1>         MSGHDR          MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+// End of save_Top_Y_16x4_Mbaff.asm\r
diff --git a/i965_drv_video/shaders/h264/ildb/writeURB.asm b/i965_drv_video/shaders/h264/ildb/writeURB.asm
new file mode 100644 (file)
index 0000000..c75b220
--- /dev/null
@@ -0,0 +1,38 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: WriteURB.asm\r
+//\r
+// General purpose module to write data to URB using the URB handle/offset in r0\r
+//\r
+//----------------------------------------------------------------\r
+//     Assume:\r
+//     - a0.0 and a0.1 are the msg desc, and have been assigned the URB offset and msg size\r
+//     - MRFs are already assigned with data.\r
+//----------------------------------------------------------------\r
+//\r
+//  16x16 byte pixel block can be saved using just 1 "send" instruction.\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignature:w                        0x3535:w\r
+#endif\r
+\r
+// URB write header:\r
+//mov (8) MSGSRC.0:ud  r0.0<8;8,1>:ud                  // Copy parent R0 header\r
+\r
+//shr (1)      Temp2_W:uw      URBOffset:uw    1:w     // divide by 2, because URB entry is counted by 512bits.  Offset is counted by 256bits.\r
+//add (1) MSGSRC.0:uw          r0.0:uw         Temp2_W:uw      \r
+\r
+\r
+shr (1)        MSGSRC.0:uw             URBOffset:uw    1:w     // Divide by 2: URB entries are counted in 512-bit units, while the offset is counted in 256-bit units.\r
+//mov (1)      MSGSRC.0:uw             URBOffset_2:uw\r
+\r
+//mov (1) MSGSRC.1:ud  0:ud                                    // Reset Handle 1\r
+\r
+send  null:uw  m0        MSGSRC:uw             URBWRITE        URBWriteMsgDesc:ud              // URB write\r
+//send  null:ud        MRF0     null:ud        URBWriteMsgDesc:ud              // URB write    \r
diff --git a/i965_drv_video/shaders/h264/ildb/writeURB_UV_Child.asm b/i965_drv_video/shaders/h264/ildb/writeURB_UV_Child.asm
new file mode 100644 (file)
index 0000000..27d257a
--- /dev/null
@@ -0,0 +1,39 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: writeURB_UV_Child.asm\r
+//\r
+// General purpose module to write data to URB using the URB handle/offset in r0\r
+//\r
+//----------------------------------------------------------------\r
+//     Assume:\r
+//     - a0.0 and a0.1 are the msg desc, and have been assigned the URB offset and msg size\r
+//     - MRFs are already assigned with data.\r
+//----------------------------------------------------------------\r
+//\r
+//  16x16 byte pixel block can be saved using just 1 "send" instruction.\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x3535:w\r
+#endif\r
+\r
+// URB write header:\r
+//mov (8) MSGSRC.0:ud  r0.0<8;8,1>:ud                  // Copy parent R0 header\r
+\r
+//shr (1)      Temp2_W:uw      URBOffsetC:uw   1:w     // divide by 2, because URB entry is counted by 512bits.  Offset is counted by 256bits.\r
+//add (1) MSGSRC.0:uw          r0.0:uw         Temp2_W:uw      \r
+\r
+shr (1)        MSGSRC.0:uw             URBOffsetC:uw   1:w     // Divide by 2: URB entries are counted in 512-bit units, while the offset is counted in 256-bit units.\r
+\r
+//mov (1) MSGSRC.1:ud  0:ud                                    // Reset Handle 1\r
+\r
+       // URB write 1 MRF, \r
+       // Current MB offset is in URBOffset, use it as write origin\r
+       // Add 2 to offset to store data to be passed to the right MB\r
+\r
+send  null:uw  m0        MSGSRC:uw             URBWRITE        MSG_LEN(1)+URBWMSGDSC+0x20              // URB write\r
diff --git a/i965_drv_video/shaders/h264/ildb/writeURB_Y_Child.asm b/i965_drv_video/shaders/h264/ildb/writeURB_Y_Child.asm
new file mode 100644 (file)
index 0000000..e51e4ad
--- /dev/null
@@ -0,0 +1,40 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: writeURB_Y_Child.asm\r
+//\r
+// General purpose module to write data to URB using the URB handle/offset in r0\r
+//\r
+//----------------------------------------------------------------\r
+//     Assume:\r
+//     - a0.0 and a0.1 are the msg desc, and have been assigned the URB offset and msg size\r
+//     - MRFs are already assigned with data.\r
+//----------------------------------------------------------------\r
+//\r
+//  16x16 byte pixel block can be saved using just 1 "send" instruction.\r
+\r
+#if defined(_DEBUG) \r
+       mov             (1)             EntrySignatureC:w                       0x3535:w\r
+#endif\r
+\r
+// URB write header:\r
+//mov (8) MSGSRC.0:ud  r0.0<8;8,1>:ud                  // Copy parent R0 header\r
+\r
+//shr (1)      Temp2_W:uw      URBOffsetC:uw   1:w     // divide by 2, because URB entry is counted by 512bits.  Offset is counted by 256bits.\r
+//add (1) MSGSRC.0:uw          r0.0:uw         Temp2_W:uw      \r
+\r
+shr (1)        MSGSRC.0:uw             URBOffsetC:uw   1:w     // Divide by 2: URB entries are counted in 512-bit units, while the offset is counted in 256-bit units.\r
+\r
+//mov (1) MSGSRC.1:ud  0:ud                                    // Reset Handle 1\r
+\r
+       // URB write 2 MRFs, \r
+       // Current MB offset is in URBOffset, use it as write origin\r
+       // Add 2 to offset to store data to be passed to the right MB\r
+       //mov (1) URBWriteMsgDesc:ud            0x06300020:ud\r
+\r
+send  null:uw  m0        MSGSRC:uw             URBWRITE        MSG_LEN(2)+URBWMSGDSC+0x20              // URB write\r
diff --git a/i965_drv_video/shaders/h264/mc/AVCMCInter.asm b/i965_drv_video/shaders/h264/mc/AVCMCInter.asm
new file mode 100644 (file)
index 0000000..691fb33
--- /dev/null
@@ -0,0 +1,254 @@
+/*\r
+ * All inter-prediction macroblock kernels \r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: AVCMCInter.asm\r
+\r
+#ifdef INTERLABEL\r
+#undef INTERLABEL\r
+#endif\r
+\r
+#if defined(MBAFF)\r
+//                             < MBaff_Motion >\r
+#define INTERLABEL(x)  x##_##MBF\r
+#elif defined(FIELD)\r
+//                             < FieldMB_Motion >\r
+#define INTERLABEL(x)  x##_##FLD\r
+#else // FRAME\r
+//                             < FrameMB_Motion >\r
+#define INTERLABEL(x)  x##_##FRM\r
+#endif\r
+//\r
+// Decoding an inter-prediction macroblock (conditional compile)\r
+//     -DMBAFF : MBAff picture MB\r
+//     -DFRAME : Frame picture MB\r
+//     -DFIELD : Field picture MB\r
+//     -DMBAFF -DMONO : MBAff mono picture MB\r
+//     -DFRAME -DMONO : Frame mono picture MB\r
+//     -DFIELD -DMONO : Field mono picture MB\r
+\r
+\r
+//#if !defined(__AVCMCInter__)         // Make sure this is only included once\r
+//#define __AVCMCInter__\r
+\r
+\r
+// TODO: header files need to be in sync with intra prediction\r
+#include "header.inc"\r
+#include "inter_Header.inc"\r
+\r
+// TODO: Kernel names for mono cases\r
+#if defined(MBAFF)\r
+.kernel MBAff_Motion\r
+MBAFF_MB:\r
+#elif defined(FIELD)\r
+.kernel FieldMB_Motion\r
+FIELD_MB:\r
+#else // Frame\r
+.kernel FrameMB_Motion\r
+FRAME_MB:\r
+#endif\r
+\r
+#ifdef _DEBUG\r
+// WA for FULSIM so we'll know which kernel is being debugged\r
+#if defined(MBAFF)\r
+mov (1) acc0:ud 0x0aaa55a5:ud\r
+#elif defined(FIELD)\r
+mov (1) acc0:ud 0x0baa55a5:ud\r
+#else // Frame\r
+mov (1) acc0:ud 0x0caa55a5:ud\r
+#endif\r
+#endif\r
+\r
+\r
+#ifdef SW_SCOREBOARD\r
+    CALL(scoreboard_start_inter,1)\r
+#endif\r
+\r
+       mov (8)         gMSGSRC<1>:ud           r0.0<8;8,1>:ud          // Initialize message header payload with R0\r
+       \r
+       and (1)         gwMBTYPE<1>                     gMBTYPE:ub                                              nMBTYPE_MASK:w          // MB type\r
+       shl (2)         gX<1>:w                         gORIX<2;2,1>:ub                                 4:w // Convert MB origin to pixel unit\r
+       \r
+//     #include "process_inter16x16.asm"                                       // Handle B_L0_16x16 case with zero MVs and weighted pred off.\r
+       // In the case of B_L0_16x16 with zero MVs and weighted pred off, the kernel jumps to INTERLABEL(EXIT_LOOP).\r
+       \r
+INTERLABEL(INIT_MBPARA):\r
+       #include "initialize_MBPara.asm"\r
+       \r
+       \r
+    //========================= BEGIN - LOOP_SUBMB ===========================\r
+       mov (1)         gLOOP_SUBMB:uw          0:uw                            // 0, 2, 4, 6\r
+INTERLABEL(LOOP_SUBMB):\r
+\r
+       //========================== BEGIN - LOOP_DIR ============================\r
+       // Prediction flag (gPREDFLAG - 0:Pred_L0, 1:Pred_L1, 2:BiPred)\r
+       asr (1)         gPREDFLAG:w                     gSUBMB_MODE:ub                                  gLOOP_SUBMB:uw\r
+       mov (1)         gLOOP_DIR:uw            1:uw                            // 1, 0\r
+       and (1)         gPREDFLAG:w                     gPREDFLAG:w                                             0x3:w\r
+INTERLABEL(LOOP_DIR):\r
+\r
+       cmp.e.f0.0 (1) null:w                   gLOOP_DIR:w                                             gPREDFLAG:w     \r
+       (f0.0) jmpi     INTERLABEL(LOOP_DIR_CONTINUE)   \r
+       \r
+    // Get binding table index \r
+    // & reference picture parity (gREFPARITY - 0:top, 0x100:bottom, x:frame)\r
+    // & address of interpolation result\r
+    cmp.e.f0.1 (1) null:w                      gLOOP_DIR:w                                             1:w\r
+    (f0.1) mov (1)             gpINTP:ud                       nOFFSET_INTP0:ud                                                {NoDDClr} //\r
+    (f0.1) and (1)             gBIDX:w                         r[pBIDX]:ub                                             0x7f:w  {NoDDChk} //\r
+    (-f0.1) mov (1)            gpINTP:ud                       nOFFSET_INTP1:ud                                                {NoDDClr} //\r
+       (-f0.1) and (1)         gBIDX:w                         r[pBIDX,4]:ub                                   0x7f:w  {NoDDChk} //\r
+#if defined(MBAFF) || defined(FIELD)\r
+    (f0.1) and (1)             gREFPARITY:w                            r[pBIDX]:ub                                             0x80:w\r
+    (-f0.1) and (1)            gREFPARITY:w                            r[pBIDX,4]:ub                                   0x80:w\r
+    shl (1)            gREFPARITY:w            gREFPARITY<0;1,0>:w                             1:w\r
+#endif\r
+\r
+       // Sub MB shape\r
+       asr (1)         gSHAPETEMP:w            gSUBMB_SHAPE:ub                                 gLOOP_SUBMB:w\r
+       \r
+    // Chroma MV adjustment & Set message descriptor for frame/field read\r
+#if defined(MBAFF)\r
+       #include "chromaMVAdjust.asm" \r
+    and.nz.f0.0 (1) null:uw                    gFIELDMBFLAG:ub                                 nFIELDMB_MASK:uw\r
+    (f0.0) add (1) gD0:ud                      gBIDX:uw                                                nDWBRMSGDSC_SC_TF:ud\r
+    (-f0.0) add (1)    gMSGDSC_R:ud    gBIDX:uw                                                nDWBRMSGDSC_SC:ud\r
+    (f0.0) add (1) gMSGDSC_R:ud                gD0:ud                                                  gREFPARITY:uw\r
+#elif defined(FIELD)\r
+       #include "chromaMVAdjust.asm" \r
+    add (1)            gMSGDSC_R:ud            gBIDX:uw                                                nDWBRMSGDSC_SC_TF:ud\r
+    add (1)            gMSGDSC_R:ud            gMSGDSC_R:ud                                    gREFPARITY:uw\r
+#else // FRAME\r
+       add (1)         gMSGDSC_R:ud            gBIDX:uw                                                nDWBRMSGDSC_SC:ud\r
+#endif\r
+\r
+       and.nz.f0.1 (1) null:w                  gSHAPETEMP:w                                    3:w     \r
+       (f0.1) jmpi INTERLABEL(PROCESS4x4)\r
+       \r
+       //======================== BEGIN - PROCESS 8x8 ===========================\r
+       \r
+       // Reference block load\r
+       #include "loadRef_Y_16x13.asm"\r
+#ifndef MONO\r
+#if defined(MBAFF) || defined(FIELD)\r
+       add (1)         r[pMV,2]:w                      r[pMV,2]:w                                              gCHRMVADJ:w\r
+#endif\r
+       #include "loadRef_C_10x5.asm"\r
+#endif\r
+\r
+       // Interpolation\r
+       //CALL_INTER(INTERLABEL(Interpolate_Y_8x8_Func), 1)\r
+       #include "interpolate_Y_8x8.asm"\r
+#ifndef MONO\r
+       //CALL_INTER(INTERLABEL(Interpolate_C_4x4_Func), 1)\r
+       #include "interpolate_C_4x4.asm"\r
+#endif\r
+\r
+       jmpi INTERLABEL(ROUND_SHIFT_C)\r
+       //========================= END - PROCESS 8x8 ============================\r
+       \r
+       //======================== BEGIN - LOOP_SUBMBPT ==========================\r
+INTERLABEL(PROCESS4x4):\r
+\r
+       mov (1)         gLOOP_SUBMBPT:uw        4:uw                            // 4, 3, 2, 1\r
+INTERLABEL(LOOP_SUBMBPT):\r
+\r
+       // Reference block load\r
+       #include "loadRef_Y_16x9.asm"\r
+#ifndef MONO\r
+#if defined(MBAFF) || defined(FIELD)\r
+       add (1)         r[pMV,2]:w                      r[pMV,2]:w                                              gCHRMVADJ:w\r
+#endif\r
+       #include "loadRef_C_6x3.asm"\r
+#endif\r
+\r
+       // Interpolation\r
+       #include "interpolate_Y_4x4.asm"\r
+#ifndef MONO\r
+       #include "interpolate_C_2x2.asm"\r
+#endif\r
+       \r
+       cmp.e.f0.0 (1) null:w                   gLOOP_SUBMBPT:uw                                3:w\r
+       add.z.f0.1 (1) gLOOP_SUBMBPT:uw gLOOP_SUBMBPT:uw                                -1:w\r
+       add (1)         pMV:w                           pMV:w                                                   8:w     \r
+       (-f0.0) add (1) gpINTP:ud               gpINTP:ud                                               0x00080008:ud   // 8 & 8\r
+       (f0.0) add (1) gpINTP:ud                gpINTP:ud                                               0x00180038:ud   // 24 & 56\r
+       (-f0.1) jmpi INTERLABEL(LOOP_SUBMBPT)\r
+    \r
+    cmp.e.f0.1 null:w                          gLOOP_DIR:w                                             1:w     // f0.1 = (gLOOP_DIR == 1). NOTE(review): no explicit (1) execution size here, unlike the same compare at the top of LOOP_DIR - confirm the assembler default is intended\r
+    add (1)            pMV:w                           pMV:w                                                   -32:w   // Rewind pMV by the 4 x 8 it was advanced in LOOP_SUBMBPT\r
+    (f0.1) mov (1) gpINTP:ud           nOFFSET_INTP0:ud        // gLOOP_DIR == 1: reset gpINTP to nOFFSET_INTP0\r
+    (-f0.1) mov (1) gpINTP:ud          nOFFSET_INTP1:ud        // otherwise: reset gpINTP to nOFFSET_INTP1\r
+\r
+       mov (1)         pRESULT:uw                                      gpINTPC:uw\r
+    \r
+       //========================= END - LOOP_SUBMBPT ===========================\r
+    \r
+INTERLABEL(ROUND_SHIFT_C):\r
+    \r
+#ifndef MONO\r
+       #include "roundShift_C_4x4.asm"\r
+#endif\r
+\r
+INTERLABEL(LOOP_DIR_CONTINUE):\r
+       \r
+       add.nz.f0.1 (1) gLOOP_DIR:uw    gLOOP_DIR:uw                                    -1:w\r
+       add (1)         pMV:w                           pMV:w                                                   4:w\r
+    (-f0.1) jmpi INTERLABEL(LOOP_DIR)\r
+    //=========================== END - LOOP_DIR =============================\r
+\r
+INTERLABEL(Weighted_Prediction):\r
+       #include "weightedPred.asm"\r
+       \r
+       and.z.f0.1 (16) null<1>:w               gLOOP_SUBMB<0;1,0>:uw                   2:w\r
+\r
+       #include "recon_Y_8x8.asm"\r
+#ifndef MONO\r
+       #include "recon_C_4x4.asm"\r
+\r
+       (-f0.1) add (1) pERRORC:w               pERRORC:w                                               48:w\r
+#endif\r
+\r
+       cmp.e.f0.1 (1) null:w                   gLOOP_SUBMB:uw                                  6:w     // f0.1 = (gLOOP_SUBMB == 6), evaluated before the increment; consumed by the jmpi below\r
+       add (1)         gLOOP_SUBMB:uw          gLOOP_SUBMB:uw                                  2:w     // Step sub-MB counter: 0, 2, 4, 6\r
+\r
+       add (1)         pWGT_BIDX:ud            pWGT_BIDX:ud                                    0x00100001:ud   // High word += 0x10 (16), low word += 1\r
+       add (1)         pMV:w                           pMV:w                                                   gMVSTEP:w       // Advance pMV by gMVSTEP\r
+\r
+    (-f0.1) jmpi INTERLABEL(LOOP_SUBMB)        // Loop again unless the compare above matched\r
+    //========================== END - LOOP_SUBMB ============================\r
+    \r
+INTERLABEL(EXIT_LOOP):   \r
+       #include "writeRecon_YC.asm"    \r
+\r
+#ifdef SW_SCOREBOARD    \r
+       wait    n0:ud           //      Now wait for scoreboard to respond\r
+    #include "Soreboard_update.asm"    // scoreboard update function. NOTE(review): include name is spelled "Soreboard" - verify it matches the file on disk\r
+#else\r
+// Check for write commit first if SW scoreboard is disabled\r
+       mov     (1)     gREG_WRITE_COMMIT_Y<1>:ud       gREG_WRITE_COMMIT_Y<0;1,0>:ud           // Make sure Y write is committed\r
+       mov     (1)     gREG_WRITE_COMMIT_UV<1>:ud      gREG_WRITE_COMMIT_UV<0;1,0>:ud          // Make sure U/V write is committed\r
+#endif\r
+\r
+// Terminate the thread\r
+//\r
+    END_THREAD\r
+\r
+\r
+//#include "Interpolate_Y_8x8_Func.asm"\r
+//#include "Interpolate_C_4x4_Func.asm"\r
+//#include "WeightedPred_Y_Func.asm"   \r
+//#include "WeightedPred_C_Func.asm"   \r
+\r
+\r
+.end_code\r
+\r
+.end_kernel\r
+\r
+        \r
+//#endif       // !defined(__AVCMCInter__)\r
diff --git a/i965_drv_video/shaders/h264/mc/AllAVC.asm b/i965_drv_video/shaders/h264/mc/AllAVC.asm
new file mode 100644 (file)
index 0000000..a9149ff
--- /dev/null
@@ -0,0 +1,426 @@
+/*\r
+ * All HWMC kernels \r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+\r
+// Kernel name: AllAVC.asm\r
+//\r
+// All HWMC kernels merged into this file\r
+//\r
+//  $Revision: 2 $\r
+//  $Date: 9/10/06 2:02a $\r
+//\r
+\r
+// Note: To enable SW scoreboard for ILK AVC kernels, simply toggle the HW_SCOREBOARD \r
+//              and SW_SCOREBOARD definition as described below.\r
+//\r
+// ----------------------------------------------------\r
+//  Main: AllAVC\r
+// ----------------------------------------------------\r
+\r
+#define        COMBINED_KERNEL\r
+//#define      ENABLE_ILDB\r
+\r
+//     WA for *Stim tool issue, should be removed later\r
+\r
+#ifdef DEV_ILK\r
+#define INSTFACTOR     2       // 128-bit count as 2 instructions\r
+#else\r
+#define INSTFACTOR     1       // 128-bit is 1 instruction\r
+#endif // DEV_ILK\r
+\r
+#ifdef DEV_CTG\r
+  #define SW_SCOREBOARD                // SW Scoreboard should be enabled for CTG and earlier\r
+  #undef HW_SCOREBOARD         // HW Scoreboard should be disabled for CTG and earlier\r
+#else\r
+  #define HW_SCOREBOARD                // HW Scoreboard should be enabled for ILK and beyond\r
+  #undef SW_SCOREBOARD         // SW Scoreboard should be disabled for ILK and beyond\r
+#endif // DEV_CTG\r
+#include "export.inc"\r
+#if defined(_EXPORT)\r
+       #include "AllAVC_Export.inc"\r
+#elif defined(_BUILD)\r
+       #include "AllAVC.ich"                   // ISAasm dumped .exports\r
+       #include "AllAVC_Export.inc"    // Keep jumping targets aligned, only for CTG and beyond\r
+       #include "AllAVC_Build.inc"\r
+#else\r
+#endif\r
+\r
+.kernel AllAVC\r
+\r
+// Build all intra prediction kernels\r
+//\r
+#ifdef INTRA_16x16_PAD_NENOP\r
+    $for(0; <INTRA_16x16_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef INTRA_16x16_PAD_NOP\r
+    $for(0; <INTRA_16x16_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "Intra_16x16.asm"\r
+\r
+#ifdef INTRA_8x8_PAD_NENOP\r
+    $for(0; <INTRA_8x8_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef INTRA_8x8_PAD_NOP\r
+    $for(0; <INTRA_8x8_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "Intra_8x8.asm"\r
+\r
+#ifdef INTRA_4x4_PAD_NENOP\r
+    $for(0; <INTRA_4x4_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef INTRA_4x4_PAD_NOP\r
+    $for(0; <INTRA_4x4_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "Intra_4x4.asm"\r
+\r
+#ifdef INTRA_PCM_PAD_NENOP\r
+    $for(0; <INTRA_PCM_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef INTRA_PCM_PAD_NOP\r
+    $for(0; <INTRA_PCM_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "Intra_PCM.asm"\r
+\r
+// Build FrameMB_Motion kernel\r
+//\r
+#define FRAME\r
+\r
+  #ifdef FRAME_MB_PAD_NENOP\r
+    $for(0; <FRAME_MB_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+  #endif\r
+  #ifdef FRAME_MB_PAD_NOP\r
+    $for(0; <FRAME_MB_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+  #endif\r
+    #include "AVCMCInter.asm"\r
+#undef  FRAME\r
+\r
+// Build FieldMB_Motion kernel\r
+//\r
+#define FIELD\r
+\r
+  #ifdef FIELD_MB_PAD_NENOP\r
+    $for(0; <FIELD_MB_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+  #endif\r
+  #ifdef FIELD_MB_PAD_NOP\r
+    $for(0; <FIELD_MB_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+  #endif\r
+    #include "AVCMCInter.asm"\r
+#undef  FIELD\r
+\r
+// Build MBAff_Motion kernel\r
+//\r
+#define MBAFF\r
+\r
+  #ifdef MBAFF_MB_PAD_NENOP\r
+    $for(0; <MBAFF_MB_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+  #endif\r
+  #ifdef MBAFF_MB_PAD_NOP\r
+    $for(0; <MBAFF_MB_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+  #endif\r
+    #include "AVCMCInter.asm"\r
+#undef  MBAFF\r
+\r
+#ifdef SW_SCOREBOARD    \r
+\r
+// SW scoreboard kernel for non-MBAFF\r
+//\r
+#ifdef SCOREBOARD_PAD_NENOP\r
+    $for(0; <SCOREBOARD_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef SCOREBOARD_PAD_NOP\r
+    $for(0; <SCOREBOARD_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "scoreboard.asm"\r
+\r
+//     SW scoreboard kernel for MBAFF\r
+\r
+#ifdef SCOREBOARD_MBAFF_PAD_NENOP\r
+    $for(0; <SCOREBOARD_MBAFF_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef SCOREBOARD_MBAFF_PAD_NOP\r
+    $for(0; <SCOREBOARD_MBAFF_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "scoreboard_MBAFF.asm"\r
+\r
+#elif defined(HW_SCOREBOARD)\r
\r
+// SetHWscoreboard kernel for non-MBAFF\r
+//\r
+#ifdef SETHWSCOREBOARD_PAD_NENOP\r
+    $for(0; <SETHWSCOREBOARD_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef SETHWSCOREBOARD_PAD_NOP\r
+    $for(0; <SETHWSCOREBOARD_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "SetHWScoreboard.asm"\r
+\r
+//     SetHWscoreboard kernel for MBAFF\r
+\r
+#ifdef SETHWSCOREBOARD_MBAFF_PAD_NENOP\r
+    $for(0; <SETHWSCOREBOARD_MBAFF_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef SETHWSCOREBOARD_MBAFF_PAD_NOP\r
+    $for(0; <SETHWSCOREBOARD_MBAFF_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "SetHWScoreboard_MBAFF.asm"\r
+\r
+#endif // SW_SCOREBOARD\r
+\r
+#ifdef BSDRESET_PAD_NENOP\r
+    $for(0; <BSDRESET_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef BSDRESET_PAD_NOP\r
+    $for(0; <BSDRESET_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "BSDReset.asm"\r
+\r
+#ifdef DCRESETDUMMY_PAD_NENOP\r
+    $for(0; <DCRESETDUMMY_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef DCRESETDUMMY_PAD_NOP\r
+    $for(0; <DCRESETDUMMY_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "DCResetDummy.asm"\r
+\r
+#ifdef ENABLE_ILDB\r
+\r
+// Build all ILDB kernels\r
+//\r
+//     Undefine some previous defined symbols since they will be re-defined/re-declared in ILDB kernels\r
+#undef A\r
+#undef B\r
+#undef p0\r
+#undef p1\r
+\r
+#define MSGPAYLOADB MSGPAYLOADB_ILDB\r
+#define MSGPAYLOADW MSGPAYLOADW_ILDB\r
+#define MSGPAYLOADD MSGPAYLOADD_ILDB\r
+#define MSGPAYLOADF MSGPAYLOADF_ILDB\r
+\r
+//                             < Frame ILDB >\r
+#define _PROGRESSIVE\r
+#define ILDB_LABEL(x)  x##_ILDB_FRAME\r
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NENOP\r
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NOP\r
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Root_Y.asm"\r
+\r
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NENOP\r
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NOP\r
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Child_Y.asm"\r
+\r
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NENOP\r
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NOP\r
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Root_UV.asm"\r
+\r
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NENOP\r
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NOP\r
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Child_UV.asm"\r
+#undef ILDB_LABEL\r
+#undef _PROGRESSIVE\r
+\r
+//                             < Field ILDB >\r
+#define _FIELD\r
+#define ILDB_LABEL(x)  x##_ILDB_FIELD\r
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NENOP\r
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NOP\r
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Root_Field_Y.asm"\r
+\r
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NENOP\r
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NOP\r
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Child_Field_Y.asm"\r
+\r
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NENOP\r
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NOP\r
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Root_Field_UV.asm"\r
+\r
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NENOP\r
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NOP\r
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Child_Field_UV.asm"\r
+#undef ILDB_LABEL\r
+#undef _FIELD\r
+\r
+//                             < MBAFF Frame ILDB >\r
+#define _MBAFF\r
+#define ILDB_LABEL(x)  x##_ILDB_MBAFF\r
+#ifdef AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NENOP\r
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NOP\r
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Root_Mbaff_Y.asm"\r
+\r
+#ifdef AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NENOP\r
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NOP\r
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Child_Mbaff_Y.asm"\r
+\r
+#ifdef AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NENOP\r
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NOP\r
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Root_Mbaff_UV.asm"\r
+\r
+#ifdef AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NENOP\r
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NENOP; 1) {\r
+       nenop\r
+       }\r
+#endif\r
+#ifdef AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NOP\r
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NOP; 1) {\r
+       nop\r
+       }\r
+#endif\r
+    #include "AVC_ILDB_Child_Mbaff_UV.asm"\r
+#undef ILDB_LABEL\r
+#undef _MBAFF\r
+\r
+#endif         // ENABLE_ILDB\r
+\r
+AllAVC_END:\r
+nop\r
+// End of AllAVC\r
+\r
+.end_code\r
+\r
+.end_kernel\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/AllAVCField.asm b/i965_drv_video/shaders/h264/mc/AllAVCField.asm
new file mode 100644 (file)
index 0000000..88240c3
--- /dev/null
@@ -0,0 +1,70 @@
+/*\r
+ * All field picture HWMC kernels \r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     2857702934      // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets\r
+//    0    // Offset to Intra_16x16 luma prediction mode 0\r
+//    9    // Offset to Intra_16x16 luma prediction mode 1\r
+//   19    // Offset to Intra_16x16 luma prediction mode 2\r
+//   42    // Offset to Intra_16x16 luma prediction mode 3\r
+//     2857699336      // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets\r
+//    0    // Offset to Intra_8x8 luma prediction mode 0\r
+//    5    // Offset to Intra_8x8 luma prediction mode 1\r
+//   10    // Offset to Intra_8x8 luma prediction mode 2\r
+//   26    // Offset to Intra_8x8 luma prediction mode 3\r
+//   36    // Offset to Intra_8x8 luma prediction mode 4\r
+//   50    // Offset to Intra_8x8 luma prediction mode 5\r
+//   68    // Offset to Intra_8x8 luma prediction mode 6\r
+//   85    // Offset to Intra_8x8 luma prediction mode 7\r
+//   95    // Offset to Intra_8x8 luma prediction mode 8\r
+//     2857698308      // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets\r
+//    0    // Offset to Intra_4x4 luma prediction mode 0\r
+//    2    // Offset to Intra_4x4 luma prediction mode 1\r
+//    4    // Offset to Intra_4x4 luma prediction mode 2\r
+//   16    // Offset to Intra_4x4 luma prediction mode 3\r
+//   23    // Offset to Intra_4x4 luma prediction mode 4\r
+//   32    // Offset to Intra_4x4 luma prediction mode 5\r
+//   45    // Offset to Intra_4x4 luma prediction mode 6\r
+//   59    // Offset to Intra_4x4 luma prediction mode 7\r
+//   66    // Offset to Intra_4x4 luma prediction mode 8\r
+//     2857700364      // 0xAA550C0C - GUID for intra chroma prediction mode offsets\r
+//    0    // Offset to intra chroma prediction mode 0\r
+//   30    // Offset to intra chroma prediction mode 1\r
+//   36    // Offset to intra chroma prediction mode 2\r
+//   41    // Offset to intra chroma prediction mode 3\r
+\r
+// Kernel name: AllAVCField.asm\r
+//\r
+// All field picture HWMC kernels merged into this file\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 4/13/06 4:35p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: AllAVCField\r
+// ----------------------------------------------------\r
+\r
+#define        ALLHWMC\r
+#define        COMBINED_KERNEL\r
+\r
+.kernel AllAVCField\r
+\r
+    #include "Intra_PCM.asm"\r
+    #include "Intra_16x16.asm"\r
+    #include "Intra_8x8.asm"\r
+    #include "Intra_4x4.asm"\r
+    #include "scoreboard.asm"\r
+\r
+       #define FIELD\r
+       #include "AVCMCInter.asm"\r
+\r
+// End of AllAVCField\r
+\r
+.end_kernel\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/AllAVCFrame.asm b/i965_drv_video/shaders/h264/mc/AllAVCFrame.asm
new file mode 100644 (file)
index 0000000..8871627
--- /dev/null
@@ -0,0 +1,69 @@
+/*\r
+ * All frame picture HWMC kernels \r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     2857702934      // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets\r
+//    0    // Offset to Intra_16x16 luma prediction mode 0\r
+//    9    // Offset to Intra_16x16 luma prediction mode 1\r
+//   19    // Offset to Intra_16x16 luma prediction mode 2\r
+//   42    // Offset to Intra_16x16 luma prediction mode 3\r
+//     2857699336      // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets\r
+//    0    // Offset to Intra_8x8 luma prediction mode 0\r
+//    5    // Offset to Intra_8x8 luma prediction mode 1\r
+//   10    // Offset to Intra_8x8 luma prediction mode 2\r
+//   26    // Offset to Intra_8x8 luma prediction mode 3\r
+//   36    // Offset to Intra_8x8 luma prediction mode 4\r
+//   50    // Offset to Intra_8x8 luma prediction mode 5\r
+//   68    // Offset to Intra_8x8 luma prediction mode 6\r
+//   85    // Offset to Intra_8x8 luma prediction mode 7\r
+//   95    // Offset to Intra_8x8 luma prediction mode 8\r
+//     2857698308      // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets\r
+//    0    // Offset to Intra_4x4 luma prediction mode 0\r
+//    2    // Offset to Intra_4x4 luma prediction mode 1\r
+//    4    // Offset to Intra_4x4 luma prediction mode 2\r
+//   16    // Offset to Intra_4x4 luma prediction mode 3\r
+//   23    // Offset to Intra_4x4 luma prediction mode 4\r
+//   32    // Offset to Intra_4x4 luma prediction mode 5\r
+//   45    // Offset to Intra_4x4 luma prediction mode 6\r
+//   59    // Offset to Intra_4x4 luma prediction mode 7\r
+//   66    // Offset to Intra_4x4 luma prediction mode 8\r
+//     2857700364      // 0xAA550C0C - GUID for intra chroma prediction mode offsets\r
+//    0    // Offset to intra chroma prediction mode 0\r
+//   30    // Offset to intra chroma prediction mode 1\r
+//   36    // Offset to intra chroma prediction mode 2\r
+//   41    // Offset to intra chroma prediction mode 3\r
+\r
+// Kernel name: AllAVCFrame.asm\r
+//\r
+// All frame picture HWMC kernels merged into this file\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 4/13/06 4:35p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: AllAVCFrame\r
+// ----------------------------------------------------\r
+\r
+#define        ALLHWMC\r
+#define        COMBINED_KERNEL\r
+\r
+.kernel AllAVCFrame\r
+\r
+    #include "Intra_PCM.asm"\r
+    #include "Intra_16x16.asm"\r
+    #include "Intra_8x8.asm"\r
+    #include "Intra_4x4.asm"\r
+    #include "scoreboard.asm"\r
+\r
+       #include "AVCMCInter.asm"\r
+\r
+// End of AllAVCFrame\r
+\r
+.end_kernel\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/AllAVCMBAFF.asm b/i965_drv_video/shaders/h264/mc/AllAVCMBAFF.asm
new file mode 100644 (file)
index 0000000..1dd06ed
--- /dev/null
@@ -0,0 +1,70 @@
+/*\r
+ * All MBAFF frame picture HWMC kernels \r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     2857702934      // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets\r
+//    0    // Offset to Intra_16x16 luma prediction mode 0\r
+//    9    // Offset to Intra_16x16 luma prediction mode 1\r
+//   19    // Offset to Intra_16x16 luma prediction mode 2\r
+//   42    // Offset to Intra_16x16 luma prediction mode 3\r
+//     2857699336      // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets\r
+//    0    // Offset to Intra_8x8 luma prediction mode 0\r
+//    5    // Offset to Intra_8x8 luma prediction mode 1\r
+//   10    // Offset to Intra_8x8 luma prediction mode 2\r
+//   26    // Offset to Intra_8x8 luma prediction mode 3\r
+//   36    // Offset to Intra_8x8 luma prediction mode 4\r
+//   50    // Offset to Intra_8x8 luma prediction mode 5\r
+//   68    // Offset to Intra_8x8 luma prediction mode 6\r
+//   85    // Offset to Intra_8x8 luma prediction mode 7\r
+//   95    // Offset to Intra_8x8 luma prediction mode 8\r
+//     2857698308      // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets\r
+//    0    // Offset to Intra_4x4 luma prediction mode 0\r
+//    2    // Offset to Intra_4x4 luma prediction mode 1\r
+//    4    // Offset to Intra_4x4 luma prediction mode 2\r
+//   16    // Offset to Intra_4x4 luma prediction mode 3\r
+//   23    // Offset to Intra_4x4 luma prediction mode 4\r
+//   32    // Offset to Intra_4x4 luma prediction mode 5\r
+//   45    // Offset to Intra_4x4 luma prediction mode 6\r
+//   59    // Offset to Intra_4x4 luma prediction mode 7\r
+//   66    // Offset to Intra_4x4 luma prediction mode 8\r
+//     2857700364      // 0xAA550C0C - GUID for intra chroma prediction mode offsets\r
+//    0    // Offset to intra chroma prediction mode 0\r
+//   30    // Offset to intra chroma prediction mode 1\r
+//   36    // Offset to intra chroma prediction mode 2\r
+//   41    // Offset to intra chroma prediction mode 3\r
+\r
+// Kernel name: AllAVCMBAFF.asm\r
+//\r
+// All MBAFF frame picture HWMC kernels merged into this file\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 4/13/06 4:35p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: AllAVCMBAFF\r
+// ----------------------------------------------------\r
+\r
+#define        ALLHWMC\r
+#define        COMBINED_KERNEL\r
+\r
+.kernel AllAVCMBAFF\r
+\r
+    #include "Intra_PCM.asm"\r
+    #include "Intra_16x16.asm"\r
+    #include "Intra_8x8.asm"\r
+    #include "Intra_4x4.asm"\r
+    #include "scoreboard.asm"\r
+\r
+       #define MBAFF\r
+       #include "AVCMCInter.asm"\r
+\r
+// End of AllAVCMBAFF\r
+\r
+.end_kernel\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/AllAVC_Build.inc b/i965_drv_video/shaders/h264/mc/AllAVC_Build.inc
new file mode 100644 (file)
index 0000000..5bfb753
--- /dev/null
@@ -0,0 +1,82 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+$table {\r
+AllAVC_END_IP/INSTFACTOR    // Total instruction count\r
+#if (defined(SW_SCOREBOARD) || defined(HW_SCOREBOARD)) && defined(ENABLE_ILDB)\r
+//    23    // Total kernel count\r
+#elif defined(SW_SCOREBOARD) || defined(HW_SCOREBOARD)\r
+//    11    // Total kernel count\r
+#elif defined(ENABLE_ILDB)\r
+//    21    // Total kernel count\r
+#else\r
+//     9    // Total kernel count\r
+#endif\r
+INTRA_16x16_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_16x16'\r
+INTRA_8x8_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_8x8'\r
+INTRA_4x4_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_4x4'\r
+INTRA_PCM_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_PCM'\r
+FRAME_MB_ENTRY/INSTFACTOR    // Instruction offset to 'FrameMB_Motion'\r
+FIELD_MB_ENTRY/INSTFACTOR    // Instruction offset to 'FieldMB_Motion'\r
+MBAFF_MB_ENTRY/INSTFACTOR    // Instruction offset to 'MBAff_Motion'\r
+#ifdef SW_SCOREBOARD    \r
+SCOREBOARD_ENTRY/INSTFACTOR                    // Instruction offset to 'scoreboard'\r
+SCOREBOARD_MBAFF_ENTRY/INSTFACTOR      // Instruction offset to 'scoreboard_MBAFF'\r
+#elif defined(HW_SCOREBOARD)\r
+SETHWSCOREBOARD_ENTRY/INSTFACTOR               // Instruction offset to 'AVC_SetIntraDepend'\r
+SETHWSCOREBOARD_MBAFF_ENTRY/INSTFACTOR // Instruction offset to 'AVC_SetIntraDependMBAFF'\r
+#endif // SW_SCOREBOARD\r
+#ifdef ENABLE_ILDB\r
+AVC_ILDB_ROOT_Y_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Y'\r
+AVC_ILDB_CHILD_Y_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Y'\r
+AVC_ILDB_ROOT_UV_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_UV'\r
+AVC_ILDB_CHILD_UV_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_UV'\r
+AVC_ILDB_ROOT_Y_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Field_Y'\r
+AVC_ILDB_CHILD_Y_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Field_Y'\r
+AVC_ILDB_ROOT_UV_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Field_UV'\r
+AVC_ILDB_CHILD_UV_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Field_UV'\r
+AVC_ILDB_ROOT_Y_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Mbaff_Y'\r
+AVC_ILDB_CHILD_Y_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Mbaff_Y'\r
+AVC_ILDB_ROOT_UV_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Mbaff_UV'\r
+AVC_ILDB_CHILD_UV_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Mbaff_UV'\r
+#endif // ENABLE_ILDB\r
+BSDRESET_ENTRY/INSTFACTOR    // Instruction offset to 'BSDReset'\r
+DCRESETDUMMY_ENTRY/INSTFACTOR    // Instruction offset to 'DCResetDummy'\r
+\r
+//    0    // Instruction offset to Intra_4x4_luma_prediction_mode_0\r
+INTRA_4X4_HORIZONTAL_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_1\r
+INTRA_4X4_DC_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_2\r
+INTRA_4X4_DIAG_DOWN_LEFT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_3\r
+INTRA_4X4_DIAG_DOWN_RIGHT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_4\r
+INTRA_4X4_VERT_RIGHT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_5\r
+INTRA_4X4_HOR_DOWN_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_6\r
+INTRA_4X4_VERT_LEFT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_7\r
+INTRA_4X4_HOR_UP_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_8\r
+\r
+//    0    // Instruction offset to Intra_8x8_luma_prediction_mode_0\r
+INTRA_8X8_HORIZONTAL_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_1\r
+INTRA_8X8_DC_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_2\r
+INTRA_8X8_DIAG_DOWN_LEFT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_3\r
+INTRA_8X8_DIAG_DOWN_RIGHT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_4\r
+INTRA_8X8_VERT_RIGHT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_5\r
+INTRA_8X8_HOR_DOWN_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_6\r
+INTRA_8X8_VERT_LEFT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_7\r
+INTRA_8X8_HOR_UP_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_8\r
+\r
+//    0    // Instruction offset to Intra_16x16_luma_prediction_mode_0\r
+INTRA_16x16_HORIZONTAL_IP-INTRA_16x16_VERTICAL_IP    // Instruction offset to Intra_16x16_luma_prediction_mode_1\r
+INTRA_16x16_DC_IP-INTRA_16x16_VERTICAL_IP    // Instruction offset to Intra_16x16_luma_prediction_mode_2\r
+INTRA_16x16_PLANE_IP-INTRA_16x16_VERTICAL_IP    // Instruction offset to Intra_16x16_luma_prediction_mode_3\r
+\r
+//    0    // Instruction offset to intra_chroma_prediction_mode_0\r
+INTRA_CHROMA_HORIZONTAL_IP-INTRA_CHROMA_DC_IP    // Instruction offset to intra_chroma_prediction_mode_1\r
+INTRA_CHROMA_VERTICAL_IP-INTRA_CHROMA_DC_IP    // Instruction offset to intra_chroma_prediction_mode_2\r
+INTRA_Chroma_PLANE_IP-INTRA_CHROMA_DC_IP    // Instruction offset to intra_chroma_prediction_mode_3\r
+\r
+intra_Pred_4x4_Y_IP-ADD_ERROR_SB3_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB2_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB1_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB0_IP  // Instruction offset to intra_4x4_pred_module\r
+}\r
diff --git a/i965_drv_video/shaders/h264/mc/AllAVC_Export.inc b/i965_drv_video/shaders/h264/mc/AllAVC_Export.inc
new file mode 100644 (file)
index 0000000..6bb3eff
--- /dev/null
@@ -0,0 +1,172 @@
+.export entry_point INTRA_16x16\r
+.export entry_point INTRA_8x8\r
+.export entry_point INTRA_4x4\r
+.export entry_point INTRA_PCM\r
+.export entry_point FRAME_MB\r
+.export entry_point FIELD_MB\r
+.export entry_point MBAFF_MB\r
+#ifdef SW_SCOREBOARD    \r
+.export entry_point SCOREBOARD\r
+.export entry_point SCOREBOARD_MBAFF\r
+#elif defined(HW_SCOREBOARD)\r
+.export entry_point SETHWSCOREBOARD\r
+.export entry_point SETHWSCOREBOARD_MBAFF\r
+#endif // SW_SCOREBOARD\r
+\r
+#ifdef ENABLE_ILDB\r
+.export entry_point AVC_ILDB_ROOT_Y_ILDB_FRAME\r
+.export entry_point AVC_ILDB_CHILD_Y_ILDB_FRAME\r
+.export entry_point AVC_ILDB_ROOT_UV_ILDB_FRAME\r
+.export entry_point AVC_ILDB_CHILD_UV_ILDB_FRAME\r
+.export entry_point AVC_ILDB_ROOT_Y_ILDB_FIELD\r
+.export entry_point AVC_ILDB_CHILD_Y_ILDB_FIELD\r
+.export entry_point AVC_ILDB_ROOT_UV_ILDB_FIELD\r
+.export entry_point AVC_ILDB_CHILD_UV_ILDB_FIELD\r
+.export entry_point AVC_ILDB_ROOT_Y_ILDB_MBAFF\r
+.export entry_point AVC_ILDB_CHILD_Y_ILDB_MBAFF\r
+.export entry_point AVC_ILDB_ROOT_UV_ILDB_MBAFF\r
+.export entry_point AVC_ILDB_CHILD_UV_ILDB_MBAFF\r
+#endif // ENABLE_ILDB\r
+\r
+.export entry_point BSDRESET\r
+.export entry_point DCRESETDUMMY\r
+\r
+.export label INTRA_16x16_VERTICAL\r
+.export label INTRA_16x16_HORIZONTAL\r
+.export label INTRA_16x16_DC\r
+.export label INTRA_16x16_PLANE\r
+\r
+.export label INTRA_8X8_VERTICAL\r
+.export label INTRA_8X8_HORIZONTAL\r
+.export label INTRA_8X8_DC\r
+.export label INTRA_8X8_DIAG_DOWN_LEFT\r
+.export label INTRA_8X8_DIAG_DOWN_RIGHT\r
+.export label INTRA_8X8_VERT_RIGHT\r
+.export label INTRA_8X8_HOR_DOWN\r
+.export label INTRA_8X8_VERT_LEFT\r
+.export label INTRA_8X8_HOR_UP\r
+\r
+.export label INTRA_4X4_VERTICAL\r
+.export label INTRA_4X4_HORIZONTAL\r
+.export label INTRA_4X4_DC\r
+.export label INTRA_4X4_DIAG_DOWN_LEFT\r
+.export label INTRA_4X4_DIAG_DOWN_RIGHT\r
+.export label INTRA_4X4_VERT_RIGHT\r
+.export label INTRA_4X4_HOR_DOWN\r
+.export label INTRA_4X4_VERT_LEFT\r
+.export label INTRA_4X4_HOR_UP\r
+\r
+.export label INTRA_CHROMA_DC\r
+.export label INTRA_CHROMA_HORIZONTAL\r
+.export label INTRA_CHROMA_VERTICAL\r
+.export label INTRA_Chroma_PLANE\r
+\r
+.export label intra_Pred_4x4_Y\r
+.export label ADD_ERROR_SB0\r
+.export label ADD_ERROR_SB1\r
+.export label ADD_ERROR_SB2\r
+.export label ADD_ERROR_SB3\r
+\r
+.export label AllAVC_END\r
+\r
+#ifdef SW_SCOREBOARD    \r
+.export label MB_Loop\r
+.export label No_Message\r
+.export label Dependency_Check\r
+.export label Notify_MSG\r
+.export label Update_CurMB\r
+.export label MBAFF_MB_Loop\r
+.export label MBAFF_No_Message\r
+.export label MBAFF_Dependency_Check\r
+.export label MBAFF_Notify_MSG\r
+.export label MBAFF_Update_CurMB\r
+\r
+//.export label \r
+\r
+// Definitions for first pass MC kernel building\r
+#ifndef No_Message_IP\r
+#define No_Message_IP  0\r
+#endif\r
+\r
+#ifndef Dependency_Check_IP\r
+#define Dependency_Check_IP    0\r
+#endif\r
+\r
+#ifndef Notify_MSG_IP\r
+#define Notify_MSG_IP  0\r
+#endif\r
+\r
+#ifndef Update_CurMB_IP\r
+#define Update_CurMB_IP        0\r
+#endif\r
+\r
+#ifndef MBAFF_No_Message_IP\r
+#define MBAFF_No_Message_IP    0\r
+#endif\r
+\r
+#ifndef MBAFF_Dependency_Check_IP\r
+#define MBAFF_Dependency_Check_IP      0\r
+#endif\r
+\r
+#ifndef MBAFF_Notify_MSG_IP\r
+#define MBAFF_Notify_MSG_IP    0\r
+#endif\r
+\r
+#ifndef        AS_ENABLED\r
+ #ifndef MBAFF_MB_Loop_IP\r
+ #define MBAFF_MB_Loop_IP      0\r
+ #endif\r
+\r
+ #ifndef MB_Loop_IP\r
+ #define MB_Loop_IP    0\r
+ #endif\r
+#endif // End AS_ENABLED\r
+\r
+#ifndef MBAFF_Update_CurMB_IP\r
+#define MBAFF_Update_CurMB_IP  0\r
+#endif\r
+\r
+#endif // SW_SCOREBOARD\r
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+\r
+#ifdef ENABLE_ILDB\r
+.export label  ALL_SPAWNED_UV_ILDB_FRAME\r
+.export label  SLEEP_ENTRY_UV_ILDB_FRAME\r
+.export label  POST_SLEEP_UV_ILDB_FRAME\r
+.export label  ALL_SPAWNED_Y_ILDB_FRAME\r
+.export label  SLEEP_ENTRY_Y_ILDB_FRAME\r
+.export label  POST_SLEEP_Y_ILDB_FRAME\r
+\r
+// Definitions for first pass ILDB kernel building\r
+#ifndef        ALL_SPAWNED_UV_ILDB_FRAME_IP\r
+#define        ALL_SPAWNED_UV_ILDB_FRAME_IP    0\r
+#endif\r
+\r
+#ifndef        SLEEP_ENTRY_UV_ILDB_FRAME_IP\r
+#define        SLEEP_ENTRY_UV_ILDB_FRAME_IP    0\r
+#endif\r
+\r
+#ifndef        POST_SLEEP_UV_ILDB_FRAME_IP\r
+#define        POST_SLEEP_UV_ILDB_FRAME_IP     0\r
+#endif\r
+\r
+#ifndef        ALL_SPAWNED_Y_ILDB_FRAME_IP\r
+#define        ALL_SPAWNED_Y_ILDB_FRAME_IP     0\r
+#endif\r
+\r
+#ifndef        SLEEP_ENTRY_Y_ILDB_FRAME_IP\r
+#define        SLEEP_ENTRY_Y_ILDB_FRAME_IP     0\r
+#endif\r
+\r
+#ifndef        POST_SLEEP_Y_ILDB_FRAME_IP\r
+#define        POST_SLEEP_Y_ILDB_FRAME_IP      0\r
+#endif\r
+\r
+#endif // ENABLE_ILDB\r
diff --git a/i965_drv_video/shaders/h264/mc/AllIntra.asm b/i965_drv_video/shaders/h264/mc/AllIntra.asm
new file mode 100644 (file)
index 0000000..1cc895a
--- /dev/null
@@ -0,0 +1,68 @@
+/*\r
+ * All intra-prediction macroblock kernels \r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     2857702934      // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets\r
+//    0    // Offset to Intra_16x16 luma prediction mode 0\r
+//    9    // Offset to Intra_16x16 luma prediction mode 1\r
+//   19    // Offset to Intra_16x16 luma prediction mode 2\r
+//   42    // Offset to Intra_16x16 luma prediction mode 3\r
+//     2857699336      // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets\r
+//    0    // Offset to Intra_8x8 luma prediction mode 0\r
+//    5    // Offset to Intra_8x8 luma prediction mode 1\r
+//   10    // Offset to Intra_8x8 luma prediction mode 2\r
+//   26    // Offset to Intra_8x8 luma prediction mode 3\r
+//   36    // Offset to Intra_8x8 luma prediction mode 4\r
+//   50    // Offset to Intra_8x8 luma prediction mode 5\r
+//   68    // Offset to Intra_8x8 luma prediction mode 6\r
+//   85    // Offset to Intra_8x8 luma prediction mode 7\r
+//   95    // Offset to Intra_8x8 luma prediction mode 8\r
+//     2857698308      // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets\r
+//    0    // Offset to Intra_4x4 luma prediction mode 0\r
+//    2    // Offset to Intra_4x4 luma prediction mode 1\r
+//    4    // Offset to Intra_4x4 luma prediction mode 2\r
+//   16    // Offset to Intra_4x4 luma prediction mode 3\r
+//   23    // Offset to Intra_4x4 luma prediction mode 4\r
+//   32    // Offset to Intra_4x4 luma prediction mode 5\r
+//   45    // Offset to Intra_4x4 luma prediction mode 6\r
+//   59    // Offset to Intra_4x4 luma prediction mode 7\r
+//   66    // Offset to Intra_4x4 luma prediction mode 8\r
+//     2857700364      // 0xAA550C0C - GUID for intra chroma prediction mode offsets\r
+//    0    // Offset to intra chroma prediction mode 0\r
+//   30    // Offset to intra chroma prediction mode 1\r
+//   36    // Offset to intra chroma prediction mode 2\r
+//   41    // Offset to intra chroma prediction mode 3\r
+\r
+// Kernel name: AllIntra.asm\r
+//\r
+// All intra-prediction HWMC kernels merged into this file\r
+//\r
+//  $Revision: 1 $\r
+//  $Date: 4/13/06 4:35p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: ALLINTRA\r
+// ----------------------------------------------------\r
+\r
+#define        ALLHWMC\r
+#define        COMBINED_KERNEL\r
+\r
+.kernel ALLINTRA\r
+\r
+    // All frame destination HWMC kernels\r
+    //\r
+    #include "Intra_PCM.asm"\r
+    #include "Intra_16x16.asm"\r
+    #include "Intra_8x8.asm"\r
+    #include "Intra_4x4.asm"\r
+\r
+// End of ALLINTRA\r
+\r
+.end_kernel\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/BSDReset.asm b/i965_drv_video/shaders/h264/mc/BSDReset.asm
new file mode 100644 (file)
index 0000000..5c6e5df
--- /dev/null
@@ -0,0 +1,43 @@
+/*\r
+ * Initial kernel for filling initial BSD command buffer\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: BSDReset.asm\r
+//\r
+// Initial kernel for filling initial BSD command buffer\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: BSDReset\r
+// ----------------------------------------------------\r
+\r
+.kernel BSDReset\r
+BSDRESET:\r
+\r
+#include "header.inc"\r
+\r
+.code\r
+#ifdef SW_SCOREBOARD\r
+    CALL(scoreboard_start_inter,1)     // Only assembled when SW_SCOREBOARD is defined\r
+       wait    n0:ud           //      Now wait for the scoreboard to respond\r
+\r
+#define BSDRESET_ENABLE\r
+       #include "scoreboard_update.asm"        // Scoreboard update function, included while BSDRESET_ENABLE is defined\r
+#undef BSDRESET_ENABLE\r
+\r
+#endif // defined(SW_SCOREBOARD)\r
+\r
+// Terminate the thread (the scoreboard section above is present only in SW_SCOREBOARD builds)\r
+//\r
+    END_THREAD\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif // !defined(COMBINED_KERNEL)\r
diff --git a/i965_drv_video/shaders/h264/mc/DCResetDummy.asm b/i965_drv_video/shaders/h264/mc/DCResetDummy.asm
new file mode 100644 (file)
index 0000000..d4e52a9
--- /dev/null
@@ -0,0 +1,34 @@
+/*\r
+ * Dummy kernel\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: DCResetDummy.asm\r
+//\r
+// Dummy kernel used by driver for debug-counter reset SW WA\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: DCResetDummy\r
+// ----------------------------------------------------\r
+\r
+.kernel DCResetDummy\r
+DCRESETDUMMY:\r
+\r
+#include "header.inc"\r
+\r
+.code\r
+\r
+// Terminate the thread immediately; END_THREAD is the only statement in this kernel's code section\r
+//\r
+    END_THREAD\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif // !defined(COMBINED_KERNEL)\r
diff --git a/i965_drv_video/shaders/h264/mc/Decode_Chroma_Intra.asm b/i965_drv_video/shaders/h264/mc/Decode_Chroma_Intra.asm
new file mode 100644 (file)
index 0000000..7799825
--- /dev/null
@@ -0,0 +1,29 @@
+/*\r
+ * Decode both intra chroma blocks\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__DECODE_CHROMA_INTRA__)          // Make sure this is only included once\r
+#define __DECODE_CHROMA_INTRA__\r
+\r
+// Module name: Decode_Chroma_Intra.asm\r
+//\r
+// Subroutine decode_Chroma_Intra: decode both intra chroma (U/V) blocks; it ends with RETURN, so it is meant to be entered via CALL\r
+// When MONO is defined, only the save_8x8_UV step is assembled (reference load, prediction and error add are skipped)\r
+\r
+decode_Chroma_Intra:\r
+#ifndef MONO\r
+    #include "load_Intra_Ref_UV.asm"   // Load intra U/V reference data\r
+    #include "intra_Pred_Chroma.asm"   // Intra predict chroma blocks\r
+    #include "add_Error_UV.asm"                        // Add error data to predicted U/V data blocks\r
+#endif // !defined(MONO)\r
+    #include "save_8x8_UV.asm"                 // Save to destination U/V frame surface\r
+\r
+       RETURN\r
+// End of Decode_Chroma_Intra\r
+\r
+#endif // !defined(__DECODE_CHROMA_INTRA__)\r
diff --git a/i965_drv_video/shaders/h264/mc/EndIntraThread.asm b/i965_drv_video/shaders/h264/mc/EndIntraThread.asm
new file mode 100644 (file)
index 0000000..2c78b62
--- /dev/null
@@ -0,0 +1,30 @@
+/*\r
+ * Common module to end current intra thread\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: EndIntraThread.asm\r
+//\r
+// Common module, included at the end of an intra kernel, that ends the current intra thread\r
+//\r
+#ifndef SW_SCOREBOARD\r
+// Check for write commit first if SW scoreboard is disabled (each register below is moved onto itself)\r
+       mov     (1)     REG_WRITE_COMMIT_Y<1>:ud        REG_WRITE_COMMIT_Y<0;1,0>:ud            // Make sure Y write is committed\r
+       mov     (1)     REG_WRITE_COMMIT_UV<1>:ud       REG_WRITE_COMMIT_UV<0;1,0>:ud           // Make sure U/V write is committed\r
+#endif // !defined(SW_SCOREBOARD)\r
+\r
+       END_THREAD\r
+\r
+    #include "Intra_funcLib.asm"\r
+\r
+#ifndef COMBINED_KERNEL                // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif // !defined(COMBINED_KERNEL)\r
+\r
+// End of EndIntraThread\r
diff --git a/i965_drv_video/shaders/h264/mc/HwmcOnlyHeader.inc b/i965_drv_video/shaders/h264/mc/HwmcOnlyHeader.inc
new file mode 100644 (file)
index 0000000..514cb78
--- /dev/null
@@ -0,0 +1,29 @@
+/*\r
+ * Header file used only in HWMC_ONLY mode\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: HwmcOnlyHeader.inc\r
+//\r
+// Header file used only in HWMC_ONLY mode\r
+//\r
+\r
+#include "header.inc"\r
+\r
+#if !defined(__HWMCONLYHEADER__)       // Make sure the following are only included once\r
+#define __HWMCONLYHEADER__\r
+\r
+.reg_count_total    64\r
+.reg_count_payload  2\r
+\r
+//\r
+//  Now, begin source code....\r
+//\r
+\r
+.code\r
+#endif // !defined(__HWMCONLYHEADER__)\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/Intra_16x16.asm b/i965_drv_video/shaders/h264/mc/Intra_16x16.asm
new file mode 100644 (file)
index 0000000..e40e6a3
--- /dev/null
@@ -0,0 +1,71 @@
+/*\r
+ * Decode Intra_16x16 macroblock\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Intra_16x16.asm\r
+//\r
+// Decoding of Intra_16x16 macroblock\r
+//\r
+//  $Revision: 8 $\r
+//  $Date: 10/18/06 4:10p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: Intra_16x16\r
+// ----------------------------------------------------\r
+\r
+#define        INTRA_16X16\r
+\r
+.kernel Intra_16x16\r
+INTRA_16x16:\r
+\r
+#ifdef _DEBUG\r
+// Workaround (WA) for FULSIM: write a kernel-specific marker value to acc0 so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0x00aa55a5:ud\r
+#endif\r
+\r
+#include "SetupForHWMC.asm"\r
+\r
+#ifdef SW_SCOREBOARD    \r
+    CALL(scoreboard_start_intra,1)     // Only assembled when SW_SCOREBOARD is defined\r
+#endif\r
+\r
+#ifdef SW_SCOREBOARD \r
+       wait    n0:ud           //      Now wait for the scoreboard to respond\r
+#endif\r
+\r
+//\r
+//  Decode Y blocks\r
+//\r
+//     Load reference data from neighboring macroblocks\r
+    CALL(load_Intra_Ref_Y,1)\r
+\r
+//     Intra predict the Intra_16x16 luma block\r
+       #include "intra_pred_16x16_Y.asm"\r
+\r
+//     Add error data to the predicted intra data\r
+       #include "add_Error_16x16_Y.asm"\r
+\r
+//     Save decoded Y picture\r
+       CALL(save_16x16_Y,1)\r
+//\r
+//  Decode U/V blocks\r
+//\r
+//     Note: The decoding of chroma blocks is the same for all intra prediction modes\r
+//\r
+       CALL(decode_Chroma_Intra,1)\r
+\r
+#ifdef SW_SCOREBOARD\r
+    #include "scoreboard_update.asm"\r
+#endif\r
+\r
+// Terminate the thread (EndIntraThread.asm contains the END_THREAD for this kernel)\r
+//\r
+    #include "EndIntraThread.asm"\r
+\r
+// End of Intra_16x16\r
diff --git a/i965_drv_video/shaders/h264/mc/Intra_4x4.asm b/i965_drv_video/shaders/h264/mc/Intra_4x4.asm
new file mode 100644 (file)
index 0000000..1169983
--- /dev/null
@@ -0,0 +1,175 @@
+/*\r
+ * Decode Intra_4x4 macroblock\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Intra_4x4.asm\r
+//\r
+// Decoding of Intra_4x4 macroblock\r
+//\r
+//  $Revision: 12 $\r
+//  $Date: 10/18/06 4:10p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: Intra_4x4\r
+// ----------------------------------------------------\r
+\r
+#define        INTRA_4X4\r
+\r
+.kernel Intra_4x4\r
+INTRA_4x4:\r
+\r
+#ifdef _DEBUG\r
+// WA for FULSIM so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0x02aa55a5:ud\r
+#endif\r
+\r
+#include "SetupForHWMC.asm"\r
+\r
+#undef         PPREDBUF_Y\r
+#define            PPREDBUF_Y          a0.3    // Pointer to predicted Y picture\r
+\r
+#define                REG_INTRA_PRED_AVAIL    REG_INTRA_TEMP_4\r
+#define                REG_INTRA_4X4_PRED              REG_INTRA_TEMP_7                // Store predicted Intra_4x4 data\r
+\r
+// Offset where 4x4 predicted data blocks are stored\r
+#define        PREDSUBBLK0             0*GRFWIB\r
+#define        PREDSUBBLK1             1*GRFWIB\r
+#define        PREDSUBBLK2             2*GRFWIB\r
+#define        PREDSUBBLK3             3*GRFWIB\r
+#define        PREDSUBBLK4             4*GRFWIB\r
+#define        PREDSUBBLK5             5*GRFWIB\r
+#define        PREDSUBBLK6             6*GRFWIB\r
+#define        PREDSUBBLK7             7*GRFWIB\r
+#define        PREDSUBBLK8             8*GRFWIB\r
+#define        PREDSUBBLK9             9*GRFWIB\r
+#define        PREDSUBBLK10    10*GRFWIB\r
+#define        PREDSUBBLK11    11*GRFWIB\r
+#define        PREDSUBBLK12    12*GRFWIB\r
+#define        PREDSUBBLK13    13*GRFWIB\r
+#define        PREDSUBBLK14    14*GRFWIB\r
+#define        PREDSUBBLK15    15*GRFWIB\r
+\r
+// 4x4 error block byte offset within the 8x8 error block\r
+#define ERRBLK0                0\r
+#define ERRBLK1                8\r
+#define ERRBLK2                64\r
+#define ERRBLK3                72\r
+\r
+#ifdef SW_SCOREBOARD    \r
+    CALL(scoreboard_start_intra,1)\r
+#endif\r
+\r
+#ifdef SW_SCOREBOARD    \r
+       wait    n0:ud           //      Now wait for the scoreboard to respond\r
+#endif\r
+\r
+//\r
+//  Decode Y blocks\r
+//\r
+//     Load reference data from neighboring macroblocks\r
+    CALL(load_Intra_Ref_Y,1)\r
+\r
+       mov     (1)     PERROR<1>:w     ERRBUF*GRFWIB:w                 // Pointer to macroblock error data\r
+       mov     (1)     PPREDBUF_Y<1>:w PREDBUF*GRFWIB:w        // Pointer to predicted data\r
+       shr (2) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        0x40:v\r
+    and.nz.f0.0 (8)    NULLREG REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        4:w     // Top-right macroblock available for intra prediction?\r
+       (-f0.0.any8h) mov (8)   INTRA_REF_TOP(0,16)<1>  INTRA_REF_TOP(0,15)REGION(1,0)  // Extend right boundary of MB B to C\r
+\r
+//     Intra predict Intra_4x4 luma blocks\r
+//\r
+//     Sub-macroblock 0 *****************\r
+       mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0)       // Top reference data\r
+       mov     (8)             REF_LEFT(0)<1>  INTRA_REF_LEFT(0)REGION(8,4)    // Left reference data\r
+       shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0)<1;2,0>       0x4040:v        // Expand IntraPredMode to 1 byte/block\r
+       CALL(intra_Pred_4x4_Y_4,1)\r
+    add (1)            PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block\r
+\r
+       or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL<0;1,0>:w   0x1:w   // Left neighbor is available now\r
+\r
+//     Sub-macroblock 1 *****************\r
+\r
+       mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0,8)     // Top reference data\r
+       mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub   // Left reference data (top half)\r
+       mov     (4)             REF_LEFT(0,4)<1>        r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub   // Left reference data (bottom half)\r
+       shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0,2)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block\r
+       add     (1)             PPREDBUF_Y<1>:w PPREDBUF_Y<0;1,0>:w     4*GRFWIB:w      // Pointer to predicted sub-macroblock 1\r
+       CALL(intra_Pred_4x4_Y_4,1)\r
+    add (1)            PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block\r
+\r
+       or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL.1<0;1,0>:w 0x2:w   // Top neighbor is available now\r
+\r
+//     Pack constructed data from word-aligned to byte-aligned format\r
+//     to speed up save_4x4_Y module later\r
+//     PPREDBUF_Y now points to sub-block #4\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub          // Sub-block 0\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub  // Sub-block 1\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub          // Sub-block 2\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub  // Sub-block 3\r
+\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub        r[PPREDBUF_Y]<32;16,2>:ub                               // Sub-block 4\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub   // Sub-block 5\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub        r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub           // Sub-block 6\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub   // Sub-block 7\r
+\r
+//     Sub-macroblock 2 *****************\r
+\r
+       mov     (4)             REF_TOP0(0)<1>          INTRA_REF_LEFT0(0,28)REGION(4,1)                // Top-left reference data\r
+       mov     (8)             REF_TOP0(0,4)<1>        r[PPREDBUF_Y,0-2*GRFWIB+12]<16;4,1>:ub  // Top reference data from SB 2,3\r
+       mov     (8)             REF_TOP0(0,12)<1>       r[PPREDBUF_Y,0-GRFWIB+12]<16;4,1>:ub    // Top reference data from SB 6,7\r
+       mov     (8)             REF_TOP0(0,20)<1>       r[PPREDBUF_Y,0-GRFWIB+31]<0;1,0>:ub             // Top-right reference data\r
+       mov     (16)    REG_INTRA_REF_TOP<1>:w  REF_TOP_W(0)            // Store top reference data for SubMB #2 and #3.\r
+       mov     (8)             REF_LEFT(0)<1>          INTRA_REF_LEFT(1)REGION(8,4)    // Left reference data\r
+       shr     (4)             PRED_MODE<1>:w          INTRA_PRED_MODE(0,4)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block\r
+       CALL(intra_Pred_4x4_Y_4,1)\r
+    add (1)            PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block\r
+\r
+       or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL<0;1,0>:w   0x1:w   // Left neighbor is available now\r
+\r
+//     Sub-macroblock 3 *****************\r
+\r
+       mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0,8)             // Top reference data\r
+       mov     (8)             REF_TOP0(0,16)<1>       INTRA_REF_TOP0(0,24)REGION(8,1) // Top reference data\r
+       mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub   // Left reference data (top half)\r
+       mov     (4)             REF_LEFT(0,4)<1>        r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub   // Left reference data (bottom half)\r
+       shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0,6)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block\r
+       add     (1)             PPREDBUF_Y<1>:w PPREDBUF_Y<0;1,0>:w     4*GRFWIB:w      // Pointer to predicted sub-macroblock 3\r
+       CALL(intra_Pred_4x4_Y_4,1)\r
+\r
+//     Pack constructed data from word-aligned to byte-aligned format\r
+//     to speed up save_4x4_Y module later\r
+//     PPREDBUF_Y now points to sub-block #12\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub          // Sub-block 8\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub  // Sub-block 9\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub          // Sub-block 10\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub  // Sub-block 11\r
+\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub        r[PPREDBUF_Y]<32;16,2>:ub                               // Sub-block 12\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub   // Sub-block 13\r
+       mov (16)        r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub        r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub           // Sub-block 14\r
+       mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub   // Sub-block 15\r
+\r
+//     All 4 sub-macroblocks (each containing 4 intra_4x4 blocks) have been constructed\r
+//     Save constructed Y picture\r
+       CALL(save_4x4_Y,1)              // Save Intra_4x4 predicted luma data.\r
+//\r
+//  Decode U/V blocks\r
+//\r
+//     Note: The decoding of chroma blocks is the same for all intra prediction modes\r
+//\r
+       CALL(decode_Chroma_Intra,1)\r
+\r
+#ifdef SW_SCOREBOARD\r
+    #include "scoreboard_update.asm"\r
+#endif\r
+\r
+// Terminate the thread\r
+//\r
+    #include "EndIntraThread.asm"\r
+\r
+// End of Intra_4x4\r
diff --git a/i965_drv_video/shaders/h264/mc/Intra_8x8.asm b/i965_drv_video/shaders/h264/mc/Intra_8x8.asm
new file mode 100644 (file)
index 0000000..05a0be5
--- /dev/null
@@ -0,0 +1,192 @@
+/*\r
+ * Decode Intra_8x8 macroblock\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Intra_8x8.asm\r
+//\r
+// Decoding of Intra_8x8 macroblock\r
+//\r
+//  $Revision: 12 $\r
+//  $Date: 10/18/06 4:10p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: Intra_8x8\r
+// ----------------------------------------------------\r
+\r
+#define        INTRA_8X8\r
+\r
+.kernel Intra_8x8\r
+INTRA_8x8:\r
+\r
+#ifdef _DEBUG\r
+// WA for FULSIM so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0x01aa55a5:ud\r
+#endif\r
+\r
+#include "SetupForHWMC.asm"\r
+\r
+#define                REG_INTRA_PRED_AVAIL    REG_INTRA_TEMP_4\r
+#define                INTRA_PRED_AVAIL                REG_INTRA_TEMP_4.4\r
+\r
+// Offset where 8x8 predicted data blocks are stored\r
+#define        PREDBLK0        0*GRFWIB\r
+#define        PREDBLK1        4*GRFWIB\r
+#define        PREDBLK2        8*GRFWIB\r
+#define        PREDBLK3        12*GRFWIB\r
+\r
+#ifdef SW_SCOREBOARD\r
+\r
+// Update "E" flag with "F" flag information\r
+       mov (1) REG_INTRA_TEMP_0<1>:w   REG_INTRA_PRED_AVAIL_FLAG_WORD<0;1,0>:w         // Store original Intra_Pred_Avail_Flag\r
+       and.nz.f0.0 (1) NULLREG REG_MBAFF_PIC   MBAFF_PIC       // Is current MBAFF picture\r
+       and.z.f0.1 (1) NULLREG  REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG   // Is "A" not available?\r
+       (f0.0) and.z.f0.0 (1) NULLREG   REG_FIELD_MACROBLOCK_FLAG       FIELD_MACROBLOCK_FLAG   // Is current frame MB?\r
+       (f0.1) and.nz.f0.1 (1) NULLREG  REG_INTRA_PRED_8X8_BLK2_AVAIL_FLAG      INTRA_PRED_8X8_BLK2_AVAIL_FLAG  // Is "F" flag set?\r
+       (f0.0.allv) or (1)      REG_INTRA_PRED_AVAIL_FLAG_WORD<1>:w     REG_INTRA_PRED_AVAIL_FLAG_WORD<0;1,0>:w INTRA_PRED_LEFT_BH_AVAIL_FLAG   // Set "E" to 1 if all conditions meet\r
+\r
+    CALL(scoreboard_start_intra,1)\r
+       mov (1) REG_INTRA_PRED_AVAIL_FLAG_WORD<1>:w     REG_INTRA_TEMP_0<0;1,0>:w               // Restore original Intra_Pred_Avail_Flag\r
+#endif\r
+\r
+#ifdef SW_SCOREBOARD    \r
+       wait    n0:ud           //      Now wait for the scoreboard to respond\r
+#endif\r
+\r
+//\r
+//  Decode Y blocks\r
+//\r
+//     Load reference data from neighboring macroblocks\r
+    CALL(load_Intra_Ref_Y,1)\r
+\r
+       mov     (1)     PERROR<1>:w     ERRBUF*GRFWIB:w                 // Pointer to macroblock error data\r
+       mov     (1)     PDECBUF_UD<1>:ud        0x00010001*PREDBUF*GRFWIB+0x00100000:ud // Pointers to predicted data\r
+       shr (2) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        0x40:v\r
+       \r
+#if 1\r
+       mov (4) REF_LEFT_D(0,0)<1>      0:ud            // This is to make validation easier. Without it, DRAM mismatch occurs.\r
+#endif\r
+\r
+//     Intra predict Intra_8x8 luma blocks\r
+//\r
+//     Sub-macroblock 0 *****************\r
+       mov     (16)    REF_TOP_W(0)<1> REG_INTRA_REF_TOP<16;16,1>:w            // Copy entire top reference data\r
+       and.nz.f0.0 (8) NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_LEFT_AVAIL_FLAG   // Is "D" available?\r
+       (-f0.0) mov (1) REF_TOP(0,-1)<1>        INTRA_REF_TOP(0)REGION(1,0)             // p[-1,-1] = p[0,-1]\r
+\r
+       mov     (8)             REF_LEFT(0,2)<1>        INTRA_REF_LEFT(0)       // Left reference data, (leave 2 for reference filtering)\r
+       (-f0.0) mov (1)         REF_LEFT(0,1)<1>        INTRA_REF_LEFT(0)REGION(1,0)                    // p[-1,-1]=p[-1,0]\r
+       (f0.0.any2h)  mov (2)           REF_LEFT(0,0)<1>        INTRA_REF_TOP(0,-1)REGION(1,0)  // p'[-1,y] (y=0,1) = p[-1,-1]\r
+       and.nz.f0.1 (1) NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_AVAIL_FLAG        // Is "B" available?\r
+       (f0.1) mov      (1)             REF_LEFT(0,0)<1>        INTRA_REF_TOP(0,0)REGION(1,0)   // p[0,-1] for left filtering\r
+       and.nz.f0.1 (1) NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG   // Is "A" available?\r
+       (-f0.1) mov     (1)             REF_LEFT(0,2)<1>        INTRA_REF_TOP(0,-1)REGION(1,0)  // p'[-1,2] = p[-1,-1]\r
+\r
+       and     (1)             PRED_MODE<1>:w  INTRA_PRED_MODE(0)REGION(1,0)   0x0F:w          // Intra pred mode for current block\r
+       mov (1)         INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL<0;1,0>:w           // Top/Left neighbor available flags\r
+       CALL(intra_Pred_8x8_Y,1)\r
+    add (1)            PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block\r
+\r
+//     Sub-macroblock 1 *****************\r
+       mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP(0,4)      // Top reference data\r
+       and.nz.f0.1 (8) NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_RIGHT_AVAIL_FLAG  // Is "C" available?\r
+       (f0.1.any8h)  mov (8)   REF_TOP(0,8)<1> INTRA_REF_TOP(0,16)<8;8,1>              // Take data from "C"\r
+       (-f0.1.any8h) mov (8)   REF_TOP(0,8)<1> INTRA_REF_TOP(0,15)REGION(1,0)  // Repeat last pixel from "B"\r
+\r
+       mov     (4)             REF_LEFT(0,2)<1>        DEC_Y(0,14)<16;1,0>             // Left reference data (top half) (leave 2 for reference filtering)\r
+       mov     (4)             REF_LEFT(0,6)<1>        DEC_Y(2,14)<16;1,0>             // Left reference data (bottom half)\r
+       mov     (2)             REF_LEFT(0,0)<1>        INTRA_REF_TOP(0,7)REGION(1,0)           // p'[-1,y] (y=0,1) = p[-1,-1]\r
+       and.nz.f0.1 (1) NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_AVAIL_FLAG        // Is "B" available?\r
+       (f0.1)  mov     (1)             REF_LEFT(0,0)<1>        INTRA_REF_TOP(0,8)REGION(1,0)   // p[-1,-1] for left filtering\r
+       (-f0.1) mov     (1)             REF_LEFT(0,1)<1>        DEC_Y(0,14)REGION(1,0)  // p[-1,-1] = p[-1,0]\r
+\r
+       shr     (1)             PRED_MODE<1>:w  INTRA_PRED_MODE(0)REGION(1,0)   4:w             // Intra pred mode for current block\r
+       add     (2)             PPREDBUF_Y<1>:w PPREDBUF_Y<2;2,1>:w     4*GRFWIB:w                      // Pointer to predicted sub-macroblock 1\r
+       or (1)          INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL<0;1,0>:w   1:w             // Left neighbor is available\r
+       CALL(intra_Pred_8x8_Y,1)\r
+    add (1)            PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block\r
+\r
+//     Pack constructed data from word-aligned to byte-aligned format and interlace Y0 and Y1(every two Y rows)\r
+//     to speed up save_8x8_Y module later\r
+//     PPREDBUF_Y now points to sub-macroblock Y1\r
+       mov (32)        r[PPREDBUF_Y,-PREDBLK1]<1>:ub           DEC_Y(0)<32;16,2> {Compr}       // First 4 Y0 rows\r
+       mov (32)        r[PPREDBUF_Y,0-PREDBLK1+32]<1>:ub       DEC_Y(4)<32;16,2> {Compr}       // First 4 Y1 rows\r
+       mov (32)        r[PPREDBUF_Y,0-PREDBLK1+64]<1>:ub       DEC_Y(2)<32;16,2> {Compr}       // Second 4 Y0 rows\r
+       mov (32)        r[PPREDBUF_Y,0-PREDBLK1+96]<1>:ub       DEC_Y(6)<32;16,2> {Compr}       // Second 4 Y1 rows\r
+\r
+//     Sub-macroblock 2 *****************\r
+//     Intra_8x8 special available flag handling\r
+       and.nz.f0.0 (1) NULLREG REG_MBAFF_PIC   MBAFF_PIC       // Is current MBAFF picture\r
+       and.z.f0.1 (1) NULLREG  REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG   // Is "A" not available?\r
+       (f0.0) and.z.f0.0 (1) NULLREG   REG_FIELD_MACROBLOCK_FLAG       FIELD_MACROBLOCK_FLAG   // Is current frame MB?\r
+       (f0.1) and.nz.f0.1 (1) NULLREG  REG_INTRA_PRED_8X8_BLK2_AVAIL_FLAG      INTRA_PRED_8X8_BLK2_AVAIL_FLAG  // Is special intra_8x8 available flag set?\r
+       (f0.0.allv) mov (1)     REF_TOP(0,-1)<1>        INTRA_REF_LEFT0(0,31)REGION(1,0)        // Top-left reference data\r
+       (f0.0.allv) jmpi (1)    INTRA_8x8_BLK2\r
+//     Done intra_8x8 special available flag handling\r
+\r
+       and.nz.f0.0 (8) NULLREG REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG   // Is top-half "A" available?\r
+       (f0.0.any4h) mov (4)    REF_TOP0(0)<1>          INTRA_REF_LEFT0(0,28)REGION(4,1)        // Top-left reference data\r
+       (-f0.0) mov (1) REF_TOP(0,-1)<1>        DEC_Y(2,24)REGION(1,0)  // p[-1,-1] = p[0,-1]\r
+INTRA_8x8_BLK2:\r
+       mov     (8)             REF_TOP(0)<1>           DEC_Y(2,24)REGION(8,1)          // Top reference data\r
+       mov     (8)             REF_TOP(0,8)<1>         DEC_Y(3,24)REGION(8,1)          // Top reference data\r
+\r
+       mov     (8)             REF_LEFT(0,2)<1>        INTRA_REF_LEFT(1)                       // Left reference data,  (leave 2 for reference filtering)\r
+       mov (1)         REF_LEFT(0,0)<1>        DEC_Y(2,24)REGION(1,0)          // p'[-1,0] = p[0,-1] since "B" is always available\r
+       (f0.0) mov      (1)     REF_LEFT(0,1)<1>        INTRA_REF_LEFT(0,28)REGION(1,0) // p[-1,1] = p[-1,-1] if top-half "A" available\r
+       (-f0.0) mov (1) REF_LEFT(0,1)<1>        INTRA_REF_LEFT(1)REGION(1,0)    // p[-1,1] = p[-1,0]\r
+       and.nz.f0.1 (1) NULLREG REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_BH_AVAIL_FLAG   // Is bottom-half "A" available?\r
+       (-f0.1) mov     (1)     REF_LEFT(0,2)<1>        INTRA_REF_LEFT(0,28)REGION(1,0) // p'[-1,2] = p[-1,-1]\r
+\r
+       and     (1)             PRED_MODE<1>:w                  INTRA_PRED_MODE(0,1)REGION(1,0) 0x0F:w  // Intra pred mode for current block\r
+       or (1)          INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL.1<0;1,0>:w 2:w             // Top neighbor is available\r
+       CALL(intra_Pred_8x8_Y,1)\r
+    add (1)            PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block\r
+\r
+//     Sub-macroblock 3 *****************\r
+       mov     (4)             REF_TOP0(0)<1>          DEC_Y(2,28)REGION(4,1)          // Top-left reference data\r
+       mov     (8)             REF_TOP(0)<1>           DEC_Y(3,24)REGION(8,1)          // Top reference data\r
+       mov     (16)    REF_TOP(0,8)<1>         DEC_Y(3,31)REGION(1,0)          // Top-right reference data\r
+\r
+       mov (4)         REF_LEFT(0,2)<1>        DEC_Y(4,14)<16;1,0>             // Left reference data (top half) (leave 2 for reference filtering)\r
+       mov (4)         REF_LEFT(0,6)<1>        DEC_Y(6,14)<16;1,0>             // Left reference data (bottom half)\r
+       mov (1)         REF_LEFT(0,0)<1>        DEC_Y(3,24)REGION(1,0)  // p[-1,0] = p[0,-1]\r
+       mov (1)         REF_LEFT(0,1)<1>        DEC_Y(2,31)REGION(1,0)  // p[-1,1] = p[-1,-1]\r
+\r
+       shr     (1)             PRED_MODE<1>:w  INTRA_PRED_MODE(0,1)REGION(1,0) 4:w             // Intra pred mode for current block\r
+       add     (2)             PPREDBUF_Y<1>:w PPREDBUF_Y<2;2,1>:w     4*GRFWIB:w      // Pointer to predicted sub-macroblock 3\r
+       or (1)          INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL<0;1,0>:w   3:w             // Top and Left neighbor are available\r
+       CALL(intra_Pred_8x8_Y,1)\r
+\r
+//     Pack constructed data from word-aligned to byte-aligned format\r
+//     to speed up save_8x8_Y module later\r
+//     PPREDBUF_Y now points to sub-macroblock Y3\r
+       mov (32)        r[PPREDBUF_Y,-PREDBLK1]<1>:ub           DEC_Y(4)<32;16,2> {Compr}       // First 4 Y2 rows\r
+       mov (32)        r[PPREDBUF_Y,0-PREDBLK1+32]<1>:ub       DEC_Y(8)<32;16,2> {Compr}       // First 4 Y3 rows\r
+       mov (32)        r[PPREDBUF_Y,0-PREDBLK1+64]<1>:ub       DEC_Y(6)<32;16,2> {Compr}       // Second 4 Y2 rows\r
+       mov (32)        r[PPREDBUF_Y,0-PREDBLK1+96]<1>:ub       DEC_Y(10)<32;16,2> {Compr}      // Second 4 Y3 rows\r
+\r
+//     All 4 sub-macroblocks (intra_8x8 blocks) have been constructed\r
+//     Save constructed Y picture\r
+       CALL(save_8x8_Y,1)              // Save Intra_8x8 predicted luma data.\r
+//\r
+//  Decode U/V blocks\r
+//\r
+//     Note: The decoding for chroma blocks will be the same for all intra prediction mode\r
+//\r
+       CALL(decode_Chroma_Intra,1)\r
+\r
+#ifdef SW_SCOREBOARD\r
+    #include "scoreboard_update.asm"\r
+#endif\r
+\r
+// Terminate the thread\r
+//\r
+    #include "EndIntraThread.asm"\r
+\r
+// End of Intra_8x8\r
diff --git a/i965_drv_video/shaders/h264/mc/Intra_PCM.asm b/i965_drv_video/shaders/h264/mc/Intra_PCM.asm
new file mode 100644 (file)
index 0000000..6bc81af
--- /dev/null
@@ -0,0 +1,56 @@
+/*\r
+ * Decode Intra_PCM macroblock\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Intra_PCM.asm\r
+//\r
+// Decoding of I_PCM macroblock\r
+//\r
+//  $Revision: 8 $\r
+//  $Date: 10/18/06 4:10p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: Intra_PCM\r
+// ----------------------------------------------------\r
+\r
+.kernel Intra_PCM\r
+INTRA_PCM:\r
+\r
+#ifdef _DEBUG\r
+// WA for FULSIM so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0x03aa55a5:ud\r
+#endif\r
+\r
+#include "SetupForHWMC.asm"\r
+\r
+// Not actually needed here but just want to slow down the Intra-PCM to avoid race condition\r
+//\r
+#ifdef SW_SCOREBOARD\r
+       and (1)   REG_INTRA_PRED_AVAIL_FLAG_WORD<1>:w   REG_INTRA_PRED_AVAIL_FLAG_WORD<0;1,0>:w 0xffe0:w        // Clear the low 5 bits to ensure all neighbor avail flags are "0"\r
+    CALL(scoreboard_start_intra,1)\r
+       wait    n0:ud           //      Now wait for the scoreboard to respond\r
+#endif\r
+\r
+//\r
+//  Decoding Y blocks\r
+//\r
+//     In I_PCM mode, the samples are already arranged in raster scan order within the macroblock.\r
+//     We just need to save them to picture buffers\r
+//\r
+    #include "save_I_PCM.asm"              // Save to destination picture buffers\r
+\r
+#ifdef SW_SCOREBOARD    \r
+    #include "scoreboard_update.asm"\r
+#endif\r
+\r
+// Terminate the thread\r
+//\r
+    #include "EndIntraThread.asm"\r
+\r
+// End of Intra_PCM\r
diff --git a/i965_drv_video/shaders/h264/mc/Intra_funcLib.asm b/i965_drv_video/shaders/h264/mc/Intra_funcLib.asm
new file mode 100644 (file)
index 0000000..9644f7f
--- /dev/null
@@ -0,0 +1,42 @@
+/*\r
+ * Library of common modules shared among different intra prediction kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: Intra_funcLib.asm\r
+//\r
+// Library of common modules shared among different intra prediction kernels\r
+//\r
+//  Note: Any sub-modules, if they are #included in more than one kernel,\r
+//       should be moved to this module.\r
+//\r
+#if defined(INTRA_16X16)\r
+#undef INTRA_16X16\r
+    #include "load_Intra_Ref_Y.asm"            // Load intra Y reference data\r
+    #include "Decode_Chroma_Intra.asm" // Decode chroma blocks\r
+    #include "save_16x16_Y.asm"                        // Save to destination Y frame surface\r
+#elif defined(INTRA_8X8)\r
+#undef INTRA_8X8\r
+    #include "load_Intra_Ref_Y.asm"            // Load intra Y reference data\r
+    #include "Decode_Chroma_Intra.asm" // Decode chroma blocks\r
+    #include "intra_Pred_8x8_Y.asm"            // Intra predict Intra_8x8 blocks\r
+    #include "save_8x8_Y.asm"                  // Save to destination Y frame surface\r
+#elif defined(INTRA_4X4)\r
+#undef INTRA_4X4\r
+    #include "load_Intra_Ref_Y.asm"            // Load intra Y reference data\r
+    #include "Decode_Chroma_Intra.asm" // Decode chroma blocks\r
+    #include "intra_Pred_4x4_Y_4.asm"  // Intra predict Intra_4x4 blocks\r
+    #include "save_4x4_Y.asm"                  // Save to destination Y frame surface\r
+#else                                                          // For all merged kernels (nothing is included here)\r
+#endif\r
+\r
+#ifdef SW_SCOREBOARD    \r
+    #include "scoreboard_start_intra.asm"      // scoreboard intra start function      \r
+    #include "scoreboard_start_inter.asm"      // scoreboard inter start function      \r
+#endif // SW_SCOREBOARD\r
+\r
+// End of Intra_funcLib\r
diff --git a/i965_drv_video/shaders/h264/mc/Makefile.am b/i965_drv_video/shaders/h264/mc/Makefile.am
new file mode 100644 (file)
index 0000000..9f97eb0
--- /dev/null
@@ -0,0 +1,28 @@
+
+INTEL_G4I = 
+
+INTEL_G4A = null.g4a
+
+INTEL_G4B = null.g4b
+
+INTEL_G4B_GEN5 = null.g4b.gen5
+
+EXTRA_DIST = $(INTEL_G4I)      \
+            $(INTEL_G4A)       \
+            $(INTEL_G4B)       \
+            $(INTEL_G4B_GEN5)
+
+if HAVE_GEN4ASM
+
+SUFFIXES = .g4a .g4b
+.g4a.g4b:
+       m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && intel-gen4asm -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+
+$(INTEL_G4B): $(INTEL_G4I)
+
+BUILT_SOURCES= $(INTEL_G4B)
+
+clean-local:
+       -rm -f $(INTEL_G4B)
+       -rm -f $(INTEL_G4B_GEN5)
+endif    
diff --git a/i965_drv_video/shaders/h264/mc/Scoreboard_header.inc b/i965_drv_video/shaders/h264/mc/Scoreboard_header.inc
new file mode 100644 (file)
index 0000000..5e87275
--- /dev/null
@@ -0,0 +1,85 @@
+/*\r
+ * Common header file for both scoreboard and scoreboard_MBAFF kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SCOREBOARD_HEADER__)    // Make sure this file is only included once\r
+#define __SCOREBOARD_HEADER__\r
+\r
+// Module name: scoreboard_header.inc\r
+//\r
+// Common header file for both scoreboard and scoreboard_MBAFF kernels\r
+//\r
+\r
+#define ONE_MB_WA                                              // Enable WA for 1-MB wide pictures. To disable WA, simply comment out this line.\r
+\r
+#define        INLINE_REG_OFF  1\r
+#define INLINE_REG             r1\r
+#define INLINE_REG1            r2\r
+\r
+#define DONEFLAG       0x40            // Bit mask of "completed" thread flag\r
+\r
+// GRF r1 map\r
+//\r
+#define WIDTHINMB_1    INLINE_REG.0            // :uw type. Picture width in MB - 1\r
+#define HEIGHTINMB_1   INLINE_REG.1    // :uw type. Picture height in MB - 1\r
+#define TotalMB                INLINE_REG.2            // :uw type. Total number of macroblocks\r
+#define WFLen_B                INLINE_REG.3            // :uw type. Bottom MB Wavefront length (Reserved for MBAFF scoreboard)\r
+#define WFLen          INLINE_REG.4            // :uw type. Wavefront length (used as loop counter)\r
+#define WFLenY         INLINE_REG.5            // :uw type. Wavefront length (vertical component)\r
+#define StartX         INLINE_REG.6            // :uw type. Start X of current wavefront\r
+#define StartY         INLINE_REG.7            // :uw type. Start Y of current wavefront\r
+#define StartXD                INLINE_REG.3            // :ud type. Start (X,Y) of current wavefront\r
+#define        CASE00PTR       INLINE_REG.4            // :ud type. Pointer to "inter start" handler\r
+#define WFLen_Save     INLINE_REG.10           // :uw type. Saved Wavefront length (Reserved for MBAFF scoreboard)\r
+#define        CASE10PTR       INLINE_REG.6            // :ud type. Pointer to "intra start" handler\r
+#define        CASE11PTR       INLINE_REG.7            // :ud type. Pointer to "inter complete" handler\r
+\r
+// GRF r2 map\r
+//\r
+.declare    WFStart    Base=GRF(2) ElementSize=2 SrcRegion=REGION(4,1) Type=w  // Start MB of recent 4 wavefronts, actually use 5 WORDs\r
+.declare    WFStart_T  Base=GRF(2) ElementSize=2 SrcRegion=REGION(4,1) Type=w          // Start MB of recent 4 wavefronts\r
+.declare    WFStart_B  Base=GRF(2).4 ElementSize=2 SrcRegion=REGION(4,1) Type=w        // Start MB of recent 4 wavefronts\r
+\r
+#define NewWFOffsetD   INLINE_REG1.5   // :d type. Offsets used for new wavefront = 0x01ffff00 (0, -1, -1, 1)\r
+#define NewWFOffset    INLINE_REG1.20          // :b type. Offsets used for new wavefront = 0x01ffff00 (0, -1, -1, 1)\r
+\r
+#define AVAILFLAGD     INLINE_REG1.6           // :ud type. Neighbor available flags = 0x08020401 (in ACBD order)\r
+#define AVAILFLAG      INLINE_REG1.24          // :ub type. Neighbor available flags as above\r
+#define AVAILFLAG1D    INLINE_REG1.7           // :ud type. Top-half neighbor available flags = 0x80402010 (in A_Bxxx order)\r
+\r
+.declare    MBINDEX    Base=GRF(3) ElementSize=2 SrcRegion=REGION(16,1) Type=w // MB order # of current MB group (Cur, ACBD and AC_B_D_)\r
+#define AR_SAVE                r3.8    // :uw type. Saved Address Register information\r
+\r
+#define        CMDPTR          a0.0    // :uw type. DWORD Pointer to the scoreboard\r
+#define DEPPTR         a0.0    // :uw type. Pointer to the dependency scoreboard - Current MB\r
+#define DEPPTRL                a0.1    // :uw type. Pointer to the dependency scoreboard - Left MB\r
+#define DEPPTRTR       a0.2    // :uw type. Pointer to the dependency scoreboard - Top right MB\r
+#define DEPPTRT                a0.3    // :uw type. Pointer to the dependency scoreboard - Top MB\r
+#define DEPPTRTL    a0.4    // :uw type. Pointer to the dependency scoreboard - Top left MB\r
+#define DEPPTRLB       a0.5    // :uw type. Pointer to the dependency scoreboard - Left bottom-half MB\r
+\r
+#define        PMSGSEL         a0.7    // :uw type. Pointer to current message in message handling table\r
+\r
+#define        CMD_SB_REG_OFF          4\r
+.declare    CMD_SB             Base=GRF(4) ElementSize=4 SrcRegion=REGION(8,1) Type=ud // Command scoreboard (64 GRF)\r
+\r
+#ifdef AS_ENABLED\r
+//     Definitions for Advanced Scheduler support\r
+#define AS_INT         BIT23   // "Preemption Exception Status" bit in cr0.1:ud control register\r
+#define AS_INT_EN      BIT10   // "Preemption Exception Enable" bit in cr0.1:ud control register\r
+#define TH_INT         BIT2    // "Thread Interrupted" bit in message descriptor\r
+#define TH_RES         BIT0    // "Thread Restart Enable" bit in R0 header r0.2\r
+\r
+#define AS_SAVE                34              // Surface state for saving scoreboard contents\r
+                                                       // Ensure not to conflict with existing binding table entries\r
+#endif // End AS_ENABLED\r
+\r
+// End of scoreboard_header\r
+\r
+#endif // !defined(__SCOREBOARD_HEADER__)\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/SetHWScoreboard.asm b/i965_drv_video/shaders/h264/mc/SetHWScoreboard.asm
new file mode 100644 (file)
index 0000000..c2da855
--- /dev/null
@@ -0,0 +1,209 @@
+/*\r
+ * Set dependency control HW scoreboard kernel\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: SetHWScoreboard.asm\r
+//\r
+// Set dependency control HW scoreboard kernel\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: SetHWScoreboard\r
+// ----------------------------------------------------\r
+\r
+.kernel SetHWScoreboard\r
+\r
+SETHWSCOREBOARD:\r
+\r
+#ifdef _DEBUG\r
+// WA for FULSIM so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0xf0aa55a5:ud\r
+#endif\r
+\r
+#include "header.inc"\r
+#include "SetHWScoreboard_header.inc"\r
+\r
+//\r
+//  Now, begin source code....\r
+//\r
+\r
+.code\r
+\r
+//     Split TotalMB: TotalMB is rounded down to a multiple of 8\r
+//     and RemainderMB holds TotalMB%8\r
+//\r
+       and.z.f0.1 (1)  RemainderMB<1>:uw       TotalMB<0;1,0>:uw       0x0007:uw       // RemainderMB = TotalMB % 8; f0.1 is set if it is zero\r
+       and.z.f0.0 (1)  TotalMB<1>:uw           TotalMB<0;1,0>:uw       0xfff8:uw       // Commands in full 8-command blocks; f0.0 is set if it is zero\r
+\r
+       mov (1) MB_SHIFT_MASK_W<1>:uw           0x100*16+12:w                                   // Set up shift values (12, 16): low byte = 12, high byte = 16\r
+\r
+//     Initialize common DAP read header\r
+//\r
+       mov (8) MRF_READ_HEADER_SRC<1>:ud       r0.0<8;8,1>:ud\r
+       shl (1) MRF_READ_HEADER_SRC.2<1>:ud     StartingMB<0;1,0>:uw    6:uw    // Byte-aligned offset being read (StartingMB * 64)\r
+\r
+//     Initialize Inter DAP write header\r
+       mov (8) MRF_INTER_WRITE_HEADER<1>:ud    r0.0<8;8,1>:ud\r
+\r
+       (f0.0) jmpi (1) SetHWScoreboard_Remainder                                                       // Jump if the original TotalMB < 8 (no full 8-command block)\r
+\r
+//------------------------------------------------------------------------\r
+//     Command buffer parsing loop\r
+//     Each loop will handle 8 commands\r
+//------------------------------------------------------------------------\r
+//\r
+SetHWScoreboard_Loop:\r
+//     Load block 0 (Commands 0/1)\r
+       mov (8) MRF_READ_HEADER0.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       send (16)       CMD_BUFFER_W(0)<1>      MRF_READ_HEADER0        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 1  (Commands 2/3)\r
+       mov (8) MRF_READ_HEADER1.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER1.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         128:ud          // Point to next 2-command block\r
+       send (16)       CMD_BUFFER_W(4)<1>      MRF_READ_HEADER1        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 2  (Commands 4/5)\r
+       mov (8) MRF_READ_HEADER2.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER2.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         256:ud          // Point to next 2-command block\r
+       send (16)       CMD_BUFFER_W(8)<1>      MRF_READ_HEADER2        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 3  (Commands 6/7)\r
+       mov (8) MRF_READ_HEADER3.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER3.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         384:ud          // Point to next 2-command block\r
+       send (16)       CMD_BUFFER_W(12)<1>     MRF_READ_HEADER3        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Start parsing commands\r
+    $for(0; <16; 2) {\r
+       and.nz.f0.1 (8) NULLREG CMD_BUFFER_D(%1,4)<0;1,0>       IS_INTRA_MB:ud          // Is it an "Intra" MB?\r
+       or (1)  CMD_BUFFER_D(%1,2)<1>   CMD_BUFFER_D(%1,2)<0;1,0>       BIT21:ud        // Set "Use Scoreboard" for every MB\r
+       shl     (2)     CMD_BUFFER_W(%1,2)<1>   CMD_BUFFER_W(%1,14)<0;1,0>      MB_SHIFT_MASK_B<2;2,1>:b        // Set HW SB masks\r
+       mov (2) CMD_BUFFER_B(%1,4)<2>   CMD_BUFFER_B(%1,20)<2;2,1>                              // Set scoreboard (X,Y) for intra MB\r
+       (-f0.1) mov (2) CMD_BUFFER_W(%1,2)<1>   CMD_BUFFER_B(%1,20)<2;2,1>              // Set scoreboard (X,Y) for inter MB\r
+       (f0.1) jmpi (1) Parse_8_Loop_%1\r
+\r
+//     Inter Macroblock\r
+//     Output MEDIA_OBJECT command in raster scan order\r
+       mul (16) acc0<1>:uw     CMD_BUFFER_B(%1,21)<0;1,0>      PicWidthMB<0;1,0>:uw    // MB offset = Y*W\r
+       add (16) acc0<1>:uw     acc0<8;8,1>:uw  CMD_BUFFER_B(%1,20)<0;1,0>                      // MB offset = Y*W+X\r
+       shl (1) MRF_INTER_WRITE_HEADER.2<1>:ud  acc0.2<0;1,0>:uw        6:uw            // Byte-aligned MB offset\r
+       mov (16)        MRF_INTER_WRITE_DATA0<1>:ud     CMD_BUFFER_D(%1)<8;8,1> {Compr} // Copy entire command to inter buffer\r
+       mov     (16)    CMD_BUFFER_D(%1)<1>             0:ud    {Compr}                                         // Clear original command\r
+       send (16)       NULLREGW        MRF_INTER_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(2)+OWBWMSGDSC+OWORD_4+BI_CMD_BUFFER\r
+\r
+Parse_8_Loop_%1:\r
+       }\r
+\r
+       add.z.f0.0 (1)  TotalMB<1>:w    TotalMB<0;1,0>:w        -8:w                            // Update remaining number of 8-command blocks\r
+\r
+//     Output modified intra commands\r
+//     Write block 0\r
+       mov (8) MRF_INTRA_WRITE_HEADER.0<1>:ud  MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+    $for(0; <4; 2) {\r
+       mov (16)        MRF_CMD_BUF_D(%1)<1>    CMD_BUFFER_D(%1)<8;8,1> {Compr}\r
+       }\r
+       send (16)       NULLREGW        MRF_INTRA_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Write block 1\r
+       mov (8) m1.0<1>:ud      MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     m1.2<1>:ud      MRF_READ_HEADER_SRC.2<0;1,0>:ud         128:ud          // Point to next 2-command block\r
+       mov (16)        m2<1>:ud        CMD_BUFFER_D(4)<8;8,1>  {Compr}\r
+       mov (16)        m4<1>:ud        CMD_BUFFER_D(6)<8;8,1>  {Compr}\r
+       send (16)       NULLREGW        m1      null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Write block 2\r
+       add     (1)     MRF_INTRA_WRITE_HEADER.2<1>:ud  MRF_READ_HEADER_SRC.2<0;1,0>:ud         256:ud          // Point to next 2-command block\r
+    $for(0; <4; 2) {\r
+       mov (16)        MRF_CMD_BUF_D(%1)<1>    CMD_BUFFER_D(%1+8)<8;8,1>       {Compr}\r
+       }\r
+       send (16)       NULLREGW        MRF_INTRA_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Write block 3\r
+       add     (1)     m1.2<1>:ud      MRF_READ_HEADER_SRC.2<0;1,0>:ud         384:ud          // Point to next 2-command block\r
+       mov (16)        m2<1>:ud        CMD_BUFFER_D(12)<8;8,1> {Compr}\r
+       mov (16)        m4<1>:ud        CMD_BUFFER_D(14)<8;8,1> {Compr}\r
+       send (16)       NULLREGW        m1      null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Update message header for next DAP read\r
+       add (1) MRF_READ_HEADER_SRC.2<1>:ud     MRF_READ_HEADER_SRC.2<0;1,0>:ud 512:ud  // Point to next block of 8-commands\r
+\r
+       cmp.z.f0.1 (1)  NULLREG RemainderMB<0;1,0>:w    0:uw    // Check if remainder MB = 0\r
+       (-f0.0) jmpi (1)        SetHWScoreboard_Loop                    // Continue if more command blocks remain\r
+\r
+SetHWScoreboard_Remainder:\r
+//     f0.1 should have been set to indicate if RemainderMB = 0\r
+//\r
+       (f0.1) jmpi (1) SetHWScoreboard_Done                            // Stop if all commands have been updated\r
+\r
+//     Blindly load next 8 commands anyway\r
+//\r
+//     Load block 0 (Commands 0/1)\r
+       mov (8) MRF_READ_HEADER0.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       send (16)       CMD_BUFFER_W(0)<1>      MRF_READ_HEADER0        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 1  (Commands 2/3)\r
+       mov (8) MRF_READ_HEADER1.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER1.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         128:ud          // Point to next 2-command block\r
+       send (16)       CMD_BUFFER_W(4)<1>      MRF_READ_HEADER1        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 2  (Commands 4/5)\r
+       mov (8) MRF_READ_HEADER2.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER2.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         256:ud          // Point to next 2-command block\r
+       send (16)       CMD_BUFFER_W(8)<1>      MRF_READ_HEADER2        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 3  (Commands 6/7)\r
+       mov (8) MRF_READ_HEADER3.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER3.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         384:ud          // Point to next 2-command block\r
+       send (16)       CMD_BUFFER_W(12)<1>     MRF_READ_HEADER3        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Initialize necessary pointers\r
+       mov (1) a0.1<1>:ud      ((CMD_BUFFER_REG_OFF+1)*0x10000+CMD_BUFFER_REG_OFF)*32  // a0.2:w points to command buffer (first half)\r
+                                                                                                                                                       // a0.3:w points to command buffer (second half)\r
+//     Initialize Inter DAP write header\r
+       mov (8) MRF_INTER_WRITE_HEADER<1>:ud    r0.0<8;8,1>:ud\r
+\r
+SetHWScoreboard_Remainder_Loop:\r
+       and.nz.f0.1 (8) NULLREG r[a0.2,4*4]<0;1,0>:ud   IS_INTRA_MB:ud          // Is it an "Intra" MB?\r
+       add.z.f0.0 (1)  RemainderMB<1>:w        RemainderMB<0;1,0>:w    -1:w    // Decrement MB #\r
+       or (1)  r[a0.2,2*4]<1>:ud       r[a0.2,2*4]<0;1,0>:ud   BIT21:ud                // Set "Use Scoreboard" for every MB\r
+       shl     (2)     r[a0.2,2*2]<1>:uw       r[a0.2,14*2]<0;1,0>:uw  MB_SHIFT_MASK_B<2;2,1>:b        // Set HW SB masks\r
+       mov (2) r[a0.2,4*1]<2>:ub       r[a0.2,5*4]<2;2,1>:ub                                   // Set scoreboard (X,Y) for intra MB\r
+\r
+       (-f0.1) mov (2) r[a0.2,4*1]<1>:uw       r[a0.2,5*4]<2;2,1>:ub                   // Set scoreboard (X,Y) for inter MB\r
+       (f0.1) jmpi (1) Output_Remainder_Intra\r
+\r
+//     Inter Macroblock\r
+//     Output MEDIA_OBJECT command in raster scan order\r
+       mul (16) acc0<1>:uw     r[a0.2,21]<0;1,0>:ub    PicWidthMB<0;1,0>:uw    // MB offset = Y*W\r
+       add (16) acc0<1>:uw     acc0<8;8,1>:uw  r[a0.2,20]<0;1,0>:ub                    // MB offset = Y*W+X\r
+       shl (1) MRF_INTER_WRITE_HEADER.2<1>:ud  acc0.2<0;1,0>:uw        6:uw    // Byte-aligned MB offset\r
+       mov (16)        MRF_INTER_WRITE_DATA0<1>:ud     r[a0.2]<8;8,1>:ud       {Compr} // Copy entire command to inter buffer\r
+       mov     (16)    r[a0.2]<1>:ud           0:ud    {Compr}                                         // Clear original command\r
+       send (16)       NULLREGW        MRF_INTER_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(2)+OWBWMSGDSC+OWORD_4+BI_CMD_BUFFER\r
+\r
+Output_Remainder_Intra:\r
+//     Intra MB command always output\r
+       mov (8) MRF_INTRA_WRITE_HEADER.0<1>:ud  MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       mov (16)        MRF_CMD_BUF_D(0)<1>             r[a0.2]<8;8,1>:ud       {Compr}         // Copy entire command to intra buffer\r
+       send (16)       NULLREGW        MRF_INTRA_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(2)+OWBWMSGDSC+OWORD_4+BI_CMD_BUFFER\r
+\r
+       add     (1)     MRF_READ_HEADER_SRC.2<1>:ud             MRF_READ_HEADER_SRC.2<0;1,0>:ud         64:ud   // Point to next command\r
+       add (1) a0.1<1>:ud      a0.1<0;1,0>:ud  0x00400040:ud                                   // Update pointers\r
+       (-f0.0) jmpi (1)        SetHWScoreboard_Remainder_Loop\r
+\r
+// All MBs have been decoded. Terminate the thread now\r
+//\r
+SetHWScoreboard_Done:\r
+    END_THREAD\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
+\r
+// End of SetHWScoreboard\r
diff --git a/i965_drv_video/shaders/h264/mc/SetHWScoreboard_MBAFF.asm b/i965_drv_video/shaders/h264/mc/SetHWScoreboard_MBAFF.asm
new file mode 100644 (file)
index 0000000..c5cfeb3
--- /dev/null
@@ -0,0 +1,279 @@
+/*\r
+ * Set dependency control HW scoreboard kernel for MBAFF picture\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: SetHWScoreboard_MBAFF.asm\r
+//\r
+// Set dependency control HW scoreboard kernel for MBAFF picture\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: SetHWScoreboard_MBAFF\r
+// ----------------------------------------------------\r
+\r
+.kernel SetHWScoreboard_MBAFF\r
+\r
+SETHWSCOREBOARD_MBAFF:\r
+\r
+#ifdef _DEBUG\r
+// Workaround (WA) for FULSIM: write a marker value to acc0 so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0xf1aa55a5:ud\r
+#endif\r
+\r
+#include "header.inc"\r
+#include "SetHWScoreboard_header.inc"\r
+\r
+//\r
+//  Now, begin source code....\r
+//\r
+\r
+.code\r
+\r
+//     Split TotalMB: TotalMB is rounded down to a multiple of 8\r
+//     and RemainderMB holds TotalMB%8.\r
+//     f0.1 is set when RemainderMB == 0; f0.0 is set when the rounded TotalMB == 0.\r
+       and.z.f0.1 (1)  RemainderMB<1>:uw       TotalMB<0;1,0>:uw       0x0007:uw       // RemainderMB = TotalMB % 8 (left-over commands)\r
+       and.z.f0.0 (1)  TotalMB<1>:uw           TotalMB<0;1,0>:uw       0xfff8:uw       // TotalMB = commands covered by full 8-command blocks\r
+\r
+//     Initialize common DAP read header\r
+//\r
+       mov (8) MRF_READ_HEADER_SRC<1>:ud       r0.0<8;8,1>:ud                          // Start from a copy of r0\r
+       shl (1) MRF_READ_HEADER_SRC.2<1>:ud     StartingMB<0;1,0>:uw    6:uw    // Byte offset being read = StartingMB * 64\r
+\r
+//     Initialize Inter DAP write header\r
+       mov (8) MRF_INTER_WRITE_HEADER<1>:ud    r0.0<8;8,1>:ud                          // Start from a copy of r0\r
+\r
+       (f0.0) jmpi (1) SetHWScoreboard_MBAFF_Remainder                                         // Jump if TotalMB < 8\r
+\r
+//------------------------------------------------------------------------\r
+//     Command buffer parsing loop\r
+//     Each loop iteration handles 8 commands, read as four 2-command blocks\r
+//------------------------------------------------------------------------\r
+//     The four reads land in CMD_BUFFER_W(0), (4), (8) and (12)\r
+SetHWScoreboard_MBAFF_Loop:\r
+//     Load block 0 (Commands 0/1)\r
+       mov (8) MRF_READ_HEADER0.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       send (16)       CMD_BUFFER_W(0)<1>      MRF_READ_HEADER0        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 1  (Commands 2/3)\r
+       mov (8) MRF_READ_HEADER1.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER1.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         128:ud          // Offset of 2-command block 1 (+128 bytes)\r
+       send (16)       CMD_BUFFER_W(4)<1>      MRF_READ_HEADER1        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 2  (Commands 4/5)\r
+       mov (8) MRF_READ_HEADER2.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER2.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         256:ud          // Offset of 2-command block 2 (+256 bytes)\r
+       send (16)       CMD_BUFFER_W(8)<1>      MRF_READ_HEADER2        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 3  (Commands 6/7)\r
+       mov (8) MRF_READ_HEADER3.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER3.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         384:ud          // Offset of 2-command block 3 (+384 bytes)\r
+       send (16)       CMD_BUFFER_W(12)<1>     MRF_READ_HEADER3        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Start parsing commands. %1 is the first GRF of each command (2 GRFs per command, 8 commands)\r
+    $for(0; <16; 2) {\r
+//     Adjust MB Y origin for field MBs\r
+//     Field MB: Y' = 2*Y (+1 for a bottom field MB); otherwise Y is kept as is\r
+       mov (2) TEMP_FD_X_W<1>:uw       CMD_BUFFER_B(%1,20)<2;2,1>                                      // Initialize temp (X,Y) location\r
+       and.nz.f0.1 (8) NULLREG CMD_BUFFER_D(%1,4)<0;1,0>       IS_BOT_FD:ud            // Is it a "Bottom Field MB"?\r
+       and.nz.f0.0 (8) NULLREG CMD_BUFFER_D(%1,4)<0;1,0>       IS_FIELD_MB:ud          // Is it a "Field MB"?\r
+       mul (8) acc0<1>:w       CMD_BUFFER_B(%1,21)<0;1,0>      2:w                     // acc0 = Y * 2\r
+       (-f0.1) mov (1) TEMP_FD_Y_W<1>:w        acc0<0;1,0>:w                           // Not bottom field: Y' = 2*Y\r
+       (f0.1) add (1)  TEMP_FD_Y_W<1>:w        acc0<0;1,0>:w   1:w                     // Bottom field: Y' = 2*Y + 1\r
+       (-f0.0) mov (1) TEMP_FD_Y_W<1>:w        CMD_BUFFER_B(%1,21)<0;1,0>                      // Not a field MB: restore original Y (discard field MB Y origin handling)\r
+\r
+       and.nz.f0.0 (8) NULLREG CMD_BUFFER_D(%1,4)<0;1,0>       IS_INTRA_MB:ud          // Is it an "Intra" MB?\r
+       and.nz.f0.1     (8)     NULLREG TEMP_FD_Y_W<0;1,0>:uw   BIT0                                    // Is it "Bottom MB"?\r
+       or (1)  CMD_BUFFER_D(%1,2)<1>   CMD_BUFFER_D(%1,2)<0;1,0>       BIT21           // Set "Use Scoreboard"\r
+       mov (2) CMD_BUFFER_W(%1,2)<1>   TEMP_FD_X_W<2;2,1>:uw                                   // Set scoreboard (X,Y) for inter MB (words 2-3 of the command)\r
+       (f0.0) jmpi (1) SET_SB_MBAFF_INTRA_%1                                                                   // Jump if intra MB.\r
+\r
+//     Inter Macroblock\r
+//     Output MEDIA_OBJECT command in raster scan order\r
+       mul (16) acc0<1>:uw     TEMP_FD_Y_W<0;1,0>:uw   PicWidthMB<0;1,0>:uw            // MB offset = Y*W\r
+       add (16) acc0<1>:uw     acc0<8;8,1>:uw                  TEMP_FD_X_W<0;1,0>:uw           // MB offset = Y*W+X\r
+       shl (1) MRF_INTER_WRITE_HEADER.2<1>:ud  acc0.2<0;1,0>:uw        6:uw            // Byte-aligned MB offset\r
+       mov (16)        MRF_INTER_WRITE_DATA0<1>:ud     CMD_BUFFER_D(%1)<8;8,1> {Compr} // Copy entire command to inter buffer\r
+       mov     (16)    CMD_BUFFER_D(%1)<1>             0:ud    {Compr}                                         // Clear original command\r
+       send (16)       NULLREGW        MRF_INTER_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(2)+OWBWMSGDSC+OWORD_4+BI_CMD_BUFFER\r
+       jmpi (1)        NEXT_MB_MBAFF_%1                        // Done for inter MB. Move to next MB.\r
+\r
+SET_SB_MBAFF_INTRA_%1:\r
+//     Intra MB\r
+//     On entry f0.1 = "Bottom MB" (bit 0 of the adjusted Y)\r
+       and.nz.f0.0 (8) NULLREG CMD_BUFFER_D(%1,4)<0;1,0>       IS_FIELD_MB:ud          // Is it a "Field" MB?\r
+       (f0.1) sel (2)  MB_MASK_D<1>:ud         BOT_FD_MASK1_D<2;2,1>:ud        TOP_FD_MASK1_D<2;2,1>:ud        // Pick bottom/top field masks (assume field MB)\r
+       mov (1) TEMP_INTRA_FLAG_W<1>:uw         CMD_BUFFER_W(%1,14)<0;1,0>                      // Don't want to alter original in-line data\r
+       (f0.0) jmpi (1) SET_SB_MBAFF_%1                                 // Jump if it's really field MB\r
+\r
+//     Frame MB\r
+//\r
+//     Derive E'\r
+       and.nz.f0.0     (8)     NULLREG CMD_BUFFER_W(%1,14)<0;1,0>      E_FLAG          // Is "E" = 1\r
+       (f0.1) sel (2)  MB_MASK_D<1>:ud         BOT_FM_MASK1_D<2;2,1>:ud        TOP_FM_MASK1_D<2;2,1>:ud        // Frame MB: pick bottom/top frame masks instead\r
+       and.z.f0.1 (8)  NULLREG CMD_BUFFER_W(%1,14)<0;1,0>      A_FLAG          // "A" = 0?\r
+       (f0.0) jmpi (1) SET_SB_MBAFF_%1                         // If "E" flag = 1, skip the rest of derivation\r
+       (f0.1) and.nz.f0.1 (8)  NULLREG CMD_BUFFER_D(%1,4)<0;1,0>       IS_INTRA8X8     // ... and is it intra 8x8?\r
+       (f0.1) and.nz.f0.1 (8)  NULLREG CMD_BUFFER_W(%1,14)<0;1,0>      F_FLAG          // ... and "F" set? NOTE(review): F_FLAG is listed under DW4 in SetHWScoreboard_header.inc but is tested against word 14 (DW7) here - confirm\r
+       (f0.1) or (1)   TEMP_INTRA_FLAG_W<1>:uw CMD_BUFFER_W(%1,14)<0;1,0>      E_FLAG          // All conditions met: set "E" in the temporary copy only\r
+\r
+SET_SB_MBAFF_%1:\r
+       and.nz.f0.1     (16)    NULLREGW        TEMP_INTRA_FLAG_W<0;1,0>:uw     MB_MASK_B<0;8,1>:ub     // Test flags against the 8 mask bytes; region <0;8,1> repeats them, so f0.1 bits 8-15 mirror bits 0-7\r
+       shl     (1)     CMD_BUFFER_W(%1,2)<1>   f0.1<0;1,0>:uw  12:w            // Masks 0-3 (f0.1 bits 0-3 moved to bits 12-15)\r
+       and (1) CMD_BUFFER_W(%1,3)<1>   f0.1<0;1,0>:uw  0xf000:uw       // Masks 4-7 (f0.1 bits 12-15)\r
+\r
+       mov (2) CMD_BUFFER_B(%1,4)<2>   TEMP_FD_X_B<4;2,2>:ub           // Set scoreboard (X,Y) for intra MB: low bytes of X/Y go to bytes 4 and 6\r
+\r
+NEXT_MB_MBAFF_%1:\r
+       }\r
+\r
+       add.z.f0.0 (1)  TotalMB<1>:w    TotalMB<0;1,0>:w        -8:w                            // TotalMB -= 8; f0.0 is set when no full 8-command block remains\r
+\r
+//     Output modified intra commands\r
+//     Write block 0\r
+       mov (8) MRF_INTRA_WRITE_HEADER.0<1>:ud  MRF_READ_HEADER_SRC.0<8;8,1>:ud                 // Same header (offset) that block 0 was read with\r
+    $for(0; <4; 2) {\r
+       mov (16)        MRF_CMD_BUF_D(%1)<1>    CMD_BUFFER_D(%1)<8;8,1> {Compr}\r
+       }\r
+       send (16)       NULLREGW        MRF_INTRA_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Write block 1\r
+       mov (8) m1.0<1>:ud      MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     m1.2<1>:ud      MRF_READ_HEADER_SRC.2<0;1,0>:ud         128:ud          // Offset of 2-command block 1 (+128 bytes)\r
+       mov (16)        m2<1>:ud        CMD_BUFFER_D(4)<8;8,1>  {Compr}\r
+       mov (16)        m4<1>:ud        CMD_BUFFER_D(6)<8;8,1>  {Compr}         // NOTE(review): {Compr} writes m4 and m5; m5 is MRF_INTER_WRITE_HEADER - confirm clobbering it is harmless\r
+       send (16)       NULLREGW        m1      null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Write block 2\r
+       add     (1)     MRF_INTRA_WRITE_HEADER.2<1>:ud  MRF_READ_HEADER_SRC.2<0;1,0>:ud         256:ud          // Offset of 2-command block 2 (+256 bytes); rest of header kept from block 0\r
+    $for(0; <4; 2) {\r
+       mov (16)        MRF_CMD_BUF_D(%1)<1>    CMD_BUFFER_D(%1+8)<8;8,1>       {Compr}\r
+       }\r
+       send (16)       NULLREGW        MRF_INTRA_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Write block 3\r
+       add     (1)     m1.2<1>:ud      MRF_READ_HEADER_SRC.2<0;1,0>:ud         384:ud          // Offset of 2-command block 3 (+384 bytes); rest of m1 kept from block 1\r
+       mov (16)        m2<1>:ud        CMD_BUFFER_D(12)<8;8,1> {Compr}\r
+       mov (16)        m4<1>:ud        CMD_BUFFER_D(14)<8;8,1> {Compr}         // Writes m4 and m5, as in block 1\r
+       send (16)       NULLREGW        m1      null:uw DAPWRITE        MSG_LEN(4)+OWBWMSGDSC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Update message header for next DAP read\r
+       add (1) MRF_READ_HEADER_SRC.2<1>:ud     MRF_READ_HEADER_SRC.2<0;1,0>:ud 512:ud  // Point to next block of 8-commands\r
+\r
+       cmp.z.f0.1 (1)  NULLREG RemainderMB<0;1,0>:w    0:uw            // f0.1 = (RemainderMB == 0); tested at SetHWScoreboard_MBAFF_Remainder\r
+       (-f0.0) jmpi (1)        SetHWScoreboard_MBAFF_Loop                      // Continue if more command blocks remain\r
+\r
+SetHWScoreboard_MBAFF_Remainder:\r
+//     f0.1 should have been set to indicate if RemainderMB = 0\r
+//     (by the initial and.z when jumping here directly, or by the cmp.z at the end of the main loop)\r
+       (f0.1) jmpi (1) SetHWScoreboard_MBAFF_Done                              // Stop if all commands have been updated\r
+\r
+//     Load a full 8-command block regardless of how many commands actually remain\r
+//\r
+//     Load block 0 (Commands 0/1)\r
+       mov (8) MRF_READ_HEADER0.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       send (16)       CMD_BUFFER_W(0)<1>      MRF_READ_HEADER0        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 1  (Commands 2/3)\r
+       mov (8) MRF_READ_HEADER1.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER1.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         128:ud          // Offset of 2-command block 1 (+128 bytes)\r
+       send (16)       CMD_BUFFER_W(4)<1>      MRF_READ_HEADER1        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 2  (Commands 4/5)\r
+       mov (8) MRF_READ_HEADER2.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER2.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         256:ud          // Offset of 2-command block 2 (+256 bytes)\r
+       send (16)       CMD_BUFFER_W(8)<1>      MRF_READ_HEADER2        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Load block 3  (Commands 6/7)\r
+       mov (8) MRF_READ_HEADER3.0<1>:ud        MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       add     (1)     MRF_READ_HEADER3.2<1>:ud        MRF_READ_HEADER_SRC.2<0;1,0>:ud         384:ud          // Offset of 2-command block 3 (+384 bytes)\r
+       send (16)       CMD_BUFFER_W(12)<1>     MRF_READ_HEADER3        null:uw DAPREAD RESP_LEN(4)+OWBRMSGDSC_SC+OWORD_8+BI_CMD_BUFFER\r
+\r
+//     Initialize necessary pointers\r
+       mov (1) a0.1<1>:ud      ((CMD_BUFFER_REG_OFF+1)*0x10000+CMD_BUFFER_REG_OFF)*32  // a0.2:w = byte address of the current command's first GRF (starts at CMD_BUFFER_REG_OFF*32)\r
+                                                                                                                                                       // a0.3:w = byte address of its second GRF ((CMD_BUFFER_REG_OFF+1)*32)\r
+//     Initialize Inter DAP write header\r
+       mov (8) MRF_INTER_WRITE_HEADER<1>:ud    r0.0<8;8,1>:ud\r
+\r
+SetHWScoreboard_MBAFF_Remainder_Loop:\r
+//     Adjust MB Y origin for field MBs\r
+//     Field MB: Y' = 2*Y (+1 for a bottom field MB); otherwise Y is kept as is\r
+       mov (2) TEMP_FD_X_W<1>:uw       r[a0.2,5*4]<2;2,1>:ub                                   // Initialize temp (X,Y) location\r
+       and.nz.f0.1 (8) NULLREG r[a0.2,4*4]<0;1,0>:ud   IS_BOT_FD:ud            // Is it a "Bottom Field MB"?\r
+       and.nz.f0.0 (8) NULLREG r[a0.2,4*4]<0;1,0>:ud   IS_FIELD_MB:ud          // Is it a "Field MB"?\r
+       mul (8) acc0<1>:w       r[a0.2,21]<0;1,0>:ub    2:w                     // acc0 = Y * 2\r
+       (-f0.1) mov (1) TEMP_FD_Y_W<1>:w        acc0<0;1,0>:w                           // Not bottom field: Y' = 2*Y\r
+       (f0.1) add (1)  TEMP_FD_Y_W<1>:w        acc0<0;1,0>:w   1:w                     // Bottom field: Y' = 2*Y + 1\r
+       (-f0.0) mov (1) TEMP_FD_Y_W<1>:w        r[a0.2,5*4+1]<0;1,0>:ub                 // Not a field MB: restore original Y (discard field MB Y origin handling)\r
+\r
+       and.nz.f0.0 (8) NULLREG r[a0.2,4*4]<0;1,0>:ud   IS_INTRA_MB:ud          // Is it an "Intra" MB?\r
+       add.z.f0.1 (1)  RemainderMB<1>:w        RemainderMB<0;1,0>:w    -1:w    // Decrement remaining MB count; f0.1 is set when it reaches 0\r
+       or (1)  r[a0.2,2*4]<1>:ud       r[a0.2,2*4]<0;1,0>:ud   BIT21:ud                // Set "Use Scoreboard"\r
+       mov (2) r[a0.2,2*2]<1>:uw       TEMP_FD_X_W<2;2,1>:uw                                   // Set scoreboard (X,Y) for inter MB (words 2-3 of the command)\r
+       (f0.0) jmpi (1) SET_SB_MBAFF_REM_INTRA                                                          // Jump if intra MB.\r
+\r
+//     Inter Macroblock\r
+//     Output MEDIA_OBJECT command in raster scan order\r
+       mul (16) acc0<1>:uw     TEMP_FD_Y_W<0;1,0>:uw   PicWidthMB<0;1,0>:uw    // MB offset = Y*W\r
+       add (16) acc0<1>:uw     acc0<8;8,1>:uw                  TEMP_FD_X_W<0;1,0>:uw   // MB offset = Y*W+X\r
+       shl (1) MRF_INTER_WRITE_HEADER.2<1>:ud  acc0.2<0;1,0>:uw        6:uw    // Byte-aligned MB offset\r
+       mov (16)        MRF_INTER_WRITE_DATA0<1>:ud     r[a0.2]<8;8,1>:ud {Compr}       // Copy entire command to inter buffer\r
+       mov     (16)    r[a0.2]<1>:ud           0:ud    {Compr}                                                 // Clear original command\r
+       send (16)       NULLREGW        MRF_INTER_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(2)+OWBWMSGDSC+OWORD_4+BI_CMD_BUFFER\r
+       jmpi (1)        Output_MBAFF_Remainder_Intra                                                    // Done for inter MB. Go write the (now cleared) command to the intra buffer.\r
+\r
+SET_SB_MBAFF_REM_INTRA:\r
+//     Intra MB\r
+//     f0.1 is reused below, so the "RemainderMB == 0" state is re-derived at SET_SB_MBAFF_REM\r
+       and.nz.f0.1     (8)     NULLREG TEMP_FD_Y_W<0;1,0>:uw   BIT0:ud                 // Is it "Bottom MB"?\r
+       and.nz.f0.0 (8) NULLREG r[a0.2,4*4]<0;1,0>:ud   IS_FIELD_MB:ud  // Is it "Field MB"?\r
+       mov (1) TEMP_INTRA_FLAG_W<1>:uw r[a0.2,14*2]<0;1,0>:uw                  // Don't want to alter original in-line data\r
+       (f0.1) sel (2)  MB_MASK_D<1>:ud         BOT_FD_MASK1_D<2;2,1>:ud        TOP_FD_MASK1_D<2;2,1>:ud        // Pick bottom/top field masks (assume field MB)\r
+       (f0.0) jmpi (1) SET_SB_MBAFF_REM                                        // Jump if it's really field MB\r
+\r
+//     Frame MB\r
+//\r
+//     Derive E'\r
+       and.nz.f0.0     (8)     NULLREG r[a0.2,14*2]<0;1,0>:uw  E_FLAG          // Is "E" = 1\r
+       (f0.1) sel (2)  MB_MASK_D<1>:ud         BOT_FM_MASK1_D<2;2,1>:ud        TOP_FM_MASK1_D<2;2,1>:ud        // Frame MB: pick bottom/top frame masks instead\r
+       and.z.f0.1 (8)  NULLREG r[a0.2,14*2]<0;1,0>:uw  A_FLAG          // "A" = 0?\r
+       (f0.0) jmpi (1) SET_SB_MBAFF_REM                                // If "E" flag = 1, skip the rest of derivation\r
+       (f0.1) and.nz.f0.1 (8)  NULLREG r[a0.2,4*4]<0;1,0>:ud   IS_INTRA8X8     // ... and is it intra 8x8?\r
+       (f0.1) and.nz.f0.1 (8)  NULLREG r[a0.2,14*2]<0;1,0>:uw  F_FLAG          // ... and "F" set? NOTE(review): F_FLAG is listed under DW4 in SetHWScoreboard_header.inc but is tested against byte 28 (DW7) here - confirm\r
+       (f0.1) or (1)   TEMP_INTRA_FLAG_W<1>:uw r[a0.2,14*2]<0;1,0>:uw  E_FLAG          // All conditions met: set "E" in the temporary copy only\r
+\r
+SET_SB_MBAFF_REM:\r
+       and.nz.f0.0     (16)    NULLREGW        TEMP_INTRA_FLAG_W<0;1,0>:uw     MB_MASK_B<0;8,1>:ub     // Test flags against the 8 mask bytes; region <0;8,1> repeats them, so f0.0 bits 8-15 mirror bits 0-7\r
+       add.z.f0.1 (1)  RemainderMB<1>:w        RemainderMB<0;1,0>:w    0:w             // Re-derive f0.1 = (RemainderMB == 0) for the loop test\r
+       shl     (1)     r[a0.2,2*2]<1>:uw       f0.0<0;1,0>:uw  12:w            // Masks 0-3 (f0.0 bits 0-3 moved to bits 12-15)\r
+       and (1) r[a0.2,3*2]<1>:uw       f0.0<0;1,0>:uw  0xf000:uw       // Masks 4-7 (f0.0 bits 12-15)\r
+\r
+       mov (2) r[a0.2,4*1]<2>:ub       TEMP_FD_X_B<4;2,2>:ub           // Set scoreboard (X,Y) for intra MB: low bytes of X/Y go to bytes 4 and 6\r
+\r
+Output_MBAFF_Remainder_Intra:\r
+//     The command slot is always written back to the intra buffer (it was zeroed above if the MB was inter)\r
+       mov (8) MRF_INTRA_WRITE_HEADER.0<1>:ud  MRF_READ_HEADER_SRC.0<8;8,1>:ud\r
+       mov (16)        MRF_CMD_BUF_D(0)<1>             r[a0.2]<8;8,1>:ud       {Compr}         // Copy entire command to intra buffer\r
+       send (16)       NULLREGW        MRF_INTRA_WRITE_HEADER  null:uw DAPWRITE        MSG_LEN(2)+OWBWMSGDSC+OWORD_4+BI_CMD_BUFFER\r
+\r
+       add     (1)     MRF_READ_HEADER_SRC.2<1>:ud             MRF_READ_HEADER_SRC.2<0;1,0>:ud         64:ud   // Point to next command\r
+       add (1) a0.1<1>:ud      a0.1<0;1,0>:ud  0x00400040:ud                                   // Advance a0.2 and a0.3 by 0x40 bytes each (one 2-GRF command)\r
+       (-f0.1) jmpi (1)        SetHWScoreboard_MBAFF_Remainder_Loop                            // Loop until RemainderMB reaches 0\r
+\r
+// All MB commands have been processed. Terminate the thread now\r
+//\r
+SetHWScoreboard_MBAFF_Done:\r
+    END_THREAD\r
+\r
+#if !defined(COMBINED_KERNEL)          // Directives below are emitted for a standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
+\r
+// End of SetHWScoreboard_MBAFF\r
diff --git a/i965_drv_video/shaders/h264/mc/SetHWScoreboard_header.inc b/i965_drv_video/shaders/h264/mc/SetHWScoreboard_header.inc
new file mode 100644 (file)
index 0000000..1df91f9
--- /dev/null
@@ -0,0 +1,134 @@
+/*\r
+ * Common header file for both SetHWScoreboard and SetHWScoreboard_MBAFF kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SETHWSCOREBOARD_HEADER__)       // Make sure this file is only included once\r
+#define __SETHWSCOREBOARD_HEADER__\r
+\r
+// Module name: SetHWScoreboard_header.inc\r
+//\r
+// Common header file for both SetHWScoreboard and SetHWScoreboard_MBAFF kernels\r
+//\r
+\r
+#define BI_CMD_BUF                     0       // Binding table index for command buffer (BI_CMD_BUFFER below has the same value)\r
+\r
+// GRF r1 map\r
+//\r
+// For use by setting HW scoreboard kernel for MBAFF picture\r
+//\r
+// CURBE data. *_D names are dword subregisters; *_B names are the same locations as byte offsets\r
+#define        TOP_FM_MASK1_D  r1.0            // Bit mask for first half of top frame MB SB mask\r
+#define        TOP_FM_MASK1_B  r1.0            // Byte view of TOP_FM_MASK1_D\r
+#define        TOP_FM_MASK2_D  r1.1            // Bit mask for second half of top frame MB SB mask\r
+#define        TOP_FM_MASK2_B  r1.4            // Byte view of TOP_FM_MASK2_D\r
+#define        BOT_FM_MASK1_D  r1.2            // Bit mask for first half of bottom frame MB SB mask\r
+#define        BOT_FM_MASK1_B  r1.8            // Byte view of BOT_FM_MASK1_D\r
+#define        BOT_FM_MASK2_D  r1.3            // Bit mask for second half of bottom frame MB SB mask\r
+#define        BOT_FM_MASK2_B  r1.12           // Byte view of BOT_FM_MASK2_D\r
+#define        TOP_FD_MASK1_D  r1.4            // Bit mask for first half of top field MB SB mask\r
+#define        TOP_FD_MASK1_B  r1.16           // Byte view of TOP_FD_MASK1_D\r
+#define        TOP_FD_MASK2_D  r1.5            // Bit mask for second half of top field MB SB mask\r
+#define        TOP_FD_MASK2_B  r1.20           // Byte view of TOP_FD_MASK2_D\r
+#define        BOT_FD_MASK1_D  r1.6            // Bit mask for first half of bottom field MB SB mask\r
+#define        BOT_FD_MASK1_B  r1.24           // Byte view of BOT_FD_MASK1_D\r
+#define        BOT_FD_MASK2_D  r1.7            // Bit mask for second half of bottom field MB SB mask\r
+#define        BOT_FD_MASK2_B  r1.28           // Byte view of BOT_FD_MASK2_D\r
+\r
+// For use by setting HW scoreboard kernel for non-MBAFF picture (overlays the start of the MBAFF mask layout above)\r
+#define        MB_SHIFT_MASK_W         r1.0    // :w type. Shift values for two parts of the MB SB mask\r
+#define        MB_SHIFT_MASK_B         r1.0    // :b type. Shift values for two parts of the MB SB mask\r
+\r
+// GRF r2 map\r
+//\r
+// In-line data\r
+//\r
+#define        INLINE_REG_OFFSET       1\r
+#define INLINE_REG                     r2\r
+\r
+#define StartingMB     INLINE_REG.0    // :uw type. Starting MB number\r
+#define TotalMB                INLINE_REG.1    // :uw type. Total number of MB to be processed\r
+#define PicWidthMB     INLINE_REG.2    // :uw type. Picture width in MB\r
+\r
+// GRF r3 map\r
+//\r
+// Temporary variables (the *_W/*_B/*_D suffix gives the subregister unit: word, byte or dword)\r
+//\r
+#define RemainderMB    r3.0                    // :uw type. Remainder of MB (<16) to be processed; the MBAFF kernel stores TotalMB%8 here\r
+\r
+#define        TEMP_FD_X_W     r3.2                    // :w type. Temporary variable for field MB X origin in MBAFF picture\r
+#define        TEMP_FD_X_B     r3.4                    // :b type. Temporary variable for field MB X origin in MBAFF picture (low byte of TEMP_FD_X_W)\r
+#define        TEMP_FD_Y_W     r3.3                    // :w type. Temporary variable for field MB Y origin in MBAFF picture\r
+#define        TEMP_FD_Y_B     r3.6                    // :b type. Temporary variable for field MB Y origin in MBAFF picture (low byte of TEMP_FD_Y_W)\r
+\r
+#define        TEMP_INTRA_FLAG_W       r3.4    // :uw type. Temporary intra available flag\r
+\r
+#define MB_MASK_D      r3.4                    // :ud type. Bit masks for MBAFF MB (the MBAFF kernel writes two dwords here)\r
+#define MB_MASK_B      r3.16                   // :ub type. Bit masks for MBAFF MB (byte view of MB_MASK_D)\r
+\r
+#define        MRF_READ_HEADER_SRC     r63     // Read header template; note this is a GRF (r63) despite the MRF_ prefix\r
+\r
+// MEDIA_OBJECT_EX Command map\r
+//\r
+// In DW1 of each MEDIA_OBJECT_EX command (VFE DWORD)\r
+#define        CUR_X   0       // Byte 0\r
+#define        CUR_Y   0       // Byte 2 -- NOTE(review): value is 0, same as CUR_X; confirm whether 2 was intended\r
+\r
+// In DW2 of each MEDIA_OBJECT_EX command\r
+#define        USE_SCOREBOARD  BIT21\r
+\r
+// In DW4 of each MEDIA_OBJECT_EX command\r
+#define        F_FLAG          BIT4    // NOTE(review): the MBAFF kernel tests F_FLAG against word 14 (DW7), where BIT4 is E_FLAG - confirm\r
+#define        IS_INTRA_MB     BIT13\r
+#define IS_FIELD_MB    BIT14\r
+#define IS_INTRA8X8    BIT15\r
+#define IS_BOT_FD      BIT24\r
+\r
+// In DW7 of each MEDIA_OBJECT_EX command\r
+#define        A_FLAG          BIT0\r
+#define        B_FLAG          BIT1\r
+#define        C_FLAG          BIT2\r
+#define        D_FLAG          BIT3\r
+#define        E_FLAG          BIT4\r
+\r
+#define        CMD_BUFFER_REG_OFF              4       // First GRF of the command buffer; matches Base=GRF(4) in the declarations below\r
+.declare    CMD_BUFFER_D               Base=GRF(4) ElementSize=4 SrcRegion=REGION(8,1) Type=ud         // Command buffer (32 GRF)\r
+.declare    CMD_BUFFER_W               Base=GRF(4) ElementSize=2 SrcRegion=REGION(16,1) Type=uw        // Command buffer (32 GRF)\r
+.declare    CMD_BUFFER_B               Base=GRF(4) ElementSize=1 SrcRegion=REGION(16,1) Type=ub        // Command buffer (32 GRF)\r
+\r
+#define        MRF_READ_HEADER         m1      // Same register as MRF_READ_HEADER0\r
+#define        MRF_READ_HEADER0        m1      // Read header for 2-command block 0\r
+#define        MRF_READ_HEADER1        m2      // Read header for 2-command block 1\r
+#define        MRF_READ_HEADER2        m3      // Read header for 2-command block 2\r
+#define        MRF_READ_HEADER3        m4      // Read header for 2-command block 3\r
+\r
+#define        MRF_INTER_WRITE_HEADER  m5      // Header for writing an inter MB command; the MBAFF kernel's block 1/3 write-back also stores data in m2-m5\r
+#define        MRF_INTER_WRITE_DATA0   m6      // Inter MB command data, first register\r
+#define        MRF_INTER_WRITE_DATA1   m7      // Inter MB command data, second register\r
+\r
+#define        MRF_WRITE_HEADER                m11     // Same register as MRF_INTRA_WRITE_HEADER\r
+#define        MRF_INTRA_WRITE_HEADER  m11     // Header for writing intra commands; data follows in MRF_CMD_BUF (m12 onward)\r
+\r
+#define MRF_CMD_BUF_REG_OFF            12      // Matches Base=m12 in the declarations below\r
+.declare       MRF_CMD_BUF_D           Base=m12 ElementSize=4 SrcRegion=REGION(8,1) Type=ud            // Command buffer stored in MRF\r
+.declare       MRF_CMD_BUF_W           Base=m12 ElementSize=2 SrcRegion=REGION(16,1) Type=uw           // Command buffer stored in MRF\r
+.declare       MRF_CMD_BUF_B           Base=m12 ElementSize=1 SrcRegion=REGION(16,1) Type=ub           // Command buffer stored in MRF\r
+\r
+#define        BI_CMD_BUFFER   0       // Binding table index for command buffer (same value as BI_CMD_BUF above)\r
+\r
+#define OWBRMSGDSC_SC   0x02088000     // OWORD Block Read Message Descriptor, reading from sampler cache = A.\r
+#define OWBWMSGDSC              0x02080000     // OWORD Block Write Message Descriptor\r
+\r
+#define OWORD_1        0x000   // Block-size values added to the message descriptors above: 1 OWORD\r
+#define OWORD_2        0x200   // 2 OWORDs\r
+#define OWORD_4        0x300   // 4 OWORDs (used for single-command writes)\r
+#define OWORD_8        0x400   // 8 OWORDs (used for 2-command block reads/writes)\r
+\r
+// End of SETHWSCOREBOARD_HEADER\r
+\r
+#endif // !defined(__SETHWSCOREBOARD_HEADER__)\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/SetupForHWMC.asm b/i965_drv_video/shaders/h264/mc/SetupForHWMC.asm
new file mode 100644 (file)
index 0000000..b6dc595
--- /dev/null
@@ -0,0 +1,33 @@
+/*\r
+ * Initial setup for running HWMC kernels in HWMC-Only decoding mode\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: SetupForHWMC.asm\r
+//\r
+// Initial setup for running HWMC kernels in HWMC-Only decoding mode\r
+//\r
+#include "header.inc"\r
+#include "intra_Header.inc"\r
+\r
+#if !defined(__SETUPFORHWMC__) // Directives inside this guard are emitted only once; the two instructions after #endif are emitted on every inclusion\r
+#define __SETUPFORHWMC__\r
+\r
+.reg_count_total    64\r
+.reg_count_payload  2\r
+\r
+//\r
+//  Now, begin source code....\r
+//\r
+\r
+.code\r
+#endif // !defined(__SETUPFORHWMC__)\r
+\r
+    mov (8)    MSGSRC<1>:ud    r0.0<8;8,1>:ud                  // Initialize message header payload with R0\r
+       shl (2) I_ORIX<1>:uw    ORIX<2;2,1>:ub  4:w             // Convert MB origin (2 byte values) to pixel units: multiply by 16\r
+\r
+// End of SetupForHWMC\r
diff --git a/i965_drv_video/shaders/h264/mc/add_Error_16x16_Y.asm b/i965_drv_video/shaders/h264/mc/add_Error_16x16_Y.asm
new file mode 100644 (file)
index 0000000..df01b99
--- /dev/null
@@ -0,0 +1,51 @@
+/*\r
+ * Add macroblock correction Y data blocks to predicted picture\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+        \r
+// Module name: add_Error_16x16_Y.asm\r
+//\r
+// Add macroblock correction Y data blocks to predicted picture\r
+//\r
+\r
+//  Every line of predicted Y data is added to its Y error data, unless no Y CBP bit is set (then all four blocks are skipped)\r
+\r
+    mov (1) PERROR_UD<1>:ud    0x10001*ERRBUF*GRFWIB+0x00100000:ud     // Pointers to first and second row of error block (second pointer = first + 0x10 bytes)\r
+\r
+    and.z.f0.1 (1)     NULLREG    REG_CBPCY    CBP_Y_MASK      // f0.1 = no CBP_Y_MASK bit is set in REG_CBPCY\r
+    (f0.1) jmpi (1) End_add_Error_16x16_Y      // No Y CBP bit set: skip all blocks\r
+\r
+//  Block Y0\r
+//\r
+    $for(0,0; <8; 2,1) {\r
+       add.sat (16)    DEC_Y(%1)<2>    r[PERROR,%2*GRFWIB]REGION(8,1):w        PRED_Y(%1)REGION(8,2) {Compr}\r
+    }\r
+\r
+//  Block Y1\r
+//  (error data at PERROR + 0x80)\r
+    $for(0,0; <8; 2,1) {\r
+       add.sat (16)    DEC_Y(%1,16)<2>         r[PERROR,%2*GRFWIB+0x80]REGION(8,1):w   PRED_Y(%1,16)REGION(8,2) {Compr}\r
+    }\r
+\r
+//  Block Y2\r
+//  (error data at PERROR + 0x100)\r
+    $for(8,0; <16; 2,1) {\r
+       add.sat (16)    DEC_Y(%1)<2>    r[PERROR,%2*GRFWIB+0x100]REGION(8,1):w  PRED_Y(%1)REGION(8,2) {Compr}\r
+    }\r
+\r
+//  Block Y3\r
+//  (error data at PERROR + 0x180)\r
+    $for(8,0; <16; 2,1) {\r
+       add.sat (16)    DEC_Y(%1,16)<2>         r[PERROR,%2*GRFWIB+0x180]REGION(8,1):w  PRED_Y(%1,16)REGION(8,2) {Compr}\r
+    }\r
+\r
+End_add_Error_16x16_Y:\r
+    add (1) PERROR_UD<1>:ud    PERROR_UD:ud    0x01800180:ud   // Advance both error pointers by 0x180 bytes (to Y3 error block); also executed when the blocks are skipped\r
+\r
+//  End of add_Error_16x16_Y\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/add_Error_UV.asm b/i965_drv_video/shaders/h264/mc/add_Error_UV.asm
new file mode 100644 (file)
index 0000000..e2c0dea
--- /dev/null
@@ -0,0 +1,38 @@
+/*\r
+ * Add macroblock correction UV data blocks to predicted picture        \r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+\r
+#if !defined(__ADD_ERROR_UV__)         // Make sure this is only included once\r
+#define __ADD_ERROR_UV__\r
+\r
+// Module name: add_Error_UV.asm\r
+//\r
+// Add macroblock correction UV data blocks to predicted picture\r
+\r
+// PERROR points to error block Y3 after decoding Y component\r
+\r
+//     Update address register used in instruction compression\r
+//\r
+\r
+//  U component\r
+//  (error data at PERROR + 0x80)\r
+    add (1) PERROR1<1>:w       PERROR:w        0x00010:w       // PERROR1 = PERROR + 0x10 bytes: pointer to next error row\r
+    $for(0,0; <8; 2,1) {\r
+       add.sat (16)    DEC_UV(%1)<4>   r[PERROR,%2*GRFWIB+0x80]REGION(8,1):w   PRED_UV(%1)REGION(8,4) {Compr}\r
+    }\r
+\r
+//  V component\r
+//  (error data at PERROR + 0x100)\r
+    $for(0,0; <8; 2,1) {\r
+       add.sat (16)    DEC_UV(%1,2)<4> r[PERROR,%2*GRFWIB+0x100]REGION(8,1):w  PRED_UV(%1,2)REGION(8,4) {Compr}\r
+    }\r
+\r
+//  End of add_Error_UV\r
+\r
+#endif // !defined(__ADD_ERROR_UV__)\r
diff --git a/i965_drv_video/shaders/h264/mc/avc_mc.g4b b/i965_drv_video/shaders/h264/mc/avc_mc.g4b
new file mode 100644 (file)
index 0000000..5a91f32
--- /dev/null
@@ -0,0 +1,2938 @@
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000000d4 },
+   { 0x00000005, 0x220e3e2c, 0x00000070, 0x000f000f },
+   { 0x00000001, 0x26a00221, 0x00009c38, 0x00000000 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x00a02001, 0x24000229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25000229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25400229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25800229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25c00229, 0x00b10624, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x06440640 },
+   { 0x00a02001, 0x24000229, 0x00009003, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x0000900b, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00009013, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x0000901b, 0x00000000 },
+   { 0x00a02001, 0x25000229, 0x00009023, 0x00000000 },
+   { 0x00a02001, 0x25400229, 0x0000902b, 0x00000000 },
+   { 0x00a02001, 0x25800229, 0x00009033, 0x00000000 },
+   { 0x00a02001, 0x25c00229, 0x0000903b, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000072 },
+   { 0x02600005, 0x20000c20, 0x0000006c, 0x00000002 },
+   { 0x00600005, 0x24000c20, 0x0000006c, 0x00000011 },
+   { 0x01600007, 0x20000c00, 0x028d0400, 0x00000011 },
+   { 0x00780001, 0x26240169, 0x00000000, 0x80808080 },
+   { 0x00780001, 0x66430231, 0x028d0624, 0x00000000 },
+   { 0x00780001, 0x66630231, 0x028d062c, 0x00000000 },
+   { 0x00780001, 0x26240231, 0x00cf0643, 0x00000000 },
+   { 0x00780001, 0x262c0231, 0x00cf0663, 0x00000000 },
+   { 0x00800040, 0x25e04629, 0x00cf0643, 0x00b10624 },
+   { 0x00600040, 0x25e02529, 0x008d05e0, 0x008d05f0 },
+   { 0x00400040, 0x25e02529, 0x006905e0, 0x006905e8 },
+   { 0x00200040, 0x25e02529, 0x004505e0, 0x004505e4 },
+   { 0x00a02040, 0x2400252c, 0x000005e0, 0x000005e2 },
+   { 0x00a02040, 0x24003d8c, 0x00b10400, 0x00100010 },
+   { 0x00a02008, 0x24003d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x24403d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x24803d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x24c03d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25003d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25403d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25803d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25c03d89, 0x00b10400, 0x00050005 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00600041, 0x26806e2d, 0x008d062c, 0x89abcdef },
+   { 0x00600041, 0x26906e2d, 0x008d0623, 0xfedcba98 },
+   { 0x00600041, 0x26a06e2d, 0x00cf0663, 0x89abcdef },
+   { 0x00600041, 0x26b06e2d, 0x00cf0643, 0x0fedcba9 },
+   { 0x00000041, 0x26be3e2d, 0x00000623, 0xfff8fff8 },
+   { 0x00802040, 0x268035ad, 0x008d4680, 0x008d0690 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0688 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0684 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0682 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00200020 },
+   { 0x00200048, 0x24003dac, 0x00a00680, 0x00050005 },
+   { 0x00200008, 0x26e03d8d, 0x00450400, 0x00060006 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x00000633, 0x00100010 },
+   { 0x00800048, 0x26c03e2d, 0x0000067f, 0x00100010 },
+   { 0x00800048, 0x272055ad, 0x000006e0, 0x00b10040 },
+   { 0x00600041, 0x268055ad, 0x000006e2, 0x00ae0040 },
+   { 0x00600041, 0x26a055ad, 0x000006e2, 0x00ae0041 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00680 },
+   { 0x80a02008, 0x44003d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00682 },
+   { 0x80a02008, 0x44403d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00684 },
+   { 0x80a02008, 0x44803d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00686 },
+   { 0x80a02008, 0x44c03d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00688 },
+   { 0x80a02008, 0x45003d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b0068a },
+   { 0x80a02008, 0x45403d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b0068c },
+   { 0x80a02008, 0x45803d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b0068e },
+   { 0x80a02008, 0x45c03d91, 0x00b10400, 0x00050005 },
+   { 0x00000001, 0x22040060, 0x00000000, 0x00900080 },
+   { 0x01000005, 0x20000c20, 0x02000068, 0x00003c00 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000020 },
+   { 0x80802040, 0x440045b1, 0x008d8800, 0x00ae0400 },
+   { 0x80802040, 0x444045b1, 0x008d8820, 0x00ae0440 },
+   { 0x80802040, 0x448045b1, 0x008d8840, 0x00ae0480 },
+   { 0x80802040, 0x44c045b1, 0x008d8860, 0x00ae04c0 },
+   { 0x80802040, 0x441045b1, 0x008d8880, 0x00ae0410 },
+   { 0x80802040, 0x445045b1, 0x008d88a0, 0x00ae0450 },
+   { 0x80802040, 0x449045b1, 0x008d88c0, 0x00ae0490 },
+   { 0x80802040, 0x44d045b1, 0x008d88e0, 0x00ae04d0 },
+   { 0x80802040, 0x450045b1, 0x008d8900, 0x00ae0500 },
+   { 0x80802040, 0x454045b1, 0x008d8920, 0x00ae0540 },
+   { 0x80802040, 0x458045b1, 0x008d8940, 0x00ae0580 },
+   { 0x80802040, 0x45c045b1, 0x008d8960, 0x00ae05c0 },
+   { 0x80802040, 0x451045b1, 0x008d8980, 0x00ae0510 },
+   { 0x80802040, 0x455045b1, 0x008d89a0, 0x00ae0550 },
+   { 0x80802040, 0x459045b1, 0x008d89c0, 0x00ae0590 },
+   { 0x80802040, 0x45d045b1, 0x008d89e0, 0x00ae05d0 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x01800180 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000000e2 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000018 },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00200440, 0x27c055a5, 0x004507fa, 0x0045002a },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x0000001b },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x02186000 },
+   { 0x01600031, 0x26200021, 0x408d07c0, 0x00000200 },
+   { 0x00000440, 0x27c43ca5, 0x000007c4, 0x00010001 },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x000f0003 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x00100000 },
+   { 0x02600031, 0x26400021, 0x408d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000008, 0x27fc3dad, 0x000007fc, 0x00010001 },
+   { 0x00200440, 0x27c055a5, 0x004507fa, 0x0045002a },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x00000013 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0xefffc001 },
+   { 0x01600031, 0x26200021, 0x408d07c0, 0x00000200 },
+   { 0x00000440, 0x27c43ca5, 0x000007c4, 0x00010001 },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x00070003 },
+   { 0x05600031, 0x26400021, 0x408d07c0, 0x00000200 },
+   { 0x00000008, 0x220e3e2c, 0x0000006c, 0x00060006 },
+   { 0x00000001, 0x26a002a5, 0x00009c3c, 0x00000000 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x02600005, 0x20000c20, 0x0000006c, 0x00000002 },
+   { 0x00780001, 0x26240169, 0x00000000, 0x80808080 },
+   { 0x02400005, 0x20000c20, 0x0200006c, 0x00000001 },
+   { 0x00560001, 0x46420129, 0x02690624, 0x00000000 },
+   { 0x02400005, 0x20000c20, 0x0200006c, 0x00000010 },
+   { 0x00560001, 0x46520129, 0x0269062c, 0x00000000 },
+   { 0x00780001, 0x26240129, 0x00ae0642, 0x00000000 },
+   { 0x00800040, 0x24004629, 0x00b10624, 0x00650642 },
+   { 0x00600040, 0x24002529, 0x00650400, 0x00650404 },
+   { 0x00600040, 0x25202529, 0x00050400, 0x00050404 },
+   { 0x00600040, 0x25702529, 0x00050408, 0x0005040c },
+   { 0x00560001, 0x26240169, 0x00000000, 0x80808080 },
+   { 0x00460001, 0x26240129, 0x028a0652, 0x00000000 },
+   { 0x00560001, 0x46520129, 0x02690624, 0x00000000 },
+   { 0x02400005, 0x20000c20, 0x0200006c, 0x00000001 },
+   { 0x00560001, 0x46420169, 0x02000000, 0x80808080 },
+   { 0x00460001, 0x46420129, 0x0069062c, 0x00000000 },
+   { 0x00560001, 0x262c0129, 0x008a0642, 0x00000000 },
+   { 0x00600040, 0x24004629, 0x008d0624, 0x00650652 },
+   { 0x00600040, 0x24104629, 0x00650642, 0x008d062c },
+   { 0x00600040, 0x24002529, 0x00650400, 0x00650404 },
+   { 0x00600040, 0x25302529, 0x00050408, 0x0005040c },
+   { 0x00600040, 0x25602529, 0x00050400, 0x00050404 },
+   { 0x00a02040, 0x24003d2c, 0x00b10520, 0x00040004 },
+   { 0x00a02008, 0x24003d89, 0x00b10400, 0x00030003 },
+   { 0x00a02008, 0x24403d89, 0x00b10400, 0x00030003 },
+   { 0x00a02040, 0x24003d2c, 0x00b10560, 0x00040004 },
+   { 0x00a02008, 0x24803d89, 0x00b10400, 0x00030003 },
+   { 0x00a02008, 0x24c03d89, 0x00b10400, 0x00030003 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000048 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x06440640 },
+   { 0x00a02001, 0x24000229, 0x00059002, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x0005900a, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00059012, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x0005901a, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000003c },
+   { 0x00a02001, 0x24000229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x00b10624, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000032 },
+   { 0x00600041, 0x26806e2d, 0x008d062c, 0x44332211 },
+   { 0x00600041, 0x26906e2d, 0x008d0622, 0xffeeddcc },
+   { 0x00600041, 0x26a06e2d, 0x00650652, 0x44332211 },
+   { 0x00600041, 0x26b06e2d, 0x00650642, 0x00ffeedd },
+   { 0x00200041, 0x26bc3e2d, 0x00450622, 0xfffcfffc },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0690 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0688 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0684 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00200020 },
+   { 0x00400048, 0x24003dac, 0x00a50680, 0x00220022 },
+   { 0x00400008, 0x26e03d8d, 0x00690400, 0x00060006 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x00050632, 0x00100010 },
+   { 0x00800048, 0x26c03e2d, 0x0005065e, 0x00100010 },
+   { 0x00800048, 0x272055ad, 0x000506e0, 0x00240044 },
+   { 0x00600041, 0x268055ad, 0x000506e4, 0x00440044 },
+   { 0x00600041, 0x26a055ad, 0x000506e4, 0x00440045 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00050680 },
+   { 0x80a02008, 0x44003d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00050684 },
+   { 0x80a02008, 0x44403d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00050688 },
+   { 0x80a02008, 0x44803d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x0005068c },
+   { 0x80a02008, 0x44c03d91, 0x00b10400, 0x00050005 },
+   { 0x00000040, 0x22063d8c, 0x00000204, 0x00100010 },
+   { 0x80802040, 0x640045b1, 0x008d8880, 0x00cf0400 },
+   { 0x80802040, 0x644045b1, 0x008d88a0, 0x00cf0440 },
+   { 0x80802040, 0x648045b1, 0x008d88c0, 0x00cf0480 },
+   { 0x80802040, 0x64c045b1, 0x008d88e0, 0x00cf04c0 },
+   { 0x80802040, 0x640245b1, 0x008d8900, 0x00cf0402 },
+   { 0x80802040, 0x644245b1, 0x008d8920, 0x00cf0442 },
+   { 0x80802040, 0x648245b1, 0x008d8940, 0x00cf0482 },
+   { 0x80802040, 0x64c245b1, 0x008d8960, 0x00cf04c2 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x0007000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x08004000 },
+   { 0x00800001, 0x20400232, 0x00d20400, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20420, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20440, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d20460, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d20480, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d204a0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d204c0, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d204e0, 0x00000000 },
+   { 0x01600031, 0x27a00021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x0ff04000 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x04400400 },
+   { 0x00a02001, 0x20400232, 0x00d29000, 0x00000000 },
+   { 0x00a02001, 0x20500232, 0x00d29020, 0x00000000 },
+   { 0x00a02001, 0x20800232, 0x00d29080, 0x00000000 },
+   { 0x00a02001, 0x20900232, 0x00d290a0, 0x00000000 },
+   { 0x00a02001, 0x20c00232, 0x00d29100, 0x00000000 },
+   { 0x00a02001, 0x20d00232, 0x00d29120, 0x00000000 },
+   { 0x00a02001, 0x21000232, 0x00d29180, 0x00000000 },
+   { 0x00a02001, 0x21100232, 0x00d291a0, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff04 },
+   { 0x00000001, 0x220401ec, 0x00000000, 0x00800080 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x04100400 },
+   { 0x00200008, 0x27006e2d, 0x0000006c, 0x00000040 },
+   { 0x00400001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x272001a9, 0x00b10620, 0x00000000 },
+   { 0x02600005, 0x20001c20, 0x0000006c, 0x00000008 },
+   { 0x00110001, 0x27230231, 0x00000624, 0x00000000 },
+   { 0x00600001, 0x27420231, 0x00cf0643, 0x00000000 },
+   { 0x00110001, 0x27410231, 0x00000643, 0x00000000 },
+   { 0x00240001, 0x27400231, 0x00000623, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000002 },
+   { 0x00010001, 0x27400231, 0x02000624, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000001 },
+   { 0x00110001, 0x27420231, 0x02000623, 0x00000000 },
+   { 0x00000005, 0x26803e2d, 0x00000070, 0x000f000f },
+   { 0x00000001, 0x270801ad, 0x00000700, 0x00000000 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000084 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00800001, 0x27200231, 0x00b10628, 0x00000000 },
+   { 0x02600005, 0x20001c20, 0x0200006c, 0x00000004 },
+   { 0x00680001, 0x272c0231, 0x028d0634, 0x00000000 },
+   { 0x00780001, 0x272c0231, 0x02000633, 0x00000000 },
+   { 0x00400001, 0x27420231, 0x00a0040e, 0x00000000 },
+   { 0x00400001, 0x27460231, 0x00a0044e, 0x00000000 },
+   { 0x00200001, 0x27400231, 0x0000062b, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000002 },
+   { 0x00010001, 0x27400231, 0x0200062c, 0x00000000 },
+   { 0x00110001, 0x27410231, 0x0200040e, 0x00000000 },
+   { 0x00000008, 0x26803e2d, 0x00000070, 0x00040004 },
+   { 0x00200040, 0x22083d8c, 0x00450208, 0x00800080 },
+   { 0x00000006, 0x27083dad, 0x00000700, 0x00010001 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000064 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00a02001, 0xb3800231, 0x00d20400, 0x00000000 },
+   { 0x00a02001, 0xb3a00231, 0x00d20480, 0x00000000 },
+   { 0x00a02001, 0xb3c00231, 0x00d20440, 0x00000000 },
+   { 0x00a02001, 0xb3e00231, 0x00d204c0, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x00000060, 0x00000002 },
+   { 0x01000005, 0x20001c20, 0x0200006c, 0x00000001 },
+   { 0x01010005, 0x20001c20, 0x00000060, 0x00004000 },
+   { 0x02010005, 0x20001c20, 0x02000060, 0x00000010 },
+   { 0x00030001, 0x27230231, 0x0000065f, 0x00000000 },
+   { 0x00030220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02600005, 0x20001c20, 0x0000006c, 0x00000001 },
+   { 0x00460001, 0x27200231, 0x0069065c, 0x00000000 },
+   { 0x00110001, 0x27230231, 0x00000458, 0x00000000 },
+   { 0x00600001, 0x27240231, 0x008d0458, 0x00000000 },
+   { 0x00600001, 0x272c0231, 0x008d0478, 0x00000000 },
+   { 0x00600001, 0x27420231, 0x00cf0663, 0x00000000 },
+   { 0x00000001, 0x27400231, 0x00000458, 0x00000000 },
+   { 0x00010001, 0x27410231, 0x0000065f, 0x00000000 },
+   { 0x00110001, 0x27410231, 0x00000663, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000010 },
+   { 0x00110001, 0x27420231, 0x0200065f, 0x00000000 },
+   { 0x00000005, 0x26803e2d, 0x00000071, 0x000f000f },
+   { 0x00000006, 0x27083dad, 0x00000702, 0x00020002 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000030 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00400001, 0x27200231, 0x0069045c, 0x00000000 },
+   { 0x00600001, 0x27240231, 0x008d0478, 0x00000000 },
+   { 0x00800001, 0x272c0231, 0x0000047f, 0x00000000 },
+   { 0x00400001, 0x27420231, 0x00a0048e, 0x00000000 },
+   { 0x00400001, 0x27460231, 0x00a004ce, 0x00000000 },
+   { 0x00000001, 0x27400231, 0x00000478, 0x00000000 },
+   { 0x00000001, 0x27410231, 0x0000045f, 0x00000000 },
+   { 0x00000008, 0x26803e2d, 0x00000071, 0x00040004 },
+   { 0x00200040, 0x22083d8c, 0x00450208, 0x00800080 },
+   { 0x00000006, 0x27083dad, 0x00000700, 0x00030003 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00a02001, 0xb3800231, 0x00d20480, 0x00000000 },
+   { 0x00a02001, 0xb3a00231, 0x00d20500, 0x00000000 },
+   { 0x00a02001, 0xb3c00231, 0x00d204c0, 0x00000000 },
+   { 0x00a02001, 0xb3e00231, 0x00d20540, 0x00000000 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000100 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffe74 },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000001, 0x27340231, 0x00000733, 0x00000000 },
+   { 0x00600001, 0x274a0231, 0x00000749, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b10723, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10724, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10725, 0x00010001 },
+   { 0x00800008, 0x26c03d8d, 0x008d0400, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x00b10740, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10741, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10742, 0x00010001 },
+   { 0x00800008, 0x26a03d8d, 0x008d0400, 0x00020002 },
+   { 0x00800001, 0x27240231, 0x00d206c0, 0x00000000 },
+   { 0x00600001, 0x27400231, 0x00ae06a2, 0x00000000 },
+   { 0x00000001, 0x27230231, 0x000006a0, 0x00000000 },
+   { 0x00000005, 0x220e3dac, 0x00000680, 0x000f000f },
+   { 0x00000001, 0x26a00221, 0x00009c2c, 0x00000000 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x80800040, 0xd00045b1, 0x00b18800, 0x000d0724 },
+   { 0x80800040, 0xd02045b1, 0x00b18820, 0x000d0724 },
+   { 0x80800040, 0xd04045b1, 0x00b18840, 0x000d0724 },
+   { 0x80800040, 0xd06045b1, 0x00b18860, 0x000d0724 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x80800040, 0xd00045b1, 0x00b18800, 0x002c0740 },
+   { 0x80800040, 0xd02045b1, 0x00b18820, 0x002c0742 },
+   { 0x80800040, 0xd04045b1, 0x00b18840, 0x002c0744 },
+   { 0x80800040, 0xd06045b1, 0x00b18860, 0x002c0746 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x02800005, 0x20003da0, 0x00000708, 0x00020002 },
+   { 0x02600005, 0x20003da0, 0x02000708, 0x00010001 },
+   { 0x009a0001, 0x27200169, 0x00000000, 0x80808080 },
+   { 0x00780001, 0x27400231, 0x028d0724, 0x00000000 },
+   { 0x00780001, 0x27240231, 0x008d0740, 0x00000000 },
+   { 0x00600040, 0x25e04629, 0x008d0724, 0x008d0740 },
+   { 0x00400040, 0x25e02529, 0x006905e0, 0x006905e8 },
+   { 0x00200040, 0x25e02529, 0x004505e0, 0x004505e4 },
+   { 0x00800040, 0x2400252c, 0x000005e0, 0x000005e2 },
+   { 0x00800040, 0x24003d8c, 0x008d0400, 0x00080008 },
+   { 0x00800008, 0x26803d8d, 0x008d0400, 0x00040004 },
+   { 0x80800040, 0xd00035b1, 0x00b18800, 0x00b10680 },
+   { 0x80800040, 0xd02035b1, 0x00b18820, 0x00b10680 },
+   { 0x80800040, 0xd04035b1, 0x00b18840, 0x00b10680 },
+   { 0x80800040, 0xd06035b1, 0x00b18860, 0x00b10680 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27340231, 0x008d0733, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b10726, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10725, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10724, 0x00010001 },
+   { 0x00800008, 0x26803d8d, 0x00b10400, 0x00020002 },
+   { 0x80800040, 0xd00035b1, 0x00b18800, 0x002d0680 },
+   { 0x80800040, 0xd02035b1, 0x00b18820, 0x002d0684 },
+   { 0x80800040, 0xd04035b1, 0x00b18840, 0x002d0688 },
+   { 0x80800040, 0xd06035b1, 0x00b18860, 0x002d068c },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00400009, 0x26c05421, 0x00000744, 0x00690050 },
+   { 0x00400009, 0x26d05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26a00231, 0x00cf06c3, 0x00000000 },
+   { 0x00800001, 0x26a80231, 0x00b10723, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b106a2, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a1, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a0, 0x00010001 },
+   { 0x00800008, 0x26a03d8d, 0x00b10400, 0x00020002 },
+   { 0x00200040, 0x220c3eac, 0x00450056, 0x06a006a0 },
+   { 0x80800040, 0xd06035b1, 0x01ed9800, 0x00b18860 },
+   { 0x80800040, 0xd04035b1, 0x01ed9804, 0x00b18840 },
+   { 0x80800040, 0xd02035b1, 0x01ed9808, 0x00b18820 },
+   { 0x80800040, 0xd00035b1, 0x01ed980c, 0x00b18800 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00400009, 0x26c05421, 0x00000744, 0x00690050 },
+   { 0x00400009, 0x26d05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26a00231, 0x00cf06c3, 0x00000000 },
+   { 0x00800001, 0x26a80231, 0x00b10723, 0x00000000 },
+   { 0x00800042, 0x25c04629, 0x00b106a8, 0x00b106a9 },
+   { 0x00800040, 0x24003e2c, 0x00b106a3, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a2, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a1, 0x00010001 },
+   { 0x00800008, 0x26c03d8d, 0x008d0400, 0x00020002 },
+   { 0x00600001, 0x26a00231, 0x00ae06c0, 0x00000000 },
+   { 0x00600001, 0x46a60231, 0x00ae06cc, 0x00000000 },
+   { 0x00600001, 0x46a70231, 0x00ae05c0, 0x00000000 },
+   { 0x00200040, 0x220c3eac, 0x00450036, 0x06a006a0 },
+   { 0x80800040, 0xd0603631, 0x01ee9800, 0x00b18860 },
+   { 0x80800040, 0xd0403631, 0x01ee9802, 0x00b18840 },
+   { 0x80800040, 0xd0203631, 0x01ee9804, 0x00b18820 },
+   { 0x80800040, 0xd0003631, 0x01ee9806, 0x00b18800 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00400009, 0x26c05421, 0x00000744, 0x00690050 },
+   { 0x00400009, 0x26d05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26a00231, 0x00ab06c3, 0x00000000 },
+   { 0x00800001, 0x26a80231, 0x00b10723, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b106a2, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a1, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a0, 0x00010001 },
+   { 0x00800008, 0x25c03d89, 0x008d0400, 0x00020002 },
+   { 0x00800042, 0x26a0462d, 0x00b106a0, 0x00b106a1 },
+   { 0x00600001, 0x46a10231, 0x00ae05c0, 0x00000000 },
+   { 0x00600001, 0x26b00231, 0x00ae05d0, 0x00000000 },
+   { 0x00200040, 0x220c3eac, 0x00450056, 0x06a006a0 },
+   { 0x80800040, 0xd0603631, 0x01ed9800, 0x00b18860 },
+   { 0x80800040, 0xd0403631, 0x01ed9804, 0x00b18840 },
+   { 0x80800040, 0xd0203631, 0x01ed9808, 0x00b18820 },
+   { 0x80800040, 0xd0003631, 0x01ed980c, 0x00b18800 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00800042, 0x25c04629, 0x00b10724, 0x00b10725 },
+   { 0x00800040, 0x24003e2c, 0x00b10726, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10725, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10724, 0x00010001 },
+   { 0x00800008, 0x25e03d89, 0x00240400, 0x00020002 },
+   { 0x80800040, 0xd0003531, 0x00ad05c0, 0x00b18800 },
+   { 0x80800040, 0xd0203531, 0x00ad05c2, 0x00b18820 },
+   { 0x80800040, 0xd0403531, 0x00ad05c4, 0x00b18840 },
+   { 0x80800040, 0xd0603531, 0x00ad05c6, 0x00b18860 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27480231, 0x00000747, 0x00000000 },
+   { 0x00800042, 0x25c04629, 0x00b10740, 0x00b10741 },
+   { 0x00800040, 0x24003e2c, 0x00b10742, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10741, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10740, 0x00010001 },
+   { 0x00800008, 0x25e03d89, 0x00240400, 0x00020002 },
+   { 0x00800001, 0x45c10231, 0x00d205e0, 0x00000000 },
+   { 0x80800040, 0xd0003631, 0x004d05c0, 0x00b18800 },
+   { 0x80800040, 0xd0203631, 0x004d05c4, 0x00b18820 },
+   { 0x80800040, 0xd0403631, 0x004d05c8, 0x00b18840 },
+   { 0x80800040, 0xd0603631, 0x004d05cc, 0x00b18860 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x0ff04000 },
+   { 0x00800001, 0x20400232, 0x00cd0400, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00cd0408, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00cd0410, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00cd0418, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00cd0440, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00cd0448, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00cd0450, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00cd0458, 0x00000000 },
+   { 0x00800001, 0x20c00232, 0x00cd0480, 0x00000000 },
+   { 0x00800001, 0x20d00232, 0x00cd0488, 0x00000000 },
+   { 0x00800001, 0x20e00232, 0x00cd0490, 0x00000000 },
+   { 0x00800001, 0x20f00232, 0x00cd0498, 0x00000000 },
+   { 0x00800001, 0x21000232, 0x00cd04c0, 0x00000000 },
+   { 0x00800001, 0x21100232, 0x00cd04c8, 0x00000000 },
+   { 0x00800001, 0x21200232, 0x00cd04d0, 0x00000000 },
+   { 0x00800001, 0x21300232, 0x00cd04d8, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffd34 },
+   { 0x00000001, 0x220401ec, 0x00000000, 0x00800080 },
+   { 0x00000001, 0x220601ec, 0x00000000, 0x04000400 },
+   { 0x00200008, 0x27006e2d, 0x0000006c, 0x00000040 },
+   { 0x02600005, 0x20003e20, 0x0000006c, 0x00040004 },
+   { 0x00780001, 0x26340231, 0x00000633, 0x00000000 },
+   { 0x00800001, 0x27200231, 0x00b10620, 0x00000000 },
+   { 0x00600001, 0x27400231, 0x00cf0643, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240070, 0x00004040 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000006a },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00000006, 0x27003dad, 0x00000700, 0x00010001 },
+   { 0x00800001, 0x27200231, 0x00b10628, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c26, 0x00000000 },
+   { 0x00400001, 0x27440231, 0x00808c66, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240072, 0x00004040 },
+   { 0x00000040, 0x22063d8c, 0x00000206, 0x00800080 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000058 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00000006, 0x27003dad, 0x00000702, 0x00020002 },
+   { 0x00800001, 0xaf800231, 0x00d28f80, 0x00000000 },
+   { 0x00800001, 0xaf900231, 0x00d28fa0, 0x00000000 },
+   { 0x00800001, 0xafc00231, 0x00d28fc0, 0x00000000 },
+   { 0x00800001, 0xafd00231, 0x00d28fe0, 0x00000000 },
+   { 0x00800001, 0xafa00231, 0x00d28c00, 0x00000000 },
+   { 0x00800001, 0xafb00231, 0x00d28c20, 0x00000000 },
+   { 0x00800001, 0xafe00231, 0x00d28c40, 0x00000000 },
+   { 0x00800001, 0xaff00231, 0x00d28c60, 0x00000000 },
+   { 0x00400001, 0x27200231, 0x0069065c, 0x00000000 },
+   { 0x00600001, 0x27240231, 0x00a98fcc, 0x00000000 },
+   { 0x00600001, 0x272c0231, 0x00a98fec, 0x00000000 },
+   { 0x00600001, 0x27340231, 0x00008fff, 0x00000000 },
+   { 0x00800001, 0x2620012d, 0x00b10720, 0x00000000 },
+   { 0x00600001, 0x27400231, 0x00cf0663, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240074, 0x00004040 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000032 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00000006, 0x27003dad, 0x00000700, 0x00010001 },
+   { 0x00800001, 0x27200231, 0x00b10628, 0x00000000 },
+   { 0x00600001, 0x27300231, 0x008d0638, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c26, 0x00000000 },
+   { 0x00400001, 0x27440231, 0x00808c66, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240076, 0x00004040 },
+   { 0x00000040, 0x22063d8c, 0x00000206, 0x00800080 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x00800001, 0xaf800231, 0x00d28f80, 0x00000000 },
+   { 0x00800001, 0xaf900231, 0x00d28fa0, 0x00000000 },
+   { 0x00800001, 0xafc00231, 0x00d28fc0, 0x00000000 },
+   { 0x00800001, 0xafd00231, 0x00d28fe0, 0x00000000 },
+   { 0x00800001, 0xafa00231, 0x00d28c00, 0x00000000 },
+   { 0x00800001, 0xafb00231, 0x00d28c20, 0x00000000 },
+   { 0x00800001, 0xafe00231, 0x00d28c40, 0x00000000 },
+   { 0x00800001, 0xaff00231, 0x00d28c60, 0x00000000 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000000e0 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffcce },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00600001, 0x27800231, 0x008d0740, 0x00000000 },
+   { 0x00400005, 0x22083dac, 0x00690680, 0x000f000f },
+   { 0x00400040, 0x26a04625, 0x01e09020, 0x00690058 },
+   { 0x00000001, 0x26d001ad, 0x00000700, 0x00000000 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x80600040, 0xcc0035b1, 0x00898800, 0x008d0760 },
+   { 0x80600040, 0xcc1035b1, 0x00898820, 0x008d0770 },
+   { 0x00800001, 0x27200231, 0x008d0724, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c06, 0x00000000 },
+   { 0x00000006, 0x26d03dad, 0x00000700, 0x00010001 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a4 },
+   { 0x80600040, 0xcc2035b1, 0x00898808, 0x008d0760 },
+   { 0x80600040, 0xcc3035b1, 0x00898828, 0x008d0770 },
+   { 0x00000001, 0x27230231, 0x00000783, 0x00000000 },
+   { 0x00400001, 0x27240231, 0x008a8c18, 0x00000000 },
+   { 0x00400001, 0x27280231, 0x008a8c38, 0x00000000 },
+   { 0x00400001, 0x272c0231, 0x00008c3e, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00690784, 0x00000000 },
+   { 0x00000006, 0x26d03dad, 0x00000700, 0x00020002 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a8 },
+   { 0x80600040, 0xcc4035b1, 0x00898840, 0x008d0760 },
+   { 0x80600040, 0xcc5035b1, 0x00898860, 0x008d0770 },
+   { 0x00800001, 0x27200231, 0x008d0724, 0x00000000 },
+   { 0x00600001, 0x27280231, 0x00000727, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c46, 0x00000000 },
+   { 0x00000006, 0x26d03dad, 0x00000700, 0x00030003 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006ac },
+   { 0x80600040, 0xcc6035b1, 0x00898848, 0x008d0760 },
+   { 0x80600040, 0xcc7035b1, 0x00898868, 0x008d0770 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00800001, 0x2760022d, 0x00090724, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00800001, 0x2760022d, 0x00280740, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x02802005, 0x20003da0, 0x000006d0, 0x00020002 },
+   { 0x02802005, 0x20003da0, 0x020006d0, 0x00010001 },
+   { 0x009a0001, 0x27200169, 0x00000000, 0x80808080 },
+   { 0x00780001, 0x27400231, 0x028d0724, 0x00000000 },
+   { 0x00780001, 0x27240231, 0x008d0740, 0x00000000 },
+   { 0x00400040, 0x25e04629, 0x00690724, 0x00690740 },
+   { 0x00200040, 0x25e02529, 0x004505e0, 0x004505e4 },
+   { 0x00800040, 0x2400252c, 0x000005e0, 0x000005e2 },
+   { 0x00800040, 0x24003d8c, 0x008d0400, 0x00040004 },
+   { 0x00800008, 0x27603d8d, 0x008d0400, 0x00030003 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00600001, 0x26c00231, 0x008d0724, 0x00000000 },
+   { 0x00400001, 0x26c80231, 0x0069072b, 0x00000000 },
+   { 0x00600040, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00020002 },
+   { 0x00600048, 0x25e03e29, 0x008d06c0, 0x00010001 },
+   { 0x00800008, 0x27603d2d, 0x002905e0, 0x00020002 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00400009, 0x26e05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26c40231, 0x008d0723, 0x00000000 },
+   { 0x00400001, 0x26c00231, 0x00ab06e3, 0x00000000 },
+   { 0x00600040, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00020002 },
+   { 0x00600048, 0x26c03e2d, 0x008d06c0, 0x00010001 },
+   { 0x00400040, 0x22083eac, 0x00690054, 0x06c006c0 },
+   { 0x00800008, 0x27603dad, 0x01e99000, 0x00020002 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00400009, 0x26e05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26c40231, 0x008d0723, 0x00000000 },
+   { 0x00400001, 0x26c00231, 0x00ab06e3, 0x00000000 },
+   { 0x00600042, 0x25c04629, 0x008d06c4, 0x008d06c5 },
+   { 0x00600040, 0x24003e2c, 0x008d06c3, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00010001 },
+   { 0x00600008, 0x26c03d8d, 0x008d0400, 0x00020002 },
+   { 0x00400001, 0x46c401ad, 0x006906c4, 0x00000000 },
+   { 0x00400001, 0x46c6012d, 0x006905c0, 0x00000000 },
+   { 0x00400040, 0x22083eac, 0x00690054, 0x06c006c0 },
+   { 0x00800001, 0x276001ad, 0x01ea9000, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00400009, 0x26e05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26c40231, 0x008d0723, 0x00000000 },
+   { 0x00400001, 0x26c00231, 0x00ab06e3, 0x00000000 },
+   { 0x00600042, 0x25c04629, 0x008d06c0, 0x008d06c1 },
+   { 0x00600040, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00020002 },
+   { 0x00600048, 0x26e03e2d, 0x008d06c0, 0x00010001 },
+   { 0x00400008, 0x46c23dad, 0x006906e0, 0x00020002 },
+   { 0x00200008, 0x26d03dad, 0x004506e8, 0x00020002 },
+   { 0x00400001, 0x46c0012d, 0x006905c0, 0x00000000 },
+   { 0x00400009, 0x22083eac, 0x00690054, 0x00010001 },
+   { 0x00400040, 0x22083d8c, 0x00690208, 0x06c006c0 },
+   { 0x00800001, 0x276001ad, 0x01e99000, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00600042, 0x45c04629, 0x008d0724, 0x008d0725 },
+   { 0x00600040, 0x24003e2c, 0x008d0726, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d0725, 0x00020002 },
+   { 0x00600048, 0x25e03e29, 0x008d0724, 0x00010001 },
+   { 0x00600008, 0x45c23d29, 0x008d05e0, 0x00020002 },
+   { 0x00800001, 0x2760012d, 0x002a05c0, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00600001, 0x27440231, 0x00000743, 0x00000000 },
+   { 0x00600042, 0x45c04629, 0x008d0740, 0x008d0741 },
+   { 0x00600040, 0x24003e2c, 0x008d0742, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d0741, 0x00020002 },
+   { 0x00600048, 0x25e03e29, 0x008d0740, 0x00010001 },
+   { 0x00600008, 0x45c23d29, 0x008d05e0, 0x00020002 },
+   { 0x00800001, 0x2760012d, 0x004905c0, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x0ff04000 },
+   { 0x00800001, 0x20400232, 0x00a90400, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00a90404, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00a90408, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00a9040c, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00a90440, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00a90444, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00a90448, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00a9044c, 0x00000000 },
+   { 0x00800001, 0x20c00232, 0x00a90480, 0x00000000 },
+   { 0x00800001, 0x20d00232, 0x00a90484, 0x00000000 },
+   { 0x00800001, 0x20e00232, 0x00a90488, 0x00000000 },
+   { 0x00800001, 0x20f00232, 0x00a9048c, 0x00000000 },
+   { 0x00800001, 0x21000232, 0x00a904c0, 0x00000000 },
+   { 0x00800001, 0x21100232, 0x00a904c4, 0x00000000 },
+   { 0x00800001, 0x21200232, 0x00a904c8, 0x00000000 },
+   { 0x00800001, 0x21300232, 0x00a904cc, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200809, 0x27c03e21, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x1218a000 },
+   { 0x00a02401, 0x20400232, 0x00b10080, 0x00000000 },
+   { 0x00a02801, 0x20500232, 0x00b10090, 0x00000000 },
+   { 0x00a02401, 0x20800232, 0x00b100c0, 0x00000000 },
+   { 0x00a02801, 0x20900232, 0x00b100d0, 0x00000000 },
+   { 0x00a02401, 0x20c00232, 0x00b10100, 0x00000000 },
+   { 0x00a02801, 0x20d00232, 0x00b10110, 0x00000000 },
+   { 0x00a02401, 0x21000232, 0x00b10140, 0x00000000 },
+   { 0x00a02801, 0x21100232, 0x00b10150, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000401, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x0000080c, 0x20243c22, 0x000007c4, 0x00010001 },
+   { 0x00000040, 0x22001c00, 0x00000200, 0xf8000001 },
+   { 0x00800001, 0x40400232, 0x00b10180, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00b101c0, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00b10190, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00b101d0, 0x00000000 },
+   { 0x00800001, 0x40800232, 0x00b101a0, 0x00000000 },
+   { 0x00800001, 0x40810232, 0x00b101e0, 0x00000000 },
+   { 0x00800001, 0x40a00232, 0x00b101b0, 0x00000000 },
+   { 0x00800001, 0x40a10232, 0x00b101f0, 0x00000000 },
+   { 0x01600031, 0x27a00001, 0x508d0000, 0x00000200 },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
+   { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
+   { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00000000 },
+   { 0x01000010, 0x20003dac, 0x00000020, 0x00160016 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x01200010, 0x20003e2c, 0x0200006c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000024 },
+   { 0x00800009, 0x25403dad, 0x00050064, 0x00020002 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00180018 },
+   { 0x00200040, 0x45483dad, 0x00660548, 0x00100010 },
+   { 0x00200040, 0x45523dad, 0x00660552, 0x00100010 },
+   { 0x00400040, 0x25583dad, 0x00690558, 0x00100010 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0x00200020 },
+   { 0x00800040, 0x20a035ad, 0x00b100a0, 0x00b10540 },
+   { 0x00600040, 0x45423dad, 0x00ae0542, 0x00200020 },
+   { 0x00800040, 0x20e035ad, 0x00b100e0, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0xffe0ffe0 },
+   { 0x00800040, 0x20c035ad, 0x00b100c0, 0x00b10540 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000014 },
+   { 0x06600010, 0x20003dac, 0x02000020, 0x00030003 },
+   { 0x00000001, 0x206c0171, 0x00000000, 0x00000000 },
+   { 0x00610001, 0x208000a5, 0x02050080, 0x00000000 },
+   { 0x00610001, 0x212000a5, 0x02090100, 0x00000000 },
+   { 0x00410001, 0x211000a5, 0x02690100, 0x00000000 },
+   { 0x00200009, 0x25403dad, 0x00450064, 0x00020002 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00050540 },
+   { 0x00200040, 0x40883dad, 0x00660088, 0x00200020 },
+   { 0x00200040, 0x40923dad, 0x00660092, 0x00200020 },
+   { 0x00400040, 0x20983dad, 0x00690098, 0x00200020 },
+   { 0x00200401, 0x22080060, 0x00000000, 0x03400140 },
+   { 0x00000c01, 0x220c0060, 0x00000000, 0x04400080 },
+   { 0x00000801, 0x22040060, 0x00000000, 0x01000070 },
+   { 0x00000001, 0x20200169, 0x00000000, 0x00000000 },
+   { 0x0000000c, 0x2458262d, 0x0000006d, 0x00000020 },
+   { 0x00000001, 0x210e0169, 0x00000000, 0x00010001 },
+   { 0x00000005, 0x24583dad, 0x00000458, 0x00030003 },
+   { 0x01000010, 0x200035ac, 0x0000010e, 0x00000458 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000002fe },
+   { 0x01000010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00010401, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00010805, 0x245a3e2d, 0x02008800, 0x007f007f },
+   { 0x00110401, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00110805, 0x245a3e2d, 0x02008804, 0x007f007f },
+   { 0x0000000c, 0x211e362d, 0x0000006c, 0x00000020 },
+   { 0x00000040, 0x20780d21, 0x0000045a, 0x0208a002 },
+   { 0x02000005, 0x20003dac, 0x0200011e, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000001be },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000012 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00700000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c000c },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x0007000c },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00110040, 0x20441da5, 0x02000442, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c0007 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00070007 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x01600006, 0x20003dac, 0x0000044a, 0x00000000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00010040, 0x22000c20, 0x00000078, 0x00200010 },
+   { 0x00110040, 0x22000c20, 0x00000078, 0x00300010 },
+   { 0x00010001, 0x20480061, 0x00000000, 0x00030009 },
+   { 0x00110001, 0x20480061, 0x00000000, 0x00040009 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001a },
+   { 0x00600001, 0x26220231, 0x008d05c0, 0x00000000 },
+   { 0x00600401, 0x26120231, 0x008d05b8, 0x00000000 },
+   { 0x00600801, 0x26020231, 0x008d05b0, 0x00000000 },
+   { 0x00600401, 0x25f20231, 0x008d05a8, 0x00000000 },
+   { 0x00600801, 0x25e20231, 0x008d05a0, 0x00000000 },
+   { 0x00600401, 0x25d20231, 0x008d0598, 0x00000000 },
+   { 0x00600801, 0x25c20231, 0x008d0590, 0x00000000 },
+   { 0x00600401, 0x25b20231, 0x008d0588, 0x00000000 },
+   { 0x00600801, 0x25a20231, 0x008d0580, 0x00000000 },
+   { 0x00600401, 0x25920231, 0x008d0578, 0x00000000 },
+   { 0x00600801, 0x25820231, 0x008d0570, 0x00000000 },
+   { 0x00600001, 0x25720231, 0x008d0568, 0x00000000 },
+   { 0x00600001, 0x25620231, 0x008d0560, 0x00000000 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00810001, 0xbc000229, 0x02ad8000, 0x00000000 },
+   { 0x00810001, 0xbc200229, 0x02ad8020, 0x00000000 },
+   { 0x00810001, 0xbc400229, 0x02ad8040, 0x00000000 },
+   { 0x00810001, 0xbc600229, 0x02ad8060, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000012a },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000066 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000064 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04a00480 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00a02040, 0x2400462c, 0x00ad8000, 0x00ad8005 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8001, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8002, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8003, 0x00140014 },
+   { 0x00a02048, 0xb8003e2d, 0x00ad8004, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8040, 0x00ad8045 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8041, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8042, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8043, 0x00140014 },
+   { 0x00a02048, 0xb8403e2d, 0x00ad8044, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8080, 0x00ad8085 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8081, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8082, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8083, 0x00140014 },
+   { 0x00a02048, 0xb8803e2d, 0x00ad8084, 0xfffbfffb },
+   { 0x00600040, 0x2400462c, 0x008d80c0, 0x008d80c5 },
+   { 0x00600048, 0x24003e2c, 0x008d80c1, 0xfffbfffb },
+   { 0x00600048, 0x24003e2c, 0x008d80c2, 0x00140014 },
+   { 0x00600048, 0x24003e2c, 0x008d80c3, 0x00140014 },
+   { 0x00600048, 0xb8c03e2d, 0x008d80c4, 0xfffbfffb },
+   { 0x00000401, 0x22000060, 0x00000000, 0x04a00480 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04a00490 },
+   { 0x00010802, 0x220c2d28, 0x00000454, 0x04800480 },
+   { 0x00a02040, 0x24003dac, 0x00b18000, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8800, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8820, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18020, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8820, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8840, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18040, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x000a000a },
+   { 0x00a02040, 0x24003dac, 0x00b18040, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18060, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8860, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8880, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18080, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8880, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed88a0, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000000b4 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000004c },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000004a },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010002, 0x220c2d28, 0x02000454, 0x04800480 },
+   { 0x00800040, 0x24003e2c, 0x01ed8000, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8001, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8021, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8002, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8022, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8003, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8023, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8004, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8024, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8005, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8025, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00800040, 0x24003e2c, 0x01ed8040, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8060, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8041, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8061, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8042, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8062, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8043, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8063, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8044, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8064, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8045, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8065, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000005c },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000062 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000042 },
+   { 0x00200401, 0x22000128, 0x0066044c, 0x00000000 },
+   { 0x00200c01, 0x22040128, 0x0045044e, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x03400010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220c0168, 0x02000000, 0x04800480 },
+   { 0x00a02040, 0x24003e2c, 0x00ad83e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8be0, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8800, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8800, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8820, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00a02040, 0x24003e2c, 0x00ad8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8040, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8840, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8860, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8060, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8860, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8880, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001c },
+   { 0x00200401, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x01200010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01200010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00210040, 0x22002d08, 0x00450200, 0x00010001 },
+   { 0x00210040, 0x22002d08, 0x02450200, 0x00100010 },
+   { 0x00800001, 0xb8000229, 0x01ed8000, 0x00000000 },
+   { 0x00800001, 0xb8200229, 0x01ed8020, 0x00000000 },
+   { 0x00800001, 0xb8400229, 0x01ed8040, 0x00000000 },
+   { 0x00800001, 0xb8600229, 0x01ed8060, 0x00000000 },
+   { 0x80800042, 0xd8004631, 0x00d29800, 0x00d20480 },
+   { 0x80800042, 0xd8204631, 0x00d29820, 0x00d204a0 },
+   { 0x80800042, 0xd8404631, 0x00d29840, 0x00d204c0 },
+   { 0x80800042, 0xd8604631, 0x00d29860, 0x00d204e0 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000c01, 0x220201e8, 0x00000000, 0x07100710 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00200040, 0x244c3d09, 0x00450200, 0x00100010 },
+   { 0x00800041, 0x24002628, 0x00ad8000, 0x00000540 },
+   { 0x00800041, 0x24202628, 0x00ad8020, 0x00000540 },
+   { 0x00800048, 0x24002628, 0x00ad8002, 0x00000542 },
+   { 0x00800048, 0x24202628, 0x00ad8022, 0x00000542 },
+   { 0x00200001, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00800048, 0x24002628, 0x01ed8000, 0x00000544 },
+   { 0x00800048, 0x24202628, 0x01ed8020, 0x00000544 },
+   { 0x00800048, 0xbc002629, 0x01ed8002, 0x00000546 },
+   { 0x00801048, 0xbc202629, 0x01ed8022, 0x00000546 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000126 },
+   { 0x00000001, 0x20220169, 0x00000000, 0x00040004 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00500000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x00080008 },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00030008 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x00000078, 0x00100010 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00000001, 0x20480061, 0x00000000, 0x00020005 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00410001, 0xbc000229, 0x02698000, 0x00000000 },
+   { 0x00410001, 0xbc100229, 0x02698010, 0x00000000 },
+   { 0x00410001, 0xbc200229, 0x02698020, 0x00000000 },
+   { 0x00410001, 0xbc300229, 0x02698030, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000be },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000042 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04d004c0 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00802040, 0x2400462c, 0x00a98000, 0x00a98005 },
+   { 0x00802048, 0x24003e2c, 0x00a98001, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98002, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98003, 0x00140014 },
+   { 0x00802048, 0xb8003e2d, 0x00a98004, 0xfffbfffb },
+   { 0x00802040, 0x2400462c, 0x00a98040, 0x00a98045 },
+   { 0x00802048, 0x24003e2c, 0x00a98041, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98042, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98043, 0x00140014 },
+   { 0x00802048, 0xb8203e2d, 0x00a98044, 0xfffbfffb },
+   { 0x00400040, 0x2400462c, 0x00698080, 0x00698085 },
+   { 0x00400048, 0x24003e2c, 0x00698081, 0xfffbfffb },
+   { 0x00400048, 0x24003e2c, 0x00698082, 0x00140014 },
+   { 0x00400048, 0x24003e2c, 0x00698083, 0x00140014 },
+   { 0x00400048, 0xb8403e2d, 0x00698084, 0xfffbfffb },
+   { 0x00000401, 0x220c0060, 0x00000000, 0x04e004d0 },
+   { 0x00000c01, 0x22000060, 0x00000000, 0x04d004c8 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04e004d8 },
+   { 0x00000801, 0x22080060, 0x00000000, 0x04f004e8 },
+   { 0x00800040, 0x24003dac, 0x00b104c0, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x00b104e0, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed9800, 0x00140014 },
+   { 0x00010001, 0x220c0168, 0x00000000, 0x05400540 },
+   { 0x00110001, 0x220c0168, 0x00000000, 0x04c004c0 },
+   { 0x00800048, 0x24003dac, 0x01e98000, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01e98020, 0x00010001 },
+   { 0x00800048, 0x24003dac, 0x01e98800, 0x00140014 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000022 },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010001, 0x220e0168, 0x02000000, 0x05400540 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x00800040, 0x24003e2c, 0x01e98000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98001, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98002, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98003, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98004, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98005, 0x00010001 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x03600010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x00400401, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x00400840, 0x22083d2c, 0x0069044c, 0x00100010 },
+   { 0x00610040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x01e983e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e993e0, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e99000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e99020, 0x00010001 },
+   { 0x00000001, 0x220e0168, 0x00000000, 0x05400540 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x00400001, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x01400010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01400010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00010001 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00100010 },
+   { 0x00800001, 0x25400229, 0x01e98000, 0x00000000 },
+   { 0x80800042, 0x45404631, 0x00d20540, 0x00d204c0 },
+   { 0x00000001, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x00400001, 0xd8000231, 0x008a0540, 0x00000000 },
+   { 0x00400001, 0xd8100231, 0x008a0548, 0x00000000 },
+   { 0x00400001, 0xd8200231, 0x008a0550, 0x00000000 },
+   { 0x00400001, 0xd8300231, 0x008a0558, 0x00000000 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00600041, 0x24002628, 0x00898000, 0x00000540 },
+   { 0x00600048, 0x24002628, 0x00898002, 0x00000542 },
+   { 0x00600048, 0x24002628, 0x00898008, 0x00000544 },
+   { 0x00600048, 0x2540262d, 0x0089800a, 0x00000546 },
+   { 0x00400401, 0xbc0001a9, 0x00690540, 0x00000000 },
+   { 0x00400801, 0xbc1001a9, 0x00690548, 0x00000000 },
+   { 0x01000010, 0x20003d2c, 0x00000022, 0x00030003 },
+   { 0x01000040, 0x20223d29, 0x02000022, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00080008 },
+   { 0x00110040, 0x24540c21, 0x00000454, 0x00080008 },
+   { 0x00010040, 0x24540c21, 0x00000454, 0x00180038 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffee6 },
+   { 0x01800010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0xffe0ffe0 },
+   { 0x00010001, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00110001, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00000001, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00800040, 0x24003dac, 0x00b19c00, 0x00200020 },
+   { 0x00800040, 0x24203dac, 0x00b19c20, 0x00200020 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00060006 },
+   { 0x8080000c, 0xdc203d91, 0x00b10420, 0x00060006 },
+   { 0x02000040, 0x210e3d29, 0x0200010e, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00040004 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffcf8 },
+   { 0x01000005, 0x25643e2d, 0x00000060, 0x00c000c0 },
+   { 0x01000010, 0x20003dac, 0x02000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00a02001, 0x46400231, 0x00d20780, 0x00000000 },
+   { 0x00a02001, 0x46800231, 0x00d207c0, 0x00000000 },
+   { 0x00a02001, 0x46c00231, 0x00d20480, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000007e },
+   { 0x80a02042, 0x46404631, 0x00d20640, 0x00d20780 },
+   { 0x80a02042, 0x46804631, 0x00d20680, 0x00d207c0 },
+   { 0x80a02042, 0x46c04631, 0x00d206c0, 0x00d20480 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000076 },
+   { 0x01000010, 0x20003dac, 0x02000564, 0x00800080 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffe4 },
+   { 0x00200401, 0x256801ed, 0x00000000, 0x00200020 },
+   { 0x00200801, 0x256c01ed, 0x00000000, 0x00060006 },
+   { 0x00400001, 0x258001ed, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x45a001ad, 0x00058c00, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000028 },
+   { 0x02600005, 0x20006e28, 0x02008c0c, 0x88848421 },
+   { 0x0220000c, 0x25663dad, 0x00000458, 0x00010001 },
+   { 0x0000000c, 0x25643dad, 0x00000564, 0x00060006 },
+   { 0x00310001, 0x25c401ad, 0x00000458, 0x00000000 },
+   { 0x00210001, 0x25c00061, 0x00000000, 0x00010001 },
+   { 0x00310040, 0x25c03dad, 0x004545c4, 0x00010001 },
+   { 0x00610001, 0x25a00061, 0x02000000, 0x00000080 },
+   { 0x00710001, 0x45a002ad, 0x02ae8c00, 0x00000000 },
+   { 0x00710001, 0x45a202ad, 0x02ae8c01, 0x00000000 },
+   { 0x00800041, 0x25a035ad, 0x00b105a0, 0x000905c0 },
+   { 0x01200010, 0x20003e2c, 0x0245006e, 0x00000000 },
+   { 0x00310009, 0x244c45ad, 0x02000564, 0x0045006e },
+   { 0x00210001, 0x244c01ed, 0x02000000, 0x00000000 },
+   { 0x0031000c, 0x244c3dad, 0x0245044c, 0x00010001 },
+   { 0x00200009, 0x256835ad, 0x0045044c, 0x00000566 },
+   { 0x00210040, 0x256835ad, 0x02450568, 0x00000566 },
+   { 0x00200040, 0x256c362d, 0x0045006e, 0x00000566 },
+   { 0x00400040, 0x240035ac, 0x00ab05a2, 0x00ab05a6 },
+   { 0x00400040, 0x2400358c, 0x00690400, 0x00000566 },
+   { 0x0040000c, 0x2580358d, 0x00690400, 0x00000566 },
+   { 0x00800041, 0x2400362c, 0x00d20640, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d20660, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d20780, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207a0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x464035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x466035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d20680, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d206a0, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d207c0, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207e0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x468035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x46a035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d206c0, 0x000705a8 },
+   { 0x00800041, 0x2420362c, 0x00d206e0, 0x000705a8 },
+   { 0x00800048, 0x2400362c, 0x00d20480, 0x000705ac },
+   { 0x00800048, 0x2420362c, 0x00d204a0, 0x000705ac },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x0000056a },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x0000056a },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056e },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056e },
+   { 0x80800040, 0x46c035b1, 0x00b104c0, 0x00050582 },
+   { 0x80800040, 0x46e035b1, 0x00b104e0, 0x00050582 },
+   { 0x01800005, 0x20003d2c, 0x02000020, 0x00020002 },
+   { 0x80800040, 0xd00045b1, 0x00b19000, 0x00d20640 },
+   { 0x80800040, 0xd02045b1, 0x00b19020, 0x00d20660 },
+   { 0x80800040, 0xd04045b1, 0x00b19040, 0x00d20680 },
+   { 0x80800040, 0xd06045b1, 0x00b19060, 0x00d206a0 },
+   { 0x00000040, 0x22083d8c, 0x00000208, 0x00800080 },
+   { 0x80400040, 0xd40045b1, 0x00699400, 0x00ab06c0 },
+   { 0x80400040, 0xd48045b1, 0x00699480, 0x00ab06c2 },
+   { 0x80400040, 0xd42045b1, 0x00699420, 0x00ab06e0 },
+   { 0x80400040, 0xd4a045b1, 0x006994a0, 0x00ab06e2 },
+   { 0x80400040, 0xd41045b1, 0x00699410, 0x00ab06d0 },
+   { 0x80400040, 0xd49045b1, 0x00699490, 0x00ab06d2 },
+   { 0x80400040, 0xd43045b1, 0x00699430, 0x00ab06f0 },
+   { 0x80400040, 0xd4b045b1, 0x006994b0, 0x00ab06f2 },
+   { 0x00000040, 0x220a3d8c, 0x0000020a, 0x00080008 },
+   { 0x00110040, 0x220a3d8c, 0x0200020a, 0x00300030 },
+   { 0x01000010, 0x20003d2c, 0x02000020, 0x00060006 },
+   { 0x00000040, 0x20203d29, 0x00000020, 0x00020002 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00100001 },
+   { 0x00000040, 0x220c358c, 0x0000020c, 0x0000003a },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffc36 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x01400140 },
+   { 0x00000001, 0x220201ec, 0x00000000, 0x01c001c0 },
+   { 0x00800401, 0x20400236, 0x01ee8000, 0x00000000 },
+   { 0x00800801, 0x20500236, 0x01ee8010, 0x00000000 },
+   { 0x00800401, 0x20600236, 0x01ee8020, 0x00000000 },
+   { 0x00800801, 0x20700236, 0x01ee8030, 0x00000000 },
+   { 0x00800401, 0x20800236, 0x01ee8040, 0x00000000 },
+   { 0x00800801, 0x20900236, 0x01ee8050, 0x00000000 },
+   { 0x00800401, 0x20a00236, 0x01ee8060, 0x00000000 },
+   { 0x00800801, 0x20b00236, 0x01ee8070, 0x00000000 },
+   { 0x00800401, 0x20c00236, 0x01ee8100, 0x00000000 },
+   { 0x00800801, 0x20d00236, 0x01ee8110, 0x00000000 },
+   { 0x00800401, 0x20e00236, 0x01ee8120, 0x00000000 },
+   { 0x00800801, 0x20f00236, 0x01ee8130, 0x00000000 },
+   { 0x00800401, 0x21000236, 0x01ee8140, 0x00000000 },
+   { 0x00800801, 0x21100236, 0x01ee8150, 0x00000000 },
+   { 0x00800401, 0x21200236, 0x01ee8160, 0x00000000 },
+   { 0x00800801, 0x21300236, 0x01ee8170, 0x00000000 },
+   { 0x00200401, 0x204001a5, 0x00450064, 0x00000000 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x000f000f },
+   { 0x01600031, 0x21401c21, 0x508d0040, 0x1218a000 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x03400340 },
+   { 0x00800401, 0x41600236, 0x00d28000, 0x00000000 },
+   { 0x00800801, 0x41610236, 0x00d28080, 0x00000000 },
+   { 0x00800401, 0x41800236, 0x00d28020, 0x00000000 },
+   { 0x00800801, 0x41810236, 0x00d280a0, 0x00000000 },
+   { 0x00800401, 0x41a00236, 0x00d28040, 0x00000000 },
+   { 0x00800801, 0x41a10236, 0x00d280c0, 0x00000000 },
+   { 0x00800401, 0x41c00236, 0x00d28060, 0x00000000 },
+   { 0x00800801, 0x41c10236, 0x00d280e0, 0x00000000 },
+   { 0x0000040c, 0x20443ca5, 0x00000044, 0x00010001 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x0007000f },
+   { 0x0a600031, 0x21601c21, 0x508d0040, 0x0a18a001 },
+   { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
+   { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
+   { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
+   { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00000000 },
+   { 0x01000010, 0x20003dac, 0x00000020, 0x00160016 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x01200010, 0x20003e2c, 0x0200006c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000024 },
+   { 0x00800009, 0x25403dad, 0x00050064, 0x00020002 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00180018 },
+   { 0x00200040, 0x45483dad, 0x00660548, 0x00100010 },
+   { 0x00200040, 0x45523dad, 0x00660552, 0x00100010 },
+   { 0x00400040, 0x25583dad, 0x00690558, 0x00100010 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0x00200020 },
+   { 0x00800040, 0x20a035ad, 0x00b100a0, 0x00b10540 },
+   { 0x00600040, 0x45423dad, 0x00ae0542, 0x00200020 },
+   { 0x00800040, 0x20e035ad, 0x00b100e0, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0xffe0ffe0 },
+   { 0x00800040, 0x20c035ad, 0x00b100c0, 0x00b10540 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000014 },
+   { 0x06600010, 0x20003dac, 0x02000020, 0x00030003 },
+   { 0x00000001, 0x206c0171, 0x00000000, 0x00000000 },
+   { 0x00610001, 0x208000a5, 0x02050080, 0x00000000 },
+   { 0x00610001, 0x212000a5, 0x02090100, 0x00000000 },
+   { 0x00410001, 0x211000a5, 0x02690100, 0x00000000 },
+   { 0x00200009, 0x25403dad, 0x00450064, 0x00020002 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00050540 },
+   { 0x00200040, 0x40883dad, 0x00660088, 0x00200020 },
+   { 0x00200040, 0x40923dad, 0x00660092, 0x00200020 },
+   { 0x00400040, 0x20983dad, 0x00690098, 0x00200020 },
+   { 0x00200401, 0x22080060, 0x00000000, 0x03400140 },
+   { 0x00000c01, 0x220c0060, 0x00000000, 0x04400080 },
+   { 0x00000801, 0x22040060, 0x00000000, 0x01000070 },
+   { 0x00000005, 0x203e2e29, 0x00000063, 0x00010001 },
+   { 0x00000001, 0x20200169, 0x00000000, 0x00000000 },
+   { 0x0000000c, 0x2458262d, 0x0000006d, 0x00000020 },
+   { 0x00000001, 0x210e0169, 0x00000000, 0x00010001 },
+   { 0x00000005, 0x24583dad, 0x00000458, 0x00030003 },
+   { 0x01000010, 0x200035ac, 0x0000010e, 0x00000458 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000316 },
+   { 0x01000010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00010401, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00010805, 0x245a3e2d, 0x02008800, 0x007f007f },
+   { 0x00110401, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00110805, 0x245a3e2d, 0x02008804, 0x007f007f },
+   { 0x00010005, 0x245c3e2d, 0x02008800, 0x00800080 },
+   { 0x00110005, 0x245c3e2d, 0x02008804, 0x00800080 },
+   { 0x00000009, 0x245c3dad, 0x0000045c, 0x00010001 },
+   { 0x0000000c, 0x211e362d, 0x0000006c, 0x00000020 },
+   { 0x00000040, 0x240035ac, 0x0000003e, 0x0000045c },
+   { 0x01000010, 0x20003d8c, 0x00210400, 0x00010001 },
+   { 0x01000010, 0x20003d8c, 0x02210400, 0x01000100 },
+   { 0x00000001, 0x203c01ed, 0x00000000, 0x00000000 },
+   { 0x00010001, 0x203c01ed, 0x00000000, 0x00020002 },
+   { 0x00010001, 0x203c01ed, 0x02000000, 0xfffefffe },
+   { 0x00000040, 0x20780d21, 0x0000045a, 0x0208e602 },
+   { 0x00000040, 0x20782421, 0x00000078, 0x0000045c },
+   { 0x02000005, 0x20003dac, 0x0200011e, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000001c0 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000012 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00700000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c000c },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x0007000c },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00110040, 0x20441da5, 0x02000442, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c0007 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00070007 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x01600006, 0x20003dac, 0x0000044a, 0x00000000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00010040, 0x22000c20, 0x00000078, 0x00200010 },
+   { 0x00110040, 0x22000c20, 0x00000078, 0x00300010 },
+   { 0x00010001, 0x20480061, 0x00000000, 0x00030009 },
+   { 0x00110001, 0x20480061, 0x00000000, 0x00040009 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001a },
+   { 0x00600001, 0x26220231, 0x008d05c0, 0x00000000 },
+   { 0x00600401, 0x26120231, 0x008d05b8, 0x00000000 },
+   { 0x00600801, 0x26020231, 0x008d05b0, 0x00000000 },
+   { 0x00600401, 0x25f20231, 0x008d05a8, 0x00000000 },
+   { 0x00600801, 0x25e20231, 0x008d05a0, 0x00000000 },
+   { 0x00600401, 0x25d20231, 0x008d0598, 0x00000000 },
+   { 0x00600801, 0x25c20231, 0x008d0590, 0x00000000 },
+   { 0x00600401, 0x25b20231, 0x008d0588, 0x00000000 },
+   { 0x00600801, 0x25a20231, 0x008d0580, 0x00000000 },
+   { 0x00600401, 0x25920231, 0x008d0578, 0x00000000 },
+   { 0x00600801, 0x25820231, 0x008d0570, 0x00000000 },
+   { 0x00600001, 0x25720231, 0x008d0568, 0x00000000 },
+   { 0x00600001, 0x25620231, 0x008d0560, 0x00000000 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00810001, 0xbc000229, 0x02ad8000, 0x00000000 },
+   { 0x00810001, 0xbc200229, 0x02ad8020, 0x00000000 },
+   { 0x00810001, 0xbc400229, 0x02ad8040, 0x00000000 },
+   { 0x00810001, 0xbc600229, 0x02ad8060, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000012a },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000066 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000064 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04a00480 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00a02040, 0x2400462c, 0x00ad8000, 0x00ad8005 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8001, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8002, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8003, 0x00140014 },
+   { 0x00a02048, 0xb8003e2d, 0x00ad8004, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8040, 0x00ad8045 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8041, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8042, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8043, 0x00140014 },
+   { 0x00a02048, 0xb8403e2d, 0x00ad8044, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8080, 0x00ad8085 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8081, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8082, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8083, 0x00140014 },
+   { 0x00a02048, 0xb8803e2d, 0x00ad8084, 0xfffbfffb },
+   { 0x00600040, 0x2400462c, 0x008d80c0, 0x008d80c5 },
+   { 0x00600048, 0x24003e2c, 0x008d80c1, 0xfffbfffb },
+   { 0x00600048, 0x24003e2c, 0x008d80c2, 0x00140014 },
+   { 0x00600048, 0x24003e2c, 0x008d80c3, 0x00140014 },
+   { 0x00600048, 0xb8c03e2d, 0x008d80c4, 0xfffbfffb },
+   { 0x00000401, 0x22000060, 0x00000000, 0x04a00480 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04a00490 },
+   { 0x00010802, 0x220c2d28, 0x00000454, 0x04800480 },
+   { 0x00a02040, 0x24003dac, 0x00b18000, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8800, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8820, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18020, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8820, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8840, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18040, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x000a000a },
+   { 0x00a02040, 0x24003dac, 0x00b18040, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18060, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8860, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8880, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18080, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8880, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed88a0, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000000b4 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000004c },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000004a },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010002, 0x220c2d28, 0x02000454, 0x04800480 },
+   { 0x00800040, 0x24003e2c, 0x01ed8000, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8001, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8021, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8002, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8022, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8003, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8023, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8004, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8024, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8005, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8025, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00800040, 0x24003e2c, 0x01ed8040, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8060, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8041, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8061, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8042, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8062, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8043, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8063, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8044, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8064, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8045, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8065, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000005c },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000062 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000042 },
+   { 0x00200401, 0x22000128, 0x0066044c, 0x00000000 },
+   { 0x00200c01, 0x22040128, 0x0045044e, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x03400010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220c0168, 0x02000000, 0x04800480 },
+   { 0x00a02040, 0x24003e2c, 0x00ad83e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8be0, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8800, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8800, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8820, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00a02040, 0x24003e2c, 0x00ad8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8040, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8840, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8860, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8060, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8860, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8880, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001c },
+   { 0x00200401, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x01200010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01200010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00210040, 0x22002d08, 0x00450200, 0x00010001 },
+   { 0x00210040, 0x22002d08, 0x02450200, 0x00100010 },
+   { 0x00800001, 0xb8000229, 0x01ed8000, 0x00000000 },
+   { 0x00800001, 0xb8200229, 0x01ed8020, 0x00000000 },
+   { 0x00800001, 0xb8400229, 0x01ed8040, 0x00000000 },
+   { 0x00800001, 0xb8600229, 0x01ed8060, 0x00000000 },
+   { 0x80800042, 0xd8004631, 0x00d29800, 0x00d20480 },
+   { 0x80800042, 0xd8204631, 0x00d29820, 0x00d204a0 },
+   { 0x80800042, 0xd8404631, 0x00d29840, 0x00d204c0 },
+   { 0x80800042, 0xd8604631, 0x00d29860, 0x00d204e0 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000c01, 0x220201e8, 0x00000000, 0x07100710 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00200040, 0x244c3d09, 0x00450200, 0x00100010 },
+   { 0x00800041, 0x24002628, 0x00ad8000, 0x00000540 },
+   { 0x00800041, 0x24202628, 0x00ad8020, 0x00000540 },
+   { 0x00800048, 0x24002628, 0x00ad8002, 0x00000542 },
+   { 0x00800048, 0x24202628, 0x00ad8022, 0x00000542 },
+   { 0x00200001, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00800048, 0x24002628, 0x01ed8000, 0x00000544 },
+   { 0x00800048, 0x24202628, 0x01ed8020, 0x00000544 },
+   { 0x00800048, 0xbc002629, 0x01ed8002, 0x00000546 },
+   { 0x00801048, 0xbc202629, 0x01ed8022, 0x00000546 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000128 },
+   { 0x00000001, 0x20220169, 0x00000000, 0x00040004 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00500000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x00080008 },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00030008 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x00000040, 0x22000c20, 0x00000078, 0x00100010 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00000001, 0x20480061, 0x00000000, 0x00020005 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00410001, 0xbc000229, 0x02698000, 0x00000000 },
+   { 0x00410001, 0xbc100229, 0x02698010, 0x00000000 },
+   { 0x00410001, 0xbc200229, 0x02698020, 0x00000000 },
+   { 0x00410001, 0xbc300229, 0x02698030, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000be },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000042 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04d004c0 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00802040, 0x2400462c, 0x00a98000, 0x00a98005 },
+   { 0x00802048, 0x24003e2c, 0x00a98001, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98002, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98003, 0x00140014 },
+   { 0x00802048, 0xb8003e2d, 0x00a98004, 0xfffbfffb },
+   { 0x00802040, 0x2400462c, 0x00a98040, 0x00a98045 },
+   { 0x00802048, 0x24003e2c, 0x00a98041, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98042, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98043, 0x00140014 },
+   { 0x00802048, 0xb8203e2d, 0x00a98044, 0xfffbfffb },
+   { 0x00400040, 0x2400462c, 0x00698080, 0x00698085 },
+   { 0x00400048, 0x24003e2c, 0x00698081, 0xfffbfffb },
+   { 0x00400048, 0x24003e2c, 0x00698082, 0x00140014 },
+   { 0x00400048, 0x24003e2c, 0x00698083, 0x00140014 },
+   { 0x00400048, 0xb8403e2d, 0x00698084, 0xfffbfffb },
+   { 0x00000401, 0x220c0060, 0x00000000, 0x04e004d0 },
+   { 0x00000c01, 0x22000060, 0x00000000, 0x04d004c8 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04e004d8 },
+   { 0x00000801, 0x22080060, 0x00000000, 0x04f004e8 },
+   { 0x00800040, 0x24003dac, 0x00b104c0, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x00b104e0, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed9800, 0x00140014 },
+   { 0x00010001, 0x220c0168, 0x00000000, 0x05400540 },
+   { 0x00110001, 0x220c0168, 0x00000000, 0x04c004c0 },
+   { 0x00800048, 0x24003dac, 0x01e98000, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01e98020, 0x00010001 },
+   { 0x00800048, 0x24003dac, 0x01e98800, 0x00140014 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000022 },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010001, 0x220e0168, 0x02000000, 0x05400540 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x00800040, 0x24003e2c, 0x01e98000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98001, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98002, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98003, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98004, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98005, 0x00010001 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x03600010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x00400401, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x00400840, 0x22083d2c, 0x0069044c, 0x00100010 },
+   { 0x00610040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x01e983e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e993e0, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e99000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e99020, 0x00010001 },
+   { 0x00000001, 0x220e0168, 0x00000000, 0x05400540 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x00400001, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x01400010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01400010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00010001 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00100010 },
+   { 0x00800001, 0x25400229, 0x01e98000, 0x00000000 },
+   { 0x80800042, 0x45404631, 0x00d20540, 0x00d204c0 },
+   { 0x00000001, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x00400001, 0xd8000231, 0x008a0540, 0x00000000 },
+   { 0x00400001, 0xd8100231, 0x008a0548, 0x00000000 },
+   { 0x00400001, 0xd8200231, 0x008a0550, 0x00000000 },
+   { 0x00400001, 0xd8300231, 0x008a0558, 0x00000000 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00600041, 0x24002628, 0x00898000, 0x00000540 },
+   { 0x00600048, 0x24002628, 0x00898002, 0x00000542 },
+   { 0x00600048, 0x24002628, 0x00898008, 0x00000544 },
+   { 0x00600048, 0x2540262d, 0x0089800a, 0x00000546 },
+   { 0x00400401, 0xbc0001a9, 0x00690540, 0x00000000 },
+   { 0x00400801, 0xbc1001a9, 0x00690548, 0x00000000 },
+   { 0x01000010, 0x20003d2c, 0x00000022, 0x00030003 },
+   { 0x01000040, 0x20223d29, 0x02000022, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00080008 },
+   { 0x00110040, 0x24540c21, 0x00000454, 0x00080008 },
+   { 0x00010040, 0x24540c21, 0x00000454, 0x00180038 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffee4 },
+   { 0x01800010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0xffe0ffe0 },
+   { 0x00010001, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00110001, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00000001, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00800040, 0x24003dac, 0x00b19c00, 0x00200020 },
+   { 0x00800040, 0x24203dac, 0x00b19c20, 0x00200020 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00060006 },
+   { 0x8080000c, 0xdc203d91, 0x00b10420, 0x00060006 },
+   { 0x02000040, 0x210e3d29, 0x0200010e, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00040004 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffce0 },
+   { 0x01000005, 0x25643e2d, 0x00000060, 0x00c000c0 },
+   { 0x01000010, 0x20003dac, 0x02000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00a02001, 0x46400231, 0x00d20780, 0x00000000 },
+   { 0x00a02001, 0x46800231, 0x00d207c0, 0x00000000 },
+   { 0x00a02001, 0x46c00231, 0x00d20480, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000007e },
+   { 0x80a02042, 0x46404631, 0x00d20640, 0x00d20780 },
+   { 0x80a02042, 0x46804631, 0x00d20680, 0x00d207c0 },
+   { 0x80a02042, 0x46c04631, 0x00d206c0, 0x00d20480 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000076 },
+   { 0x01000010, 0x20003dac, 0x02000564, 0x00800080 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffe4 },
+   { 0x00200401, 0x256801ed, 0x00000000, 0x00200020 },
+   { 0x00200801, 0x256c01ed, 0x00000000, 0x00060006 },
+   { 0x00400001, 0x258001ed, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x45a001ad, 0x00058c00, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000028 },
+   { 0x02600005, 0x20006e28, 0x02008c0c, 0x88848421 },
+   { 0x0220000c, 0x25663dad, 0x00000458, 0x00010001 },
+   { 0x0000000c, 0x25643dad, 0x00000564, 0x00060006 },
+   { 0x00310001, 0x25c401ad, 0x00000458, 0x00000000 },
+   { 0x00210001, 0x25c00061, 0x00000000, 0x00010001 },
+   { 0x00310040, 0x25c03dad, 0x004545c4, 0x00010001 },
+   { 0x00610001, 0x25a00061, 0x02000000, 0x00000080 },
+   { 0x00710001, 0x45a002ad, 0x02ae8c00, 0x00000000 },
+   { 0x00710001, 0x45a202ad, 0x02ae8c01, 0x00000000 },
+   { 0x00800041, 0x25a035ad, 0x00b105a0, 0x000905c0 },
+   { 0x01200010, 0x20003e2c, 0x0245006e, 0x00000000 },
+   { 0x00310009, 0x244c45ad, 0x02000564, 0x0045006e },
+   { 0x00210001, 0x244c01ed, 0x02000000, 0x00000000 },
+   { 0x0031000c, 0x244c3dad, 0x0245044c, 0x00010001 },
+   { 0x00200009, 0x256835ad, 0x0045044c, 0x00000566 },
+   { 0x00210040, 0x256835ad, 0x02450568, 0x00000566 },
+   { 0x00200040, 0x256c362d, 0x0045006e, 0x00000566 },
+   { 0x00400040, 0x240035ac, 0x00ab05a2, 0x00ab05a6 },
+   { 0x00400040, 0x2400358c, 0x00690400, 0x00000566 },
+   { 0x0040000c, 0x2580358d, 0x00690400, 0x00000566 },
+   { 0x00800041, 0x2400362c, 0x00d20640, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d20660, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d20780, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207a0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x464035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x466035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d20680, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d206a0, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d207c0, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207e0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x468035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x46a035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d206c0, 0x000705a8 },
+   { 0x00800041, 0x2420362c, 0x00d206e0, 0x000705a8 },
+   { 0x00800048, 0x2400362c, 0x00d20480, 0x000705ac },
+   { 0x00800048, 0x2420362c, 0x00d204a0, 0x000705ac },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x0000056a },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x0000056a },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056e },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056e },
+   { 0x80800040, 0x46c035b1, 0x00b104c0, 0x00050582 },
+   { 0x80800040, 0x46e035b1, 0x00b104e0, 0x00050582 },
+   { 0x01800005, 0x20003d2c, 0x02000020, 0x00020002 },
+   { 0x80800040, 0xd00045b1, 0x00b19000, 0x00d20640 },
+   { 0x80800040, 0xd02045b1, 0x00b19020, 0x00d20660 },
+   { 0x80800040, 0xd04045b1, 0x00b19040, 0x00d20680 },
+   { 0x80800040, 0xd06045b1, 0x00b19060, 0x00d206a0 },
+   { 0x00000040, 0x22083d8c, 0x00000208, 0x00800080 },
+   { 0x80400040, 0xd40045b1, 0x00699400, 0x00ab06c0 },
+   { 0x80400040, 0xd48045b1, 0x00699480, 0x00ab06c2 },
+   { 0x80400040, 0xd42045b1, 0x00699420, 0x00ab06e0 },
+   { 0x80400040, 0xd4a045b1, 0x006994a0, 0x00ab06e2 },
+   { 0x80400040, 0xd41045b1, 0x00699410, 0x00ab06d0 },
+   { 0x80400040, 0xd49045b1, 0x00699490, 0x00ab06d2 },
+   { 0x80400040, 0xd43045b1, 0x00699430, 0x00ab06f0 },
+   { 0x80400040, 0xd4b045b1, 0x006994b0, 0x00ab06f2 },
+   { 0x00000040, 0x220a3d8c, 0x0000020a, 0x00080008 },
+   { 0x00110040, 0x220a3d8c, 0x0200020a, 0x00300030 },
+   { 0x01000010, 0x20003d2c, 0x02000020, 0x00060006 },
+   { 0x00000040, 0x20203d29, 0x00000020, 0x00020002 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00100001 },
+   { 0x00000040, 0x220c358c, 0x0000020c, 0x0000003a },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffc1e },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x01400140 },
+   { 0x00000001, 0x220201ec, 0x00000000, 0x01c001c0 },
+   { 0x00800401, 0x20400236, 0x01ee8000, 0x00000000 },
+   { 0x00800801, 0x20500236, 0x01ee8010, 0x00000000 },
+   { 0x00800401, 0x20600236, 0x01ee8020, 0x00000000 },
+   { 0x00800801, 0x20700236, 0x01ee8030, 0x00000000 },
+   { 0x00800401, 0x20800236, 0x01ee8040, 0x00000000 },
+   { 0x00800801, 0x20900236, 0x01ee8050, 0x00000000 },
+   { 0x00800401, 0x20a00236, 0x01ee8060, 0x00000000 },
+   { 0x00800801, 0x20b00236, 0x01ee8070, 0x00000000 },
+   { 0x00800401, 0x20c00236, 0x01ee8100, 0x00000000 },
+   { 0x00800801, 0x20d00236, 0x01ee8110, 0x00000000 },
+   { 0x00800401, 0x20e00236, 0x01ee8120, 0x00000000 },
+   { 0x00800801, 0x20f00236, 0x01ee8130, 0x00000000 },
+   { 0x00800401, 0x21000236, 0x01ee8140, 0x00000000 },
+   { 0x00800801, 0x21100236, 0x01ee8150, 0x00000000 },
+   { 0x00800401, 0x21200236, 0x01ee8160, 0x00000000 },
+   { 0x00800801, 0x21300236, 0x01ee8170, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x1218a600 },
+   { 0x00200401, 0x204001a5, 0x00450064, 0x00000000 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x000f000f },
+   { 0x01600031, 0x21400021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x03400340 },
+   { 0x00800401, 0x41600236, 0x00d28000, 0x00000000 },
+   { 0x00800801, 0x41610236, 0x00d28080, 0x00000000 },
+   { 0x00800401, 0x41800236, 0x00d28020, 0x00000000 },
+   { 0x00800801, 0x41810236, 0x00d280a0, 0x00000000 },
+   { 0x00800401, 0x41a00236, 0x00d28040, 0x00000000 },
+   { 0x00800801, 0x41a10236, 0x00d280c0, 0x00000000 },
+   { 0x00800401, 0x41c00236, 0x00d28060, 0x00000000 },
+   { 0x00800801, 0x41c10236, 0x00d280e0, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x0a18a601 },
+   { 0x0000040c, 0x20443ca5, 0x00000044, 0x00010001 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x0007000f },
+   { 0x0a600031, 0x21600021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
+   { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
+   { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
+   { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00000000 },
+   { 0x01000010, 0x20003dac, 0x00000020, 0x00160016 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x01200010, 0x20003e2c, 0x0200006c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000024 },
+   { 0x00800009, 0x25403dad, 0x00050064, 0x00020002 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00180018 },
+   { 0x00200040, 0x45483dad, 0x00660548, 0x00100010 },
+   { 0x00200040, 0x45523dad, 0x00660552, 0x00100010 },
+   { 0x00400040, 0x25583dad, 0x00690558, 0x00100010 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0x00200020 },
+   { 0x00800040, 0x20a035ad, 0x00b100a0, 0x00b10540 },
+   { 0x00600040, 0x45423dad, 0x00ae0542, 0x00200020 },
+   { 0x00800040, 0x20e035ad, 0x00b100e0, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0xffe0ffe0 },
+   { 0x00800040, 0x20c035ad, 0x00b100c0, 0x00b10540 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000014 },
+   { 0x06600010, 0x20003dac, 0x02000020, 0x00030003 },
+   { 0x00000001, 0x206c0171, 0x00000000, 0x00000000 },
+   { 0x00610001, 0x208000a5, 0x02050080, 0x00000000 },
+   { 0x00610001, 0x212000a5, 0x02090100, 0x00000000 },
+   { 0x00410001, 0x211000a5, 0x02690100, 0x00000000 },
+   { 0x00200009, 0x25403dad, 0x00450064, 0x00020002 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00050540 },
+   { 0x00200040, 0x40883dad, 0x00660088, 0x00200020 },
+   { 0x00200040, 0x40923dad, 0x00660092, 0x00200020 },
+   { 0x00400040, 0x20983dad, 0x00690098, 0x00200020 },
+   { 0x00200401, 0x22080060, 0x00000000, 0x03400140 },
+   { 0x00000c01, 0x220c0060, 0x00000000, 0x04400080 },
+   { 0x00000801, 0x22040060, 0x00000000, 0x01000070 },
+   { 0x02000005, 0x20002e28, 0x00000061, 0x00400040 },
+   { 0x00010005, 0x203e2e29, 0x00000063, 0x00010001 },
+   { 0x00110001, 0x203e0169, 0x00000000, 0x00030003 },
+   { 0x00000001, 0x20200169, 0x00000000, 0x00000000 },
+   { 0x0000000c, 0x2458262d, 0x0000006d, 0x00000020 },
+   { 0x00000001, 0x210e0169, 0x00000000, 0x00010001 },
+   { 0x00000005, 0x24583dad, 0x00000458, 0x00030003 },
+   { 0x01000010, 0x200035ac, 0x0000010e, 0x00000458 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000031a },
+   { 0x01000010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00010401, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00010805, 0x245a3e2d, 0x02008800, 0x007f007f },
+   { 0x00110401, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00110805, 0x245a3e2d, 0x02008804, 0x007f007f },
+   { 0x00010005, 0x245c3e2d, 0x02008800, 0x00800080 },
+   { 0x00110005, 0x245c3e2d, 0x02008804, 0x00800080 },
+   { 0x00000009, 0x245c3dad, 0x0000045c, 0x00010001 },
+   { 0x0000000c, 0x211e362d, 0x0000006c, 0x00000020 },
+   { 0x00000040, 0x240035ac, 0x0000003e, 0x0000045c },
+   { 0x01000010, 0x20003d8c, 0x00210400, 0x00010001 },
+   { 0x01000010, 0x20003d8c, 0x02210400, 0x01000100 },
+   { 0x00000001, 0x203c01ed, 0x00000000, 0x00000000 },
+   { 0x00010001, 0x203c01ed, 0x00000000, 0x00020002 },
+   { 0x00010001, 0x203c01ed, 0x02000000, 0xfffefffe },
+   { 0x02000005, 0x20002e28, 0x00000061, 0x00400040 },
+   { 0x00010040, 0x244c0d21, 0x0000045a, 0x0208e602 },
+   { 0x00110040, 0x20780d21, 0x0000045a, 0x0208a002 },
+   { 0x00010040, 0x20782421, 0x0000044c, 0x0000045c },
+   { 0x02000005, 0x20003dac, 0x0200011e, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000001c0 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000012 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00700000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c000c },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x0007000c },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00110040, 0x20441da5, 0x02000442, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c0007 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00070007 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x01600006, 0x20003dac, 0x0000044a, 0x00000000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00010040, 0x22000c20, 0x00000078, 0x00200010 },
+   { 0x00110040, 0x22000c20, 0x00000078, 0x00300010 },
+   { 0x00010001, 0x20480061, 0x00000000, 0x00030009 },
+   { 0x00110001, 0x20480061, 0x00000000, 0x00040009 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001a },
+   { 0x00600001, 0x26220231, 0x008d05c0, 0x00000000 },
+   { 0x00600401, 0x26120231, 0x008d05b8, 0x00000000 },
+   { 0x00600801, 0x26020231, 0x008d05b0, 0x00000000 },
+   { 0x00600401, 0x25f20231, 0x008d05a8, 0x00000000 },
+   { 0x00600801, 0x25e20231, 0x008d05a0, 0x00000000 },
+   { 0x00600401, 0x25d20231, 0x008d0598, 0x00000000 },
+   { 0x00600801, 0x25c20231, 0x008d0590, 0x00000000 },
+   { 0x00600401, 0x25b20231, 0x008d0588, 0x00000000 },
+   { 0x00600801, 0x25a20231, 0x008d0580, 0x00000000 },
+   { 0x00600401, 0x25920231, 0x008d0578, 0x00000000 },
+   { 0x00600801, 0x25820231, 0x008d0570, 0x00000000 },
+   { 0x00600001, 0x25720231, 0x008d0568, 0x00000000 },
+   { 0x00600001, 0x25620231, 0x008d0560, 0x00000000 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00810001, 0xbc000229, 0x02ad8000, 0x00000000 },
+   { 0x00810001, 0xbc200229, 0x02ad8020, 0x00000000 },
+   { 0x00810001, 0xbc400229, 0x02ad8040, 0x00000000 },
+   { 0x00810001, 0xbc600229, 0x02ad8060, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000012a },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000066 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000064 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04a00480 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00a02040, 0x2400462c, 0x00ad8000, 0x00ad8005 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8001, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8002, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8003, 0x00140014 },
+   { 0x00a02048, 0xb8003e2d, 0x00ad8004, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8040, 0x00ad8045 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8041, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8042, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8043, 0x00140014 },
+   { 0x00a02048, 0xb8403e2d, 0x00ad8044, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8080, 0x00ad8085 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8081, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8082, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8083, 0x00140014 },
+   { 0x00a02048, 0xb8803e2d, 0x00ad8084, 0xfffbfffb },
+   { 0x00600040, 0x2400462c, 0x008d80c0, 0x008d80c5 },
+   { 0x00600048, 0x24003e2c, 0x008d80c1, 0xfffbfffb },
+   { 0x00600048, 0x24003e2c, 0x008d80c2, 0x00140014 },
+   { 0x00600048, 0x24003e2c, 0x008d80c3, 0x00140014 },
+   { 0x00600048, 0xb8c03e2d, 0x008d80c4, 0xfffbfffb },
+   { 0x00000401, 0x22000060, 0x00000000, 0x04a00480 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04a00490 },
+   { 0x00010802, 0x220c2d28, 0x00000454, 0x04800480 },
+   { 0x00a02040, 0x24003dac, 0x00b18000, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8800, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8820, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18020, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8820, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8840, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18040, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x000a000a },
+   { 0x00a02040, 0x24003dac, 0x00b18040, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18060, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8860, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8880, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18080, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8880, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed88a0, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000000b4 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000004c },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000004a },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010002, 0x220c2d28, 0x02000454, 0x04800480 },
+   { 0x00800040, 0x24003e2c, 0x01ed8000, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8001, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8021, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8002, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8022, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8003, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8023, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8004, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8024, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8005, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8025, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00800040, 0x24003e2c, 0x01ed8040, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8060, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8041, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8061, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8042, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8062, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8043, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8063, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8044, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8064, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8045, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8065, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000005c },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000062 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000042 },
+   { 0x00200401, 0x22000128, 0x0066044c, 0x00000000 },
+   { 0x00200c01, 0x22040128, 0x0045044e, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x03400010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220c0168, 0x02000000, 0x04800480 },
+   { 0x00a02040, 0x24003e2c, 0x00ad83e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8be0, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8800, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8800, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8820, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00a02040, 0x24003e2c, 0x00ad8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8040, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8840, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8860, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8060, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8860, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8880, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001c },
+   { 0x00200401, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x01200010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01200010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00210040, 0x22002d08, 0x00450200, 0x00010001 },
+   { 0x00210040, 0x22002d08, 0x02450200, 0x00100010 },
+   { 0x00800001, 0xb8000229, 0x01ed8000, 0x00000000 },
+   { 0x00800001, 0xb8200229, 0x01ed8020, 0x00000000 },
+   { 0x00800001, 0xb8400229, 0x01ed8040, 0x00000000 },
+   { 0x00800001, 0xb8600229, 0x01ed8060, 0x00000000 },
+   { 0x80800042, 0xd8004631, 0x00d29800, 0x00d20480 },
+   { 0x80800042, 0xd8204631, 0x00d29820, 0x00d204a0 },
+   { 0x80800042, 0xd8404631, 0x00d29840, 0x00d204c0 },
+   { 0x80800042, 0xd8604631, 0x00d29860, 0x00d204e0 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000c01, 0x220201e8, 0x00000000, 0x07100710 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00200040, 0x244c3d09, 0x00450200, 0x00100010 },
+   { 0x00800041, 0x24002628, 0x00ad8000, 0x00000540 },
+   { 0x00800041, 0x24202628, 0x00ad8020, 0x00000540 },
+   { 0x00800048, 0x24002628, 0x00ad8002, 0x00000542 },
+   { 0x00800048, 0x24202628, 0x00ad8022, 0x00000542 },
+   { 0x00200001, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00800048, 0x24002628, 0x01ed8000, 0x00000544 },
+   { 0x00800048, 0x24202628, 0x01ed8020, 0x00000544 },
+   { 0x00800048, 0xbc002629, 0x01ed8002, 0x00000546 },
+   { 0x00801048, 0xbc202629, 0x01ed8022, 0x00000546 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000128 },
+   { 0x00000001, 0x20220169, 0x00000000, 0x00040004 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00500000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x00080008 },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00030008 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x00000040, 0x22000c20, 0x00000078, 0x00100010 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00000001, 0x20480061, 0x00000000, 0x00020005 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00410001, 0xbc000229, 0x02698000, 0x00000000 },
+   { 0x00410001, 0xbc100229, 0x02698010, 0x00000000 },
+   { 0x00410001, 0xbc200229, 0x02698020, 0x00000000 },
+   { 0x00410001, 0xbc300229, 0x02698030, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000be },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000042 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04d004c0 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00802040, 0x2400462c, 0x00a98000, 0x00a98005 },
+   { 0x00802048, 0x24003e2c, 0x00a98001, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98002, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98003, 0x00140014 },
+   { 0x00802048, 0xb8003e2d, 0x00a98004, 0xfffbfffb },
+   { 0x00802040, 0x2400462c, 0x00a98040, 0x00a98045 },
+   { 0x00802048, 0x24003e2c, 0x00a98041, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98042, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98043, 0x00140014 },
+   { 0x00802048, 0xb8203e2d, 0x00a98044, 0xfffbfffb },
+   { 0x00400040, 0x2400462c, 0x00698080, 0x00698085 },
+   { 0x00400048, 0x24003e2c, 0x00698081, 0xfffbfffb },
+   { 0x00400048, 0x24003e2c, 0x00698082, 0x00140014 },
+   { 0x00400048, 0x24003e2c, 0x00698083, 0x00140014 },
+   { 0x00400048, 0xb8403e2d, 0x00698084, 0xfffbfffb },
+   { 0x00000401, 0x220c0060, 0x00000000, 0x04e004d0 },
+   { 0x00000c01, 0x22000060, 0x00000000, 0x04d004c8 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04e004d8 },
+   { 0x00000801, 0x22080060, 0x00000000, 0x04f004e8 },
+   { 0x00800040, 0x24003dac, 0x00b104c0, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x00b104e0, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed9800, 0x00140014 },
+   { 0x00010001, 0x220c0168, 0x00000000, 0x05400540 },
+   { 0x00110001, 0x220c0168, 0x00000000, 0x04c004c0 },
+   { 0x00800048, 0x24003dac, 0x01e98000, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01e98020, 0x00010001 },
+   { 0x00800048, 0x24003dac, 0x01e98800, 0x00140014 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000022 },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010001, 0x220e0168, 0x02000000, 0x05400540 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x00800040, 0x24003e2c, 0x01e98000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98001, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98002, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98003, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98004, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98005, 0x00010001 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x03600010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x00400401, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x00400840, 0x22083d2c, 0x0069044c, 0x00100010 },
+   { 0x00610040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x01e983e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e993e0, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e99000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e99020, 0x00010001 },
+   { 0x00000001, 0x220e0168, 0x00000000, 0x05400540 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x00400001, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x01400010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01400010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00010001 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00100010 },
+   { 0x00800001, 0x25400229, 0x01e98000, 0x00000000 },
+   { 0x80800042, 0x45404631, 0x00d20540, 0x00d204c0 },
+   { 0x00000001, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x00400001, 0xd8000231, 0x008a0540, 0x00000000 },
+   { 0x00400001, 0xd8100231, 0x008a0548, 0x00000000 },
+   { 0x00400001, 0xd8200231, 0x008a0550, 0x00000000 },
+   { 0x00400001, 0xd8300231, 0x008a0558, 0x00000000 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00600041, 0x24002628, 0x00898000, 0x00000540 },
+   { 0x00600048, 0x24002628, 0x00898002, 0x00000542 },
+   { 0x00600048, 0x24002628, 0x00898008, 0x00000544 },
+   { 0x00600048, 0x2540262d, 0x0089800a, 0x00000546 },
+   { 0x00400401, 0xbc0001a9, 0x00690540, 0x00000000 },
+   { 0x00400801, 0xbc1001a9, 0x00690548, 0x00000000 },
+   { 0x01000010, 0x20003d2c, 0x00000022, 0x00030003 },
+   { 0x01000040, 0x20223d29, 0x02000022, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00080008 },
+   { 0x00110040, 0x24540c21, 0x00000454, 0x00080008 },
+   { 0x00010040, 0x24540c21, 0x00000454, 0x00180038 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffee4 },
+   { 0x01800010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0xffe0ffe0 },
+   { 0x00010001, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00110001, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00000001, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00800040, 0x24003dac, 0x00b19c00, 0x00200020 },
+   { 0x00800040, 0x24203dac, 0x00b19c20, 0x00200020 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00060006 },
+   { 0x8080000c, 0xdc203d91, 0x00b10420, 0x00060006 },
+   { 0x02000040, 0x210e3d29, 0x0200010e, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00040004 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffcdc },
+   { 0x01000005, 0x25643e2d, 0x00000060, 0x00c000c0 },
+   { 0x01000010, 0x20003dac, 0x02000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00a02001, 0x46400231, 0x00d20780, 0x00000000 },
+   { 0x00a02001, 0x46800231, 0x00d207c0, 0x00000000 },
+   { 0x00a02001, 0x46c00231, 0x00d20480, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000007e },
+   { 0x80a02042, 0x46404631, 0x00d20640, 0x00d20780 },
+   { 0x80a02042, 0x46804631, 0x00d20680, 0x00d207c0 },
+   { 0x80a02042, 0x46c04631, 0x00d206c0, 0x00d20480 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000076 },
+   { 0x01000010, 0x20003dac, 0x02000564, 0x00800080 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffe4 },
+   { 0x00200401, 0x256801ed, 0x00000000, 0x00200020 },
+   { 0x00200801, 0x256c01ed, 0x00000000, 0x00060006 },
+   { 0x00400001, 0x258001ed, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x45a001ad, 0x00058c00, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000028 },
+   { 0x02600005, 0x20006e28, 0x02008c0c, 0x88848421 },
+   { 0x0220000c, 0x25663dad, 0x00000458, 0x00010001 },
+   { 0x0000000c, 0x25643dad, 0x00000564, 0x00060006 },
+   { 0x00310001, 0x25c401ad, 0x00000458, 0x00000000 },
+   { 0x00210001, 0x25c00061, 0x00000000, 0x00010001 },
+   { 0x00310040, 0x25c03dad, 0x004545c4, 0x00010001 },
+   { 0x00610001, 0x25a00061, 0x02000000, 0x00000080 },
+   { 0x00710001, 0x45a002ad, 0x02ae8c00, 0x00000000 },
+   { 0x00710001, 0x45a202ad, 0x02ae8c01, 0x00000000 },
+   { 0x00800041, 0x25a035ad, 0x00b105a0, 0x000905c0 },
+   { 0x01200010, 0x20003e2c, 0x0245006e, 0x00000000 },
+   { 0x00310009, 0x244c45ad, 0x02000564, 0x0045006e },
+   { 0x00210001, 0x244c01ed, 0x02000000, 0x00000000 },
+   { 0x0031000c, 0x244c3dad, 0x0245044c, 0x00010001 },
+   { 0x00200009, 0x256835ad, 0x0045044c, 0x00000566 },
+   { 0x00210040, 0x256835ad, 0x02450568, 0x00000566 },
+   { 0x00200040, 0x256c362d, 0x0045006e, 0x00000566 },
+   { 0x00400040, 0x240035ac, 0x00ab05a2, 0x00ab05a6 },
+   { 0x00400040, 0x2400358c, 0x00690400, 0x00000566 },
+   { 0x0040000c, 0x2580358d, 0x00690400, 0x00000566 },
+   { 0x00800041, 0x2400362c, 0x00d20640, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d20660, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d20780, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207a0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x464035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x466035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d20680, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d206a0, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d207c0, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207e0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x468035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x46a035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d206c0, 0x000705a8 },
+   { 0x00800041, 0x2420362c, 0x00d206e0, 0x000705a8 },
+   { 0x00800048, 0x2400362c, 0x00d20480, 0x000705ac },
+   { 0x00800048, 0x2420362c, 0x00d204a0, 0x000705ac },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x0000056a },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x0000056a },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056e },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056e },
+   { 0x80800040, 0x46c035b1, 0x00b104c0, 0x00050582 },
+   { 0x80800040, 0x46e035b1, 0x00b104e0, 0x00050582 },
+   { 0x01800005, 0x20003d2c, 0x02000020, 0x00020002 },
+   { 0x80800040, 0xd00045b1, 0x00b19000, 0x00d20640 },
+   { 0x80800040, 0xd02045b1, 0x00b19020, 0x00d20660 },
+   { 0x80800040, 0xd04045b1, 0x00b19040, 0x00d20680 },
+   { 0x80800040, 0xd06045b1, 0x00b19060, 0x00d206a0 },
+   { 0x00000040, 0x22083d8c, 0x00000208, 0x00800080 },
+   { 0x80400040, 0xd40045b1, 0x00699400, 0x00ab06c0 },
+   { 0x80400040, 0xd48045b1, 0x00699480, 0x00ab06c2 },
+   { 0x80400040, 0xd42045b1, 0x00699420, 0x00ab06e0 },
+   { 0x80400040, 0xd4a045b1, 0x006994a0, 0x00ab06e2 },
+   { 0x80400040, 0xd41045b1, 0x00699410, 0x00ab06d0 },
+   { 0x80400040, 0xd49045b1, 0x00699490, 0x00ab06d2 },
+   { 0x80400040, 0xd43045b1, 0x00699430, 0x00ab06f0 },
+   { 0x80400040, 0xd4b045b1, 0x006994b0, 0x00ab06f2 },
+   { 0x00000040, 0x220a3d8c, 0x0000020a, 0x00080008 },
+   { 0x00110040, 0x220a3d8c, 0x0200020a, 0x00300030 },
+   { 0x01000010, 0x20003d2c, 0x02000020, 0x00060006 },
+   { 0x00000040, 0x20203d29, 0x00000020, 0x00020002 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00100001 },
+   { 0x00000040, 0x220c358c, 0x0000020c, 0x0000003a },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffc1a },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x01400140 },
+   { 0x00000001, 0x220201ec, 0x00000000, 0x01c001c0 },
+   { 0x00800401, 0x20400236, 0x01ee8000, 0x00000000 },
+   { 0x00800801, 0x20500236, 0x01ee8010, 0x00000000 },
+   { 0x00800401, 0x20600236, 0x01ee8020, 0x00000000 },
+   { 0x00800801, 0x20700236, 0x01ee8030, 0x00000000 },
+   { 0x00800401, 0x20800236, 0x01ee8040, 0x00000000 },
+   { 0x00800801, 0x20900236, 0x01ee8050, 0x00000000 },
+   { 0x00800401, 0x20a00236, 0x01ee8060, 0x00000000 },
+   { 0x00800801, 0x20b00236, 0x01ee8070, 0x00000000 },
+   { 0x00800401, 0x20c00236, 0x01ee8100, 0x00000000 },
+   { 0x00800801, 0x20d00236, 0x01ee8110, 0x00000000 },
+   { 0x00800401, 0x20e00236, 0x01ee8120, 0x00000000 },
+   { 0x00800801, 0x20f00236, 0x01ee8130, 0x00000000 },
+   { 0x00800401, 0x21000236, 0x01ee8140, 0x00000000 },
+   { 0x00800801, 0x21100236, 0x01ee8150, 0x00000000 },
+   { 0x00800401, 0x21200236, 0x01ee8160, 0x00000000 },
+   { 0x00800801, 0x21300236, 0x01ee8170, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x1218a000 },
+   { 0x00200401, 0x204001a5, 0x00450064, 0x00000000 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x000f000f },
+   { 0x01600031, 0x21400021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x03400340 },
+   { 0x00800401, 0x41600236, 0x00d28000, 0x00000000 },
+   { 0x00800801, 0x41610236, 0x00d28080, 0x00000000 },
+   { 0x00800401, 0x41800236, 0x00d28020, 0x00000000 },
+   { 0x00800801, 0x41810236, 0x00d280a0, 0x00000000 },
+   { 0x00800401, 0x41a00236, 0x00d28040, 0x00000000 },
+   { 0x00800801, 0x41a10236, 0x00d280c0, 0x00000000 },
+   { 0x00800401, 0x41c00236, 0x00d28060, 0x00000000 },
+   { 0x00800801, 0x41c10236, 0x00d280e0, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x0a18a001 },
+   { 0x0000040c, 0x20443ca5, 0x00000044, 0x00010001 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x0007000f },
+   { 0x0a600031, 0x21600021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
+   { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
+   { 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
+   { 0x00000001, 0x202001e9, 0x00000000, 0x100c100c },
+   { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
+   { 0x00000009, 0x27e82d21, 0x00000040, 0x00060006 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000100 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x02600005, 0x20000c20, 0x02000090, 0x00002000 },
+   { 0x00000006, 0x20880c21, 0x00000088, 0x00200000 },
+   { 0x00200009, 0x20845529, 0x0000009c, 0x00450020 },
+   { 0x00200001, 0x40840231, 0x00450094, 0x00000000 },
+   { 0x00310001, 0x20840229, 0x02450094, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000095, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000094 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x20800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x020000d0, 0x00002000 },
+   { 0x00000006, 0x20c80c21, 0x000000c8, 0x00200000 },
+   { 0x00200009, 0x20c45529, 0x000000dc, 0x00450020 },
+   { 0x00200001, 0x40c40231, 0x004500d4, 0x00000000 },
+   { 0x00310001, 0x20c40229, 0x024500d4, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x000000d5, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x000000d4 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d00c0, 0x00000000 },
+   { 0x00802001, 0x20c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000110, 0x00002000 },
+   { 0x00000006, 0x21080c21, 0x00000108, 0x00200000 },
+   { 0x00200009, 0x21045529, 0x0000011c, 0x00450020 },
+   { 0x00200001, 0x41040231, 0x00450114, 0x00000000 },
+   { 0x00310001, 0x21040229, 0x02450114, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000115, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000114 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000150, 0x00002000 },
+   { 0x00000006, 0x21480c21, 0x00000148, 0x00200000 },
+   { 0x00200009, 0x21445529, 0x0000015c, 0x00450020 },
+   { 0x00200001, 0x41440231, 0x00450154, 0x00000000 },
+   { 0x00310001, 0x21440229, 0x02450154, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000155, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000154 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0140, 0x00000000 },
+   { 0x00802001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000190, 0x00002000 },
+   { 0x00000006, 0x21880c21, 0x00000188, 0x00200000 },
+   { 0x00200009, 0x21845529, 0x0000019c, 0x00450020 },
+   { 0x00200001, 0x41840231, 0x00450194, 0x00000000 },
+   { 0x00310001, 0x21840229, 0x02450194, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000195, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000194 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x020001d0, 0x00002000 },
+   { 0x00000006, 0x21c80c21, 0x000001c8, 0x00200000 },
+   { 0x00200009, 0x21c45529, 0x000001dc, 0x00450020 },
+   { 0x00200001, 0x41c40231, 0x004501d4, 0x00000000 },
+   { 0x00310001, 0x21c40229, 0x024501d4, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x000001d5, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x000001d4 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d01c0, 0x00000000 },
+   { 0x00802001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000210, 0x00002000 },
+   { 0x00000006, 0x22080c21, 0x00000208, 0x00200000 },
+   { 0x00200009, 0x22045529, 0x0000021c, 0x00450020 },
+   { 0x00200001, 0x42040231, 0x00450214, 0x00000000 },
+   { 0x00310001, 0x22040229, 0x02450214, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000215, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000214 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000250, 0x00002000 },
+   { 0x00000006, 0x22480c21, 0x00000248, 0x00200000 },
+   { 0x00200009, 0x22445529, 0x0000025c, 0x00450020 },
+   { 0x00200001, 0x42440231, 0x00450254, 0x00000000 },
+   { 0x00310001, 0x22440229, 0x02450254, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000255, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000254 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0240, 0x00000000 },
+   { 0x00802001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x01000040, 0x20423dad, 0x00000042, 0xfff8fff8 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d00c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000080 },
+   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0140, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x21680c22, 0x000007e8, 0x00000100 },
+   { 0x00802001, 0x21800022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d01c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000180 },
+   { 0x00802001, 0x20400022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0240, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000200 },
+   { 0x01000010, 0x20002da0, 0x02000060, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffff00 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000040 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x00000001, 0x220400e0, 0x00000000, 0x00a00080 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02008810, 0x00002000 },
+   { 0x01000040, 0x20603dad, 0x00000060, 0xffffffff },
+   { 0x00000006, 0xa8080c21, 0x00008808, 0x00200000 },
+   { 0x00200009, 0xa8045529, 0x0000881c, 0x00450020 },
+   { 0x00200001, 0xc8040231, 0x00458814, 0x00000000 },
+   { 0x00310001, 0xa8040229, 0x02458814, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00008815, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00008814 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d8800, 0x00000000 },
+   { 0x00802001, 0xa8000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d8800, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000040 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffda },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
+   { 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
+   { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
+   { 0x00000009, 0x27e82d21, 0x00000040, 0x00060006 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000260 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x00200001, 0x20640229, 0x00450094, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000090, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000090, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000095, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000095, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000090, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x20881c21, 0x00000088, 0x00200000 },
+   { 0x00200001, 0x20840129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x20800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000090, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000009c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000009c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200009c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000090, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200009c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200009c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x20843d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x20862d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x40840231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x004500d4, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x020000d0, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x000000d0, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x000000d5, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x000000d5, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x000000d0, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x20c81c21, 0x000000c8, 0x00200000 },
+   { 0x00200001, 0x20c40129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d00c0, 0x00000000 },
+   { 0x00802001, 0x20c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x000000d0, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x000000dc, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x000000dc, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x020000dc, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x020000d0, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x020000dc, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x020000dc, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x20c43d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x20c62d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x40c40231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450114, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000110, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000110, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000115, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000115, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000110, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21081c21, 0x00000108, 0x00200000 },
+   { 0x00200001, 0x21040129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000110, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000011c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000011c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200011c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000110, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200011c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200011c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21043d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21062d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41040231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450154, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000150, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000150, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000155, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000155, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000150, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21481c21, 0x00000148, 0x00200000 },
+   { 0x00200001, 0x21440129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0140, 0x00000000 },
+   { 0x00802001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000150, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000015c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000015c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200015c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000150, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200015c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200015c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21443d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21462d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41440231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450194, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000190, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000190, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000195, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000195, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000190, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21881c21, 0x00000188, 0x00200000 },
+   { 0x00200001, 0x21840129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000190, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000019c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000019c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200019c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000190, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200019c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200019c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21843d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21862d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41840231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x004501d4, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x020001d0, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x000001d0, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x000001d5, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x000001d5, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x000001d0, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21c81c21, 0x000001c8, 0x00200000 },
+   { 0x00200001, 0x21c40129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d01c0, 0x00000000 },
+   { 0x00802001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x000001d0, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x000001dc, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x000001dc, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x020001dc, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x020001d0, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x020001dc, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x020001dc, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21c43d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21c62d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41c40231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450214, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000210, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000210, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000215, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000215, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000210, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x22081c21, 0x00000208, 0x00200000 },
+   { 0x00200001, 0x22040129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000210, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000021c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000021c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200021c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000210, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200021c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200021c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x22043d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x22062d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x42040231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450254, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000250, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000250, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000255, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000255, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000250, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x22481c21, 0x00000248, 0x00200000 },
+   { 0x00200001, 0x22440129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0240, 0x00000000 },
+   { 0x00802001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000250, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000025c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000025c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200025c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000250, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200025c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200025c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x22443d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x22462d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x42440231, 0x00660064, 0x00000000 },
+   { 0x01000040, 0x20423dad, 0x00000042, 0xfff8fff8 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d00c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000080 },
+   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0140, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x21680c22, 0x000007e8, 0x00000100 },
+   { 0x00802001, 0x21800022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d01c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000180 },
+   { 0x00802001, 0x20400022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0240, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000200 },
+   { 0x01000010, 0x20002da0, 0x02000060, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xfffffda0 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000006e },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x00000001, 0x220400e0, 0x00000000, 0x00a00080 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00458814, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02008810, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00008810, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00008815, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00008815, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00008810, 0x00002000 },
+   { 0x01000040, 0x20603dad, 0x02000060, 0xffffffff },
+   { 0x00000006, 0xa8080c21, 0x00008808, 0x00200000 },
+   { 0x00200001, 0xa8040129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d8800, 0x00000000 },
+   { 0x00802001, 0xa8000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000022 },
+   { 0x02600005, 0x20000d20, 0x02000066, 0x00000001 },
+   { 0x02600005, 0x20000c20, 0x00008810, 0x00004000 },
+   { 0x00000001, 0x20680129, 0x0000881c, 0x00000000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000881c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200881c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02008810, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200881c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200881c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x00000068, 0x000d0070 },
+   { 0x01000040, 0x20603dad, 0x02000060, 0x00000000 },
+   { 0x00000009, 0xa8043d09, 0x00000600, 0x000c000c },
+   { 0x00000005, 0xa8062d09, 0x00000600, 0xf000f000 },
+   { 0x00200001, 0xc8040231, 0x00660064, 0x00000000 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d8800, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000040 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xffffffac },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/i965_drv_video/shaders/h264/mc/avc_mc.g4b.gen5 b/i965_drv_video/shaders/h264/mc/avc_mc.g4b.gen5
new file mode 100644 (file)
index 0000000..5a91f32
--- /dev/null
@@ -0,0 +1,2938 @@
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000000d4 },
+   { 0x00000005, 0x220e3e2c, 0x00000070, 0x000f000f },
+   { 0x00000001, 0x26a00221, 0x00009c38, 0x00000000 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x00a02001, 0x24000229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25000229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25400229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25800229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x25c00229, 0x00b10624, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x06440640 },
+   { 0x00a02001, 0x24000229, 0x00009003, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x0000900b, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00009013, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x0000901b, 0x00000000 },
+   { 0x00a02001, 0x25000229, 0x00009023, 0x00000000 },
+   { 0x00a02001, 0x25400229, 0x0000902b, 0x00000000 },
+   { 0x00a02001, 0x25800229, 0x00009033, 0x00000000 },
+   { 0x00a02001, 0x25c00229, 0x0000903b, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000072 },
+   { 0x02600005, 0x20000c20, 0x0000006c, 0x00000002 },
+   { 0x00600005, 0x24000c20, 0x0000006c, 0x00000011 },
+   { 0x01600007, 0x20000c00, 0x028d0400, 0x00000011 },
+   { 0x00780001, 0x26240169, 0x00000000, 0x80808080 },
+   { 0x00780001, 0x66430231, 0x028d0624, 0x00000000 },
+   { 0x00780001, 0x66630231, 0x028d062c, 0x00000000 },
+   { 0x00780001, 0x26240231, 0x00cf0643, 0x00000000 },
+   { 0x00780001, 0x262c0231, 0x00cf0663, 0x00000000 },
+   { 0x00800040, 0x25e04629, 0x00cf0643, 0x00b10624 },
+   { 0x00600040, 0x25e02529, 0x008d05e0, 0x008d05f0 },
+   { 0x00400040, 0x25e02529, 0x006905e0, 0x006905e8 },
+   { 0x00200040, 0x25e02529, 0x004505e0, 0x004505e4 },
+   { 0x00a02040, 0x2400252c, 0x000005e0, 0x000005e2 },
+   { 0x00a02040, 0x24003d8c, 0x00b10400, 0x00100010 },
+   { 0x00a02008, 0x24003d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x24403d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x24803d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x24c03d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25003d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25403d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25803d89, 0x00b10400, 0x00050005 },
+   { 0x00a02008, 0x25c03d89, 0x00b10400, 0x00050005 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00600041, 0x26806e2d, 0x008d062c, 0x89abcdef },
+   { 0x00600041, 0x26906e2d, 0x008d0623, 0xfedcba98 },
+   { 0x00600041, 0x26a06e2d, 0x00cf0663, 0x89abcdef },
+   { 0x00600041, 0x26b06e2d, 0x00cf0643, 0x0fedcba9 },
+   { 0x00000041, 0x26be3e2d, 0x00000623, 0xfff8fff8 },
+   { 0x00802040, 0x268035ad, 0x008d4680, 0x008d0690 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0688 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0684 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0682 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00200020 },
+   { 0x00200048, 0x24003dac, 0x00a00680, 0x00050005 },
+   { 0x00200008, 0x26e03d8d, 0x00450400, 0x00060006 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x00000633, 0x00100010 },
+   { 0x00800048, 0x26c03e2d, 0x0000067f, 0x00100010 },
+   { 0x00800048, 0x272055ad, 0x000006e0, 0x00b10040 },
+   { 0x00600041, 0x268055ad, 0x000006e2, 0x00ae0040 },
+   { 0x00600041, 0x26a055ad, 0x000006e2, 0x00ae0041 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00680 },
+   { 0x80a02008, 0x44003d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00682 },
+   { 0x80a02008, 0x44403d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00684 },
+   { 0x80a02008, 0x44803d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00686 },
+   { 0x80a02008, 0x44c03d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b00688 },
+   { 0x80a02008, 0x45003d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b0068a },
+   { 0x80a02008, 0x45403d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b0068c },
+   { 0x80a02008, 0x45803d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00b0068e },
+   { 0x80a02008, 0x45c03d91, 0x00b10400, 0x00050005 },
+   { 0x00000001, 0x22040060, 0x00000000, 0x00900080 },
+   { 0x01000005, 0x20000c20, 0x02000068, 0x00003c00 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000020 },
+   { 0x80802040, 0x440045b1, 0x008d8800, 0x00ae0400 },
+   { 0x80802040, 0x444045b1, 0x008d8820, 0x00ae0440 },
+   { 0x80802040, 0x448045b1, 0x008d8840, 0x00ae0480 },
+   { 0x80802040, 0x44c045b1, 0x008d8860, 0x00ae04c0 },
+   { 0x80802040, 0x441045b1, 0x008d8880, 0x00ae0410 },
+   { 0x80802040, 0x445045b1, 0x008d88a0, 0x00ae0450 },
+   { 0x80802040, 0x449045b1, 0x008d88c0, 0x00ae0490 },
+   { 0x80802040, 0x44d045b1, 0x008d88e0, 0x00ae04d0 },
+   { 0x80802040, 0x450045b1, 0x008d8900, 0x00ae0500 },
+   { 0x80802040, 0x454045b1, 0x008d8920, 0x00ae0540 },
+   { 0x80802040, 0x458045b1, 0x008d8940, 0x00ae0580 },
+   { 0x80802040, 0x45c045b1, 0x008d8960, 0x00ae05c0 },
+   { 0x80802040, 0x451045b1, 0x008d8980, 0x00ae0510 },
+   { 0x80802040, 0x455045b1, 0x008d89a0, 0x00ae0550 },
+   { 0x80802040, 0x459045b1, 0x008d89c0, 0x00ae0590 },
+   { 0x80802040, 0x45d045b1, 0x008d89e0, 0x00ae05d0 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x01800180 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000000e2 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000018 },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00200440, 0x27c055a5, 0x004507fa, 0x0045002a },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x0000001b },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x02186000 },
+   { 0x01600031, 0x26200021, 0x408d07c0, 0x00000200 },
+   { 0x00000440, 0x27c43ca5, 0x000007c4, 0x00010001 },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x000f0003 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x00100000 },
+   { 0x02600031, 0x26400021, 0x408d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000008, 0x27fc3dad, 0x000007fc, 0x00010001 },
+   { 0x00200440, 0x27c055a5, 0x004507fa, 0x0045002a },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x00000013 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0xefffc001 },
+   { 0x01600031, 0x26200021, 0x408d07c0, 0x00000200 },
+   { 0x00000440, 0x27c43ca5, 0x000007c4, 0x00010001 },
+   { 0x00000801, 0x27c80061, 0x00000000, 0x00070003 },
+   { 0x05600031, 0x26400021, 0x408d07c0, 0x00000200 },
+   { 0x00000008, 0x220e3e2c, 0x0000006c, 0x00060006 },
+   { 0x00000001, 0x26a002a5, 0x00009c3c, 0x00000000 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x02600005, 0x20000c20, 0x0000006c, 0x00000002 },
+   { 0x00780001, 0x26240169, 0x00000000, 0x80808080 },
+   { 0x02400005, 0x20000c20, 0x0200006c, 0x00000001 },
+   { 0x00560001, 0x46420129, 0x02690624, 0x00000000 },
+   { 0x02400005, 0x20000c20, 0x0200006c, 0x00000010 },
+   { 0x00560001, 0x46520129, 0x0269062c, 0x00000000 },
+   { 0x00780001, 0x26240129, 0x00ae0642, 0x00000000 },
+   { 0x00800040, 0x24004629, 0x00b10624, 0x00650642 },
+   { 0x00600040, 0x24002529, 0x00650400, 0x00650404 },
+   { 0x00600040, 0x25202529, 0x00050400, 0x00050404 },
+   { 0x00600040, 0x25702529, 0x00050408, 0x0005040c },
+   { 0x00560001, 0x26240169, 0x00000000, 0x80808080 },
+   { 0x00460001, 0x26240129, 0x028a0652, 0x00000000 },
+   { 0x00560001, 0x46520129, 0x02690624, 0x00000000 },
+   { 0x02400005, 0x20000c20, 0x0200006c, 0x00000001 },
+   { 0x00560001, 0x46420169, 0x02000000, 0x80808080 },
+   { 0x00460001, 0x46420129, 0x0069062c, 0x00000000 },
+   { 0x00560001, 0x262c0129, 0x008a0642, 0x00000000 },
+   { 0x00600040, 0x24004629, 0x008d0624, 0x00650652 },
+   { 0x00600040, 0x24104629, 0x00650642, 0x008d062c },
+   { 0x00600040, 0x24002529, 0x00650400, 0x00650404 },
+   { 0x00600040, 0x25302529, 0x00050408, 0x0005040c },
+   { 0x00600040, 0x25602529, 0x00050400, 0x00050404 },
+   { 0x00a02040, 0x24003d2c, 0x00b10520, 0x00040004 },
+   { 0x00a02008, 0x24003d89, 0x00b10400, 0x00030003 },
+   { 0x00a02008, 0x24403d89, 0x00b10400, 0x00030003 },
+   { 0x00a02040, 0x24003d2c, 0x00b10560, 0x00040004 },
+   { 0x00a02008, 0x24803d89, 0x00b10400, 0x00030003 },
+   { 0x00a02008, 0x24c03d89, 0x00b10400, 0x00030003 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000048 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x06440640 },
+   { 0x00a02001, 0x24000229, 0x00059002, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x0005900a, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00059012, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x0005901a, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000003c },
+   { 0x00a02001, 0x24000229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24400229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24800229, 0x00b10624, 0x00000000 },
+   { 0x00a02001, 0x24c00229, 0x00b10624, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000032 },
+   { 0x00600041, 0x26806e2d, 0x008d062c, 0x44332211 },
+   { 0x00600041, 0x26906e2d, 0x008d0622, 0xffeeddcc },
+   { 0x00600041, 0x26a06e2d, 0x00650652, 0x44332211 },
+   { 0x00600041, 0x26b06e2d, 0x00650642, 0x00ffeedd },
+   { 0x00200041, 0x26bc3e2d, 0x00450622, 0xfffcfffc },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0690 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0688 },
+   { 0x00802040, 0x268035ad, 0x008d0680, 0x008d0684 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00200020 },
+   { 0x00400048, 0x24003dac, 0x00a50680, 0x00220022 },
+   { 0x00400008, 0x26e03d8d, 0x00690400, 0x00060006 },
+   { 0x00800001, 0x240001ec, 0x00000000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x00050632, 0x00100010 },
+   { 0x00800048, 0x26c03e2d, 0x0005065e, 0x00100010 },
+   { 0x00800048, 0x272055ad, 0x000506e0, 0x00240044 },
+   { 0x00600041, 0x268055ad, 0x000506e4, 0x00440044 },
+   { 0x00600041, 0x26a055ad, 0x000506e4, 0x00440045 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00050680 },
+   { 0x80a02008, 0x44003d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00050684 },
+   { 0x80a02008, 0x44403d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x00050688 },
+   { 0x80a02008, 0x44803d91, 0x00b10400, 0x00050005 },
+   { 0x00a02040, 0x240035ac, 0x00b10720, 0x0005068c },
+   { 0x80a02008, 0x44c03d91, 0x00b10400, 0x00050005 },
+   { 0x00000040, 0x22063d8c, 0x00000204, 0x00100010 },
+   { 0x80802040, 0x640045b1, 0x008d8880, 0x00cf0400 },
+   { 0x80802040, 0x644045b1, 0x008d88a0, 0x00cf0440 },
+   { 0x80802040, 0x648045b1, 0x008d88c0, 0x00cf0480 },
+   { 0x80802040, 0x64c045b1, 0x008d88e0, 0x00cf04c0 },
+   { 0x80802040, 0x640245b1, 0x008d8900, 0x00cf0402 },
+   { 0x80802040, 0x644245b1, 0x008d8920, 0x00cf0442 },
+   { 0x80802040, 0x648245b1, 0x008d8940, 0x00cf0482 },
+   { 0x80802040, 0x64c245b1, 0x008d8960, 0x00cf04c2 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x0007000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x08004000 },
+   { 0x00800001, 0x20400232, 0x00d20400, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20420, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20440, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d20460, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d20480, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d204a0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d204c0, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d204e0, 0x00000000 },
+   { 0x01600031, 0x27a00021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x0ff04000 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x04400400 },
+   { 0x00a02001, 0x20400232, 0x00d29000, 0x00000000 },
+   { 0x00a02001, 0x20500232, 0x00d29020, 0x00000000 },
+   { 0x00a02001, 0x20800232, 0x00d29080, 0x00000000 },
+   { 0x00a02001, 0x20900232, 0x00d290a0, 0x00000000 },
+   { 0x00a02001, 0x20c00232, 0x00d29100, 0x00000000 },
+   { 0x00a02001, 0x20d00232, 0x00d29120, 0x00000000 },
+   { 0x00a02001, 0x21000232, 0x00d29180, 0x00000000 },
+   { 0x00a02001, 0x21100232, 0x00d291a0, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff04 },
+   { 0x00000001, 0x220401ec, 0x00000000, 0x00800080 },
+   { 0x00000001, 0x22080060, 0x00000000, 0x04100400 },
+   { 0x00200008, 0x27006e2d, 0x0000006c, 0x00000040 },
+   { 0x00400001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x272001a9, 0x00b10620, 0x00000000 },
+   { 0x02600005, 0x20001c20, 0x0000006c, 0x00000008 },
+   { 0x00110001, 0x27230231, 0x00000624, 0x00000000 },
+   { 0x00600001, 0x27420231, 0x00cf0643, 0x00000000 },
+   { 0x00110001, 0x27410231, 0x00000643, 0x00000000 },
+   { 0x00240001, 0x27400231, 0x00000623, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000002 },
+   { 0x00010001, 0x27400231, 0x02000624, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000001 },
+   { 0x00110001, 0x27420231, 0x02000623, 0x00000000 },
+   { 0x00000005, 0x26803e2d, 0x00000070, 0x000f000f },
+   { 0x00000001, 0x270801ad, 0x00000700, 0x00000000 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000084 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00800001, 0x27200231, 0x00b10628, 0x00000000 },
+   { 0x02600005, 0x20001c20, 0x0200006c, 0x00000004 },
+   { 0x00680001, 0x272c0231, 0x028d0634, 0x00000000 },
+   { 0x00780001, 0x272c0231, 0x02000633, 0x00000000 },
+   { 0x00400001, 0x27420231, 0x00a0040e, 0x00000000 },
+   { 0x00400001, 0x27460231, 0x00a0044e, 0x00000000 },
+   { 0x00200001, 0x27400231, 0x0000062b, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000002 },
+   { 0x00010001, 0x27400231, 0x0200062c, 0x00000000 },
+   { 0x00110001, 0x27410231, 0x0200040e, 0x00000000 },
+   { 0x00000008, 0x26803e2d, 0x00000070, 0x00040004 },
+   { 0x00200040, 0x22083d8c, 0x00450208, 0x00800080 },
+   { 0x00000006, 0x27083dad, 0x00000700, 0x00010001 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000064 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00a02001, 0xb3800231, 0x00d20400, 0x00000000 },
+   { 0x00a02001, 0xb3a00231, 0x00d20480, 0x00000000 },
+   { 0x00a02001, 0xb3c00231, 0x00d20440, 0x00000000 },
+   { 0x00a02001, 0xb3e00231, 0x00d204c0, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x00000060, 0x00000002 },
+   { 0x01000005, 0x20001c20, 0x0200006c, 0x00000001 },
+   { 0x01010005, 0x20001c20, 0x00000060, 0x00004000 },
+   { 0x02010005, 0x20001c20, 0x02000060, 0x00000010 },
+   { 0x00030001, 0x27230231, 0x0000065f, 0x00000000 },
+   { 0x00030220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02600005, 0x20001c20, 0x0000006c, 0x00000001 },
+   { 0x00460001, 0x27200231, 0x0069065c, 0x00000000 },
+   { 0x00110001, 0x27230231, 0x00000458, 0x00000000 },
+   { 0x00600001, 0x27240231, 0x008d0458, 0x00000000 },
+   { 0x00600001, 0x272c0231, 0x008d0478, 0x00000000 },
+   { 0x00600001, 0x27420231, 0x00cf0663, 0x00000000 },
+   { 0x00000001, 0x27400231, 0x00000458, 0x00000000 },
+   { 0x00010001, 0x27410231, 0x0000065f, 0x00000000 },
+   { 0x00110001, 0x27410231, 0x00000663, 0x00000000 },
+   { 0x02000005, 0x20001c20, 0x0200006c, 0x00000010 },
+   { 0x00110001, 0x27420231, 0x0200065f, 0x00000000 },
+   { 0x00000005, 0x26803e2d, 0x00000071, 0x000f000f },
+   { 0x00000006, 0x27083dad, 0x00000702, 0x00020002 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000030 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00400001, 0x27200231, 0x0069045c, 0x00000000 },
+   { 0x00600001, 0x27240231, 0x008d0478, 0x00000000 },
+   { 0x00800001, 0x272c0231, 0x0000047f, 0x00000000 },
+   { 0x00400001, 0x27420231, 0x00a0048e, 0x00000000 },
+   { 0x00400001, 0x27460231, 0x00a004ce, 0x00000000 },
+   { 0x00000001, 0x27400231, 0x00000478, 0x00000000 },
+   { 0x00000001, 0x27410231, 0x0000045f, 0x00000000 },
+   { 0x00000008, 0x26803e2d, 0x00000071, 0x00040004 },
+   { 0x00200040, 0x22083d8c, 0x00450208, 0x00800080 },
+   { 0x00000006, 0x27083dad, 0x00000700, 0x00030003 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00a02001, 0xb3800231, 0x00d20480, 0x00000000 },
+   { 0x00a02001, 0xb3a00231, 0x00d20500, 0x00000000 },
+   { 0x00a02001, 0xb3c00231, 0x00d204c0, 0x00000000 },
+   { 0x00a02001, 0xb3e00231, 0x00d20540, 0x00000000 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000100 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffe74 },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000001, 0x27340231, 0x00000733, 0x00000000 },
+   { 0x00600001, 0x274a0231, 0x00000749, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b10723, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10724, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10725, 0x00010001 },
+   { 0x00800008, 0x26c03d8d, 0x008d0400, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x00b10740, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10741, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10742, 0x00010001 },
+   { 0x00800008, 0x26a03d8d, 0x008d0400, 0x00020002 },
+   { 0x00800001, 0x27240231, 0x00d206c0, 0x00000000 },
+   { 0x00600001, 0x27400231, 0x00ae06a2, 0x00000000 },
+   { 0x00000001, 0x27230231, 0x000006a0, 0x00000000 },
+   { 0x00000005, 0x220e3dac, 0x00000680, 0x000f000f },
+   { 0x00000001, 0x26a00221, 0x00009c2c, 0x00000000 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x80800040, 0xd00045b1, 0x00b18800, 0x000d0724 },
+   { 0x80800040, 0xd02045b1, 0x00b18820, 0x000d0724 },
+   { 0x80800040, 0xd04045b1, 0x00b18840, 0x000d0724 },
+   { 0x80800040, 0xd06045b1, 0x00b18860, 0x000d0724 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x80800040, 0xd00045b1, 0x00b18800, 0x002c0740 },
+   { 0x80800040, 0xd02045b1, 0x00b18820, 0x002c0742 },
+   { 0x80800040, 0xd04045b1, 0x00b18840, 0x002c0744 },
+   { 0x80800040, 0xd06045b1, 0x00b18860, 0x002c0746 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x02800005, 0x20003da0, 0x00000708, 0x00020002 },
+   { 0x02600005, 0x20003da0, 0x02000708, 0x00010001 },
+   { 0x009a0001, 0x27200169, 0x00000000, 0x80808080 },
+   { 0x00780001, 0x27400231, 0x028d0724, 0x00000000 },
+   { 0x00780001, 0x27240231, 0x008d0740, 0x00000000 },
+   { 0x00600040, 0x25e04629, 0x008d0724, 0x008d0740 },
+   { 0x00400040, 0x25e02529, 0x006905e0, 0x006905e8 },
+   { 0x00200040, 0x25e02529, 0x004505e0, 0x004505e4 },
+   { 0x00800040, 0x2400252c, 0x000005e0, 0x000005e2 },
+   { 0x00800040, 0x24003d8c, 0x008d0400, 0x00080008 },
+   { 0x00800008, 0x26803d8d, 0x008d0400, 0x00040004 },
+   { 0x80800040, 0xd00035b1, 0x00b18800, 0x00b10680 },
+   { 0x80800040, 0xd02035b1, 0x00b18820, 0x00b10680 },
+   { 0x80800040, 0xd04035b1, 0x00b18840, 0x00b10680 },
+   { 0x80800040, 0xd06035b1, 0x00b18860, 0x00b10680 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27340231, 0x008d0733, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b10726, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10725, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10724, 0x00010001 },
+   { 0x00800008, 0x26803d8d, 0x00b10400, 0x00020002 },
+   { 0x80800040, 0xd00035b1, 0x00b18800, 0x002d0680 },
+   { 0x80800040, 0xd02035b1, 0x00b18820, 0x002d0684 },
+   { 0x80800040, 0xd04035b1, 0x00b18840, 0x002d0688 },
+   { 0x80800040, 0xd06035b1, 0x00b18860, 0x002d068c },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00400009, 0x26c05421, 0x00000744, 0x00690050 },
+   { 0x00400009, 0x26d05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26a00231, 0x00cf06c3, 0x00000000 },
+   { 0x00800001, 0x26a80231, 0x00b10723, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b106a2, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a1, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a0, 0x00010001 },
+   { 0x00800008, 0x26a03d8d, 0x00b10400, 0x00020002 },
+   { 0x00200040, 0x220c3eac, 0x00450056, 0x06a006a0 },
+   { 0x80800040, 0xd06035b1, 0x01ed9800, 0x00b18860 },
+   { 0x80800040, 0xd04035b1, 0x01ed9804, 0x00b18840 },
+   { 0x80800040, 0xd02035b1, 0x01ed9808, 0x00b18820 },
+   { 0x80800040, 0xd00035b1, 0x01ed980c, 0x00b18800 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00400009, 0x26c05421, 0x00000744, 0x00690050 },
+   { 0x00400009, 0x26d05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26a00231, 0x00cf06c3, 0x00000000 },
+   { 0x00800001, 0x26a80231, 0x00b10723, 0x00000000 },
+   { 0x00800042, 0x25c04629, 0x00b106a8, 0x00b106a9 },
+   { 0x00800040, 0x24003e2c, 0x00b106a3, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a2, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a1, 0x00010001 },
+   { 0x00800008, 0x26c03d8d, 0x008d0400, 0x00020002 },
+   { 0x00600001, 0x26a00231, 0x00ae06c0, 0x00000000 },
+   { 0x00600001, 0x46a60231, 0x00ae06cc, 0x00000000 },
+   { 0x00600001, 0x46a70231, 0x00ae05c0, 0x00000000 },
+   { 0x00200040, 0x220c3eac, 0x00450036, 0x06a006a0 },
+   { 0x80800040, 0xd0603631, 0x01ee9800, 0x00b18860 },
+   { 0x80800040, 0xd0403631, 0x01ee9802, 0x00b18840 },
+   { 0x80800040, 0xd0203631, 0x01ee9804, 0x00b18820 },
+   { 0x80800040, 0xd0003631, 0x01ee9806, 0x00b18800 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00400009, 0x26c05421, 0x00000744, 0x00690050 },
+   { 0x00400009, 0x26d05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26a00231, 0x00ab06c3, 0x00000000 },
+   { 0x00800001, 0x26a80231, 0x00b10723, 0x00000000 },
+   { 0x00800040, 0x24003e2c, 0x00b106a2, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a1, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b106a0, 0x00010001 },
+   { 0x00800008, 0x25c03d89, 0x008d0400, 0x00020002 },
+   { 0x00800042, 0x26a0462d, 0x00b106a0, 0x00b106a1 },
+   { 0x00600001, 0x46a10231, 0x00ae05c0, 0x00000000 },
+   { 0x00600001, 0x26b00231, 0x00ae05d0, 0x00000000 },
+   { 0x00200040, 0x220c3eac, 0x00450056, 0x06a006a0 },
+   { 0x80800040, 0xd0603631, 0x01ed9800, 0x00b18860 },
+   { 0x80800040, 0xd0403631, 0x01ed9804, 0x00b18840 },
+   { 0x80800040, 0xd0203631, 0x01ed9808, 0x00b18820 },
+   { 0x80800040, 0xd0003631, 0x01ed980c, 0x00b18800 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00800042, 0x25c04629, 0x00b10724, 0x00b10725 },
+   { 0x00800040, 0x24003e2c, 0x00b10726, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10725, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10724, 0x00010001 },
+   { 0x00800008, 0x25e03d89, 0x00240400, 0x00020002 },
+   { 0x80800040, 0xd0003531, 0x00ad05c0, 0x00b18800 },
+   { 0x80800040, 0xd0203531, 0x00ad05c2, 0x00b18820 },
+   { 0x80800040, 0xd0403531, 0x00ad05c4, 0x00b18840 },
+   { 0x80800040, 0xd0603531, 0x00ad05c6, 0x00b18860 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27480231, 0x00000747, 0x00000000 },
+   { 0x00800042, 0x25c04629, 0x00b10740, 0x00b10741 },
+   { 0x00800040, 0x24003e2c, 0x00b10742, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10741, 0x00020002 },
+   { 0x00800048, 0x24003e2c, 0x00b10740, 0x00010001 },
+   { 0x00800008, 0x25e03d89, 0x00240400, 0x00020002 },
+   { 0x00800001, 0x45c10231, 0x00d205e0, 0x00000000 },
+   { 0x80800040, 0xd0003631, 0x004d05c0, 0x00b18800 },
+   { 0x80800040, 0xd0203631, 0x004d05c4, 0x00b18820 },
+   { 0x80800040, 0xd0403631, 0x004d05c8, 0x00b18840 },
+   { 0x80800040, 0xd0603631, 0x004d05cc, 0x00b18860 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x0ff04000 },
+   { 0x00800001, 0x20400232, 0x00cd0400, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00cd0408, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00cd0410, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00cd0418, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00cd0440, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00cd0448, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00cd0450, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00cd0458, 0x00000000 },
+   { 0x00800001, 0x20c00232, 0x00cd0480, 0x00000000 },
+   { 0x00800001, 0x20d00232, 0x00cd0488, 0x00000000 },
+   { 0x00800001, 0x20e00232, 0x00cd0490, 0x00000000 },
+   { 0x00800001, 0x20f00232, 0x00cd0498, 0x00000000 },
+   { 0x00800001, 0x21000232, 0x00cd04c0, 0x00000000 },
+   { 0x00800001, 0x21100232, 0x00cd04c8, 0x00000000 },
+   { 0x00800001, 0x21200232, 0x00cd04d0, 0x00000000 },
+   { 0x00800001, 0x21300232, 0x00cd04d8, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffd34 },
+   { 0x00000001, 0x220401ec, 0x00000000, 0x00800080 },
+   { 0x00000001, 0x220601ec, 0x00000000, 0x04000400 },
+   { 0x00200008, 0x27006e2d, 0x0000006c, 0x00000040 },
+   { 0x02600005, 0x20003e20, 0x0000006c, 0x00040004 },
+   { 0x00780001, 0x26340231, 0x00000633, 0x00000000 },
+   { 0x00800001, 0x27200231, 0x00b10620, 0x00000000 },
+   { 0x00600001, 0x27400231, 0x00cf0643, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240070, 0x00004040 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000006a },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00000006, 0x27003dad, 0x00000700, 0x00010001 },
+   { 0x00800001, 0x27200231, 0x00b10628, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c26, 0x00000000 },
+   { 0x00400001, 0x27440231, 0x00808c66, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240072, 0x00004040 },
+   { 0x00000040, 0x22063d8c, 0x00000206, 0x00800080 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000058 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00000006, 0x27003dad, 0x00000702, 0x00020002 },
+   { 0x00800001, 0xaf800231, 0x00d28f80, 0x00000000 },
+   { 0x00800001, 0xaf900231, 0x00d28fa0, 0x00000000 },
+   { 0x00800001, 0xafc00231, 0x00d28fc0, 0x00000000 },
+   { 0x00800001, 0xafd00231, 0x00d28fe0, 0x00000000 },
+   { 0x00800001, 0xafa00231, 0x00d28c00, 0x00000000 },
+   { 0x00800001, 0xafb00231, 0x00d28c20, 0x00000000 },
+   { 0x00800001, 0xafe00231, 0x00d28c40, 0x00000000 },
+   { 0x00800001, 0xaff00231, 0x00d28c60, 0x00000000 },
+   { 0x00400001, 0x27200231, 0x0069065c, 0x00000000 },
+   { 0x00600001, 0x27240231, 0x00a98fcc, 0x00000000 },
+   { 0x00600001, 0x272c0231, 0x00a98fec, 0x00000000 },
+   { 0x00600001, 0x27340231, 0x00008fff, 0x00000000 },
+   { 0x00800001, 0x2620012d, 0x00b10720, 0x00000000 },
+   { 0x00600001, 0x27400231, 0x00cf0663, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240074, 0x00004040 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000032 },
+   { 0x00000040, 0x22043d8c, 0x00000204, 0x00800080 },
+   { 0x00000006, 0x27003dad, 0x00000700, 0x00010001 },
+   { 0x00800001, 0x27200231, 0x00b10628, 0x00000000 },
+   { 0x00600001, 0x27300231, 0x008d0638, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c26, 0x00000000 },
+   { 0x00400001, 0x27440231, 0x00808c66, 0x00000000 },
+   { 0x00400008, 0x26806e2d, 0x00240076, 0x00004040 },
+   { 0x00000040, 0x22063d8c, 0x00000206, 0x00800080 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x00800001, 0xaf800231, 0x00d28f80, 0x00000000 },
+   { 0x00800001, 0xaf900231, 0x00d28fa0, 0x00000000 },
+   { 0x00800001, 0xafc00231, 0x00d28fc0, 0x00000000 },
+   { 0x00800001, 0xafd00231, 0x00d28fe0, 0x00000000 },
+   { 0x00800001, 0xafa00231, 0x00d28c00, 0x00000000 },
+   { 0x00800001, 0xafb00231, 0x00d28c20, 0x00000000 },
+   { 0x00800001, 0xafe00231, 0x00d28c40, 0x00000000 },
+   { 0x00800001, 0xaff00231, 0x00d28c60, 0x00000000 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000000e0 },
+   { 0x00000040, 0x27e01c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffcce },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00600001, 0x27800231, 0x008d0740, 0x00000000 },
+   { 0x00400005, 0x22083dac, 0x00690680, 0x000f000f },
+   { 0x00400040, 0x26a04625, 0x01e09020, 0x00690058 },
+   { 0x00000001, 0x26d001ad, 0x00000700, 0x00000000 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a0 },
+   { 0x80600040, 0xcc0035b1, 0x00898800, 0x008d0760 },
+   { 0x80600040, 0xcc1035b1, 0x00898820, 0x008d0770 },
+   { 0x00800001, 0x27200231, 0x008d0724, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c06, 0x00000000 },
+   { 0x00000006, 0x26d03dad, 0x00000700, 0x00010001 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a4 },
+   { 0x80600040, 0xcc2035b1, 0x00898808, 0x008d0760 },
+   { 0x80600040, 0xcc3035b1, 0x00898828, 0x008d0770 },
+   { 0x00000001, 0x27230231, 0x00000783, 0x00000000 },
+   { 0x00400001, 0x27240231, 0x008a8c18, 0x00000000 },
+   { 0x00400001, 0x27280231, 0x008a8c38, 0x00000000 },
+   { 0x00400001, 0x272c0231, 0x00008c3e, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00690784, 0x00000000 },
+   { 0x00000006, 0x26d03dad, 0x00000700, 0x00020002 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006a8 },
+   { 0x80600040, 0xcc4035b1, 0x00898840, 0x008d0760 },
+   { 0x80600040, 0xcc5035b1, 0x00898860, 0x008d0770 },
+   { 0x00800001, 0x27200231, 0x008d0724, 0x00000000 },
+   { 0x00600001, 0x27280231, 0x00000727, 0x00000000 },
+   { 0x00400001, 0x27400231, 0x00808c46, 0x00000000 },
+   { 0x00000006, 0x26d03dad, 0x00000700, 0x00030003 },
+   { 0x00000040, 0x27e41c01, 0x00001400, 0x00000020 },
+   { 0x00000220, 0x34001400, 0x00001400, 0x000006ac },
+   { 0x80600040, 0xcc6035b1, 0x00898848, 0x008d0760 },
+   { 0x80600040, 0xcc7035b1, 0x00898868, 0x008d0770 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00800001, 0x2760022d, 0x00090724, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00800001, 0x2760022d, 0x00280740, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x02802005, 0x20003da0, 0x000006d0, 0x00020002 },
+   { 0x02802005, 0x20003da0, 0x020006d0, 0x00010001 },
+   { 0x009a0001, 0x27200169, 0x00000000, 0x80808080 },
+   { 0x00780001, 0x27400231, 0x028d0724, 0x00000000 },
+   { 0x00780001, 0x27240231, 0x008d0740, 0x00000000 },
+   { 0x00400040, 0x25e04629, 0x00690724, 0x00690740 },
+   { 0x00200040, 0x25e02529, 0x004505e0, 0x004505e4 },
+   { 0x00800040, 0x2400252c, 0x000005e0, 0x000005e2 },
+   { 0x00800040, 0x24003d8c, 0x008d0400, 0x00040004 },
+   { 0x00800008, 0x27603d8d, 0x008d0400, 0x00030003 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00600001, 0x26c00231, 0x008d0724, 0x00000000 },
+   { 0x00400001, 0x26c80231, 0x0069072b, 0x00000000 },
+   { 0x00600040, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00020002 },
+   { 0x00600048, 0x25e03e29, 0x008d06c0, 0x00010001 },
+   { 0x00800008, 0x27603d2d, 0x002905e0, 0x00020002 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00400009, 0x26e05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26c40231, 0x008d0723, 0x00000000 },
+   { 0x00400001, 0x26c00231, 0x00ab06e3, 0x00000000 },
+   { 0x00600040, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00020002 },
+   { 0x00600048, 0x26c03e2d, 0x008d06c0, 0x00010001 },
+   { 0x00400040, 0x22083eac, 0x00690054, 0x06c006c0 },
+   { 0x00800008, 0x27603dad, 0x01e99000, 0x00020002 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00400009, 0x26e05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26c40231, 0x008d0723, 0x00000000 },
+   { 0x00400001, 0x26c00231, 0x00ab06e3, 0x00000000 },
+   { 0x00600042, 0x25c04629, 0x008d06c4, 0x008d06c5 },
+   { 0x00600040, 0x24003e2c, 0x008d06c3, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00010001 },
+   { 0x00600008, 0x26c03d8d, 0x008d0400, 0x00020002 },
+   { 0x00400001, 0x46c401ad, 0x006906c4, 0x00000000 },
+   { 0x00400001, 0x46c6012d, 0x006905c0, 0x00000000 },
+   { 0x00400040, 0x22083eac, 0x00690054, 0x06c006c0 },
+   { 0x00800001, 0x276001ad, 0x01ea9000, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00400009, 0x26e05421, 0x00000740, 0x00690050 },
+   { 0x00600001, 0x26c40231, 0x008d0723, 0x00000000 },
+   { 0x00400001, 0x26c00231, 0x00ab06e3, 0x00000000 },
+   { 0x00600042, 0x25c04629, 0x008d06c0, 0x008d06c1 },
+   { 0x00600040, 0x24003e2c, 0x008d06c2, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d06c1, 0x00020002 },
+   { 0x00600048, 0x26e03e2d, 0x008d06c0, 0x00010001 },
+   { 0x00400008, 0x46c23dad, 0x006906e0, 0x00020002 },
+   { 0x00200008, 0x26d03dad, 0x004506e8, 0x00020002 },
+   { 0x00400001, 0x46c0012d, 0x006905c0, 0x00000000 },
+   { 0x00400009, 0x22083eac, 0x00690054, 0x00010001 },
+   { 0x00400040, 0x22083d8c, 0x00690208, 0x06c006c0 },
+   { 0x00800001, 0x276001ad, 0x01e99000, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00600042, 0x45c04629, 0x008d0724, 0x008d0725 },
+   { 0x00600040, 0x24003e2c, 0x008d0726, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d0725, 0x00020002 },
+   { 0x00600048, 0x25e03e29, 0x008d0724, 0x00010001 },
+   { 0x00600008, 0x45c23d29, 0x008d05e0, 0x00020002 },
+   { 0x00800001, 0x2760012d, 0x002a05c0, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00600001, 0x27440231, 0x00000743, 0x00000000 },
+   { 0x00600042, 0x45c04629, 0x008d0740, 0x008d0741 },
+   { 0x00600040, 0x24003e2c, 0x008d0742, 0x00020002 },
+   { 0x00600048, 0x24003e2c, 0x008d0741, 0x00020002 },
+   { 0x00600048, 0x25e03e29, 0x008d0740, 0x00010001 },
+   { 0x00600008, 0x45c23d29, 0x008d05e0, 0x00020002 },
+   { 0x00800001, 0x2760012d, 0x004905c0, 0x00000000 },
+   { 0x00000001, 0x34000020, 0x000007e4, 0x00000000 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200801, 0x27c001a1, 0x004507fa, 0x00000000 },
+   { 0x00000040, 0x22000c00, 0x00000200, 0x0ff04000 },
+   { 0x00800001, 0x20400232, 0x00a90400, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00a90404, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00a90408, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00a9040c, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00a90440, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00a90444, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00a90448, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00a9044c, 0x00000000 },
+   { 0x00800001, 0x20c00232, 0x00a90480, 0x00000000 },
+   { 0x00800001, 0x20d00232, 0x00a90484, 0x00000000 },
+   { 0x00800001, 0x20e00232, 0x00a90488, 0x00000000 },
+   { 0x00800001, 0x20f00232, 0x00a9048c, 0x00000000 },
+   { 0x00800001, 0x21000232, 0x00a904c0, 0x00000000 },
+   { 0x00800001, 0x21100232, 0x00a904c4, 0x00000000 },
+   { 0x00800001, 0x21200232, 0x00a904c8, 0x00000000 },
+   { 0x00800001, 0x21300232, 0x00a904cc, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
+   { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
+   { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00200809, 0x27c03e21, 0x00450064, 0x00040004 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x1218a000 },
+   { 0x00a02401, 0x20400232, 0x00b10080, 0x00000000 },
+   { 0x00a02801, 0x20500232, 0x00b10090, 0x00000000 },
+   { 0x00a02401, 0x20800232, 0x00b100c0, 0x00000000 },
+   { 0x00a02801, 0x20900232, 0x00b100d0, 0x00000000 },
+   { 0x00a02401, 0x20c00232, 0x00b10100, 0x00000000 },
+   { 0x00a02801, 0x20d00232, 0x00b10110, 0x00000000 },
+   { 0x00a02401, 0x21000232, 0x00b10140, 0x00000000 },
+   { 0x00a02801, 0x21100232, 0x00b10150, 0x00000000 },
+   { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
+   { 0x00000401, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x0000080c, 0x20243c22, 0x000007c4, 0x00010001 },
+   { 0x00000040, 0x22001c00, 0x00000200, 0xf8000001 },
+   { 0x00800001, 0x40400232, 0x00b10180, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00b101c0, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00b10190, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00b101d0, 0x00000000 },
+   { 0x00800001, 0x40800232, 0x00b101a0, 0x00000000 },
+   { 0x00800001, 0x40810232, 0x00b101e0, 0x00000000 },
+   { 0x00800001, 0x40a00232, 0x00b101b0, 0x00000000 },
+   { 0x00800001, 0x40a10232, 0x00b101f0, 0x00000000 },
+   { 0x01600031, 0x27a00001, 0x508d0000, 0x00000200 },
+   { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
+   { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
+   { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
+   { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00000000 },
+   { 0x01000010, 0x20003dac, 0x00000020, 0x00160016 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x01200010, 0x20003e2c, 0x0200006c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000024 },
+   { 0x00800009, 0x25403dad, 0x00050064, 0x00020002 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00180018 },
+   { 0x00200040, 0x45483dad, 0x00660548, 0x00100010 },
+   { 0x00200040, 0x45523dad, 0x00660552, 0x00100010 },
+   { 0x00400040, 0x25583dad, 0x00690558, 0x00100010 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0x00200020 },
+   { 0x00800040, 0x20a035ad, 0x00b100a0, 0x00b10540 },
+   { 0x00600040, 0x45423dad, 0x00ae0542, 0x00200020 },
+   { 0x00800040, 0x20e035ad, 0x00b100e0, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0xffe0ffe0 },
+   { 0x00800040, 0x20c035ad, 0x00b100c0, 0x00b10540 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000014 },
+   { 0x06600010, 0x20003dac, 0x02000020, 0x00030003 },
+   { 0x00000001, 0x206c0171, 0x00000000, 0x00000000 },
+   { 0x00610001, 0x208000a5, 0x02050080, 0x00000000 },
+   { 0x00610001, 0x212000a5, 0x02090100, 0x00000000 },
+   { 0x00410001, 0x211000a5, 0x02690100, 0x00000000 },
+   { 0x00200009, 0x25403dad, 0x00450064, 0x00020002 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00050540 },
+   { 0x00200040, 0x40883dad, 0x00660088, 0x00200020 },
+   { 0x00200040, 0x40923dad, 0x00660092, 0x00200020 },
+   { 0x00400040, 0x20983dad, 0x00690098, 0x00200020 },
+   { 0x00200401, 0x22080060, 0x00000000, 0x03400140 },
+   { 0x00000c01, 0x220c0060, 0x00000000, 0x04400080 },
+   { 0x00000801, 0x22040060, 0x00000000, 0x01000070 },
+   { 0x00000001, 0x20200169, 0x00000000, 0x00000000 },
+   { 0x0000000c, 0x2458262d, 0x0000006d, 0x00000020 },
+   { 0x00000001, 0x210e0169, 0x00000000, 0x00010001 },
+   { 0x00000005, 0x24583dad, 0x00000458, 0x00030003 },
+   { 0x01000010, 0x200035ac, 0x0000010e, 0x00000458 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000002fe },
+   { 0x01000010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00010401, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00010805, 0x245a3e2d, 0x02008800, 0x007f007f },
+   { 0x00110401, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00110805, 0x245a3e2d, 0x02008804, 0x007f007f },
+   { 0x0000000c, 0x211e362d, 0x0000006c, 0x00000020 },
+   { 0x00000040, 0x20780d21, 0x0000045a, 0x0208a002 },
+   { 0x02000005, 0x20003dac, 0x0200011e, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000001be },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000012 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00700000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c000c },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x0007000c },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00110040, 0x20441da5, 0x02000442, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c0007 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00070007 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x01600006, 0x20003dac, 0x0000044a, 0x00000000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00010040, 0x22000c20, 0x00000078, 0x00200010 },
+   { 0x00110040, 0x22000c20, 0x00000078, 0x00300010 },
+   { 0x00010001, 0x20480061, 0x00000000, 0x00030009 },
+   { 0x00110001, 0x20480061, 0x00000000, 0x00040009 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001a },
+   { 0x00600001, 0x26220231, 0x008d05c0, 0x00000000 },
+   { 0x00600401, 0x26120231, 0x008d05b8, 0x00000000 },
+   { 0x00600801, 0x26020231, 0x008d05b0, 0x00000000 },
+   { 0x00600401, 0x25f20231, 0x008d05a8, 0x00000000 },
+   { 0x00600801, 0x25e20231, 0x008d05a0, 0x00000000 },
+   { 0x00600401, 0x25d20231, 0x008d0598, 0x00000000 },
+   { 0x00600801, 0x25c20231, 0x008d0590, 0x00000000 },
+   { 0x00600401, 0x25b20231, 0x008d0588, 0x00000000 },
+   { 0x00600801, 0x25a20231, 0x008d0580, 0x00000000 },
+   { 0x00600401, 0x25920231, 0x008d0578, 0x00000000 },
+   { 0x00600801, 0x25820231, 0x008d0570, 0x00000000 },
+   { 0x00600001, 0x25720231, 0x008d0568, 0x00000000 },
+   { 0x00600001, 0x25620231, 0x008d0560, 0x00000000 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00810001, 0xbc000229, 0x02ad8000, 0x00000000 },
+   { 0x00810001, 0xbc200229, 0x02ad8020, 0x00000000 },
+   { 0x00810001, 0xbc400229, 0x02ad8040, 0x00000000 },
+   { 0x00810001, 0xbc600229, 0x02ad8060, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000012a },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000066 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000064 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04a00480 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00a02040, 0x2400462c, 0x00ad8000, 0x00ad8005 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8001, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8002, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8003, 0x00140014 },
+   { 0x00a02048, 0xb8003e2d, 0x00ad8004, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8040, 0x00ad8045 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8041, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8042, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8043, 0x00140014 },
+   { 0x00a02048, 0xb8403e2d, 0x00ad8044, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8080, 0x00ad8085 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8081, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8082, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8083, 0x00140014 },
+   { 0x00a02048, 0xb8803e2d, 0x00ad8084, 0xfffbfffb },
+   { 0x00600040, 0x2400462c, 0x008d80c0, 0x008d80c5 },
+   { 0x00600048, 0x24003e2c, 0x008d80c1, 0xfffbfffb },
+   { 0x00600048, 0x24003e2c, 0x008d80c2, 0x00140014 },
+   { 0x00600048, 0x24003e2c, 0x008d80c3, 0x00140014 },
+   { 0x00600048, 0xb8c03e2d, 0x008d80c4, 0xfffbfffb },
+   { 0x00000401, 0x22000060, 0x00000000, 0x04a00480 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04a00490 },
+   { 0x00010802, 0x220c2d28, 0x00000454, 0x04800480 },
+   { 0x00a02040, 0x24003dac, 0x00b18000, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8800, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8820, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18020, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8820, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8840, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18040, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x000a000a },
+   { 0x00a02040, 0x24003dac, 0x00b18040, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18060, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8860, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8880, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18080, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8880, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed88a0, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000000b4 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000004c },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000004a },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010002, 0x220c2d28, 0x02000454, 0x04800480 },
+   { 0x00800040, 0x24003e2c, 0x01ed8000, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8001, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8021, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8002, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8022, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8003, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8023, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8004, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8024, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8005, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8025, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00800040, 0x24003e2c, 0x01ed8040, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8060, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8041, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8061, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8042, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8062, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8043, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8063, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8044, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8064, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8045, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8065, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000005c },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000062 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000042 },
+   { 0x00200401, 0x22000128, 0x0066044c, 0x00000000 },
+   { 0x00200c01, 0x22040128, 0x0045044e, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x03400010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220c0168, 0x02000000, 0x04800480 },
+   { 0x00a02040, 0x24003e2c, 0x00ad83e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8be0, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8800, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8800, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8820, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00a02040, 0x24003e2c, 0x00ad8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8040, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8840, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8860, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8060, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8860, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8880, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001c },
+   { 0x00200401, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x01200010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01200010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00210040, 0x22002d08, 0x00450200, 0x00010001 },
+   { 0x00210040, 0x22002d08, 0x02450200, 0x00100010 },
+   { 0x00800001, 0xb8000229, 0x01ed8000, 0x00000000 },
+   { 0x00800001, 0xb8200229, 0x01ed8020, 0x00000000 },
+   { 0x00800001, 0xb8400229, 0x01ed8040, 0x00000000 },
+   { 0x00800001, 0xb8600229, 0x01ed8060, 0x00000000 },
+   { 0x80800042, 0xd8004631, 0x00d29800, 0x00d20480 },
+   { 0x80800042, 0xd8204631, 0x00d29820, 0x00d204a0 },
+   { 0x80800042, 0xd8404631, 0x00d29840, 0x00d204c0 },
+   { 0x80800042, 0xd8604631, 0x00d29860, 0x00d204e0 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000c01, 0x220201e8, 0x00000000, 0x07100710 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00200040, 0x244c3d09, 0x00450200, 0x00100010 },
+   { 0x00800041, 0x24002628, 0x00ad8000, 0x00000540 },
+   { 0x00800041, 0x24202628, 0x00ad8020, 0x00000540 },
+   { 0x00800048, 0x24002628, 0x00ad8002, 0x00000542 },
+   { 0x00800048, 0x24202628, 0x00ad8022, 0x00000542 },
+   { 0x00200001, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00800048, 0x24002628, 0x01ed8000, 0x00000544 },
+   { 0x00800048, 0x24202628, 0x01ed8020, 0x00000544 },
+   { 0x00800048, 0xbc002629, 0x01ed8002, 0x00000546 },
+   { 0x00801048, 0xbc202629, 0x01ed8022, 0x00000546 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000126 },
+   { 0x00000001, 0x20220169, 0x00000000, 0x00040004 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00500000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x00080008 },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00030008 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x00000078, 0x00100010 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00000001, 0x20480061, 0x00000000, 0x00020005 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00410001, 0xbc000229, 0x02698000, 0x00000000 },
+   { 0x00410001, 0xbc100229, 0x02698010, 0x00000000 },
+   { 0x00410001, 0xbc200229, 0x02698020, 0x00000000 },
+   { 0x00410001, 0xbc300229, 0x02698030, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000be },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000042 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04d004c0 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00802040, 0x2400462c, 0x00a98000, 0x00a98005 },
+   { 0x00802048, 0x24003e2c, 0x00a98001, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98002, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98003, 0x00140014 },
+   { 0x00802048, 0xb8003e2d, 0x00a98004, 0xfffbfffb },
+   { 0x00802040, 0x2400462c, 0x00a98040, 0x00a98045 },
+   { 0x00802048, 0x24003e2c, 0x00a98041, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98042, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98043, 0x00140014 },
+   { 0x00802048, 0xb8203e2d, 0x00a98044, 0xfffbfffb },
+   { 0x00400040, 0x2400462c, 0x00698080, 0x00698085 },
+   { 0x00400048, 0x24003e2c, 0x00698081, 0xfffbfffb },
+   { 0x00400048, 0x24003e2c, 0x00698082, 0x00140014 },
+   { 0x00400048, 0x24003e2c, 0x00698083, 0x00140014 },
+   { 0x00400048, 0xb8403e2d, 0x00698084, 0xfffbfffb },
+   { 0x00000401, 0x220c0060, 0x00000000, 0x04e004d0 },
+   { 0x00000c01, 0x22000060, 0x00000000, 0x04d004c8 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04e004d8 },
+   { 0x00000801, 0x22080060, 0x00000000, 0x04f004e8 },
+   { 0x00800040, 0x24003dac, 0x00b104c0, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x00b104e0, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed9800, 0x00140014 },
+   { 0x00010001, 0x220c0168, 0x00000000, 0x05400540 },
+   { 0x00110001, 0x220c0168, 0x00000000, 0x04c004c0 },
+   { 0x00800048, 0x24003dac, 0x01e98000, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01e98020, 0x00010001 },
+   { 0x00800048, 0x24003dac, 0x01e98800, 0x00140014 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000022 },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010001, 0x220e0168, 0x02000000, 0x05400540 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x00800040, 0x24003e2c, 0x01e98000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98001, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98002, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98003, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98004, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98005, 0x00010001 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x03600010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x00400401, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x00400840, 0x22083d2c, 0x0069044c, 0x00100010 },
+   { 0x00610040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x01e983e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e993e0, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e99000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e99020, 0x00010001 },
+   { 0x00000001, 0x220e0168, 0x00000000, 0x05400540 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x00400001, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x01400010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01400010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00010001 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00100010 },
+   { 0x00800001, 0x25400229, 0x01e98000, 0x00000000 },
+   { 0x80800042, 0x45404631, 0x00d20540, 0x00d204c0 },
+   { 0x00000001, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x00400001, 0xd8000231, 0x008a0540, 0x00000000 },
+   { 0x00400001, 0xd8100231, 0x008a0548, 0x00000000 },
+   { 0x00400001, 0xd8200231, 0x008a0550, 0x00000000 },
+   { 0x00400001, 0xd8300231, 0x008a0558, 0x00000000 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00600041, 0x24002628, 0x00898000, 0x00000540 },
+   { 0x00600048, 0x24002628, 0x00898002, 0x00000542 },
+   { 0x00600048, 0x24002628, 0x00898008, 0x00000544 },
+   { 0x00600048, 0x2540262d, 0x0089800a, 0x00000546 },
+   { 0x00400401, 0xbc0001a9, 0x00690540, 0x00000000 },
+   { 0x00400801, 0xbc1001a9, 0x00690548, 0x00000000 },
+   { 0x01000010, 0x20003d2c, 0x00000022, 0x00030003 },
+   { 0x01000040, 0x20223d29, 0x02000022, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00080008 },
+   { 0x00110040, 0x24540c21, 0x00000454, 0x00080008 },
+   { 0x00010040, 0x24540c21, 0x00000454, 0x00180038 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffee6 },
+   { 0x01800010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0xffe0ffe0 },
+   { 0x00010001, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00110001, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00000001, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00800040, 0x24003dac, 0x00b19c00, 0x00200020 },
+   { 0x00800040, 0x24203dac, 0x00b19c20, 0x00200020 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00060006 },
+   { 0x8080000c, 0xdc203d91, 0x00b10420, 0x00060006 },
+   { 0x02000040, 0x210e3d29, 0x0200010e, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00040004 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffcf8 },
+   { 0x01000005, 0x25643e2d, 0x00000060, 0x00c000c0 },
+   { 0x01000010, 0x20003dac, 0x02000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00a02001, 0x46400231, 0x00d20780, 0x00000000 },
+   { 0x00a02001, 0x46800231, 0x00d207c0, 0x00000000 },
+   { 0x00a02001, 0x46c00231, 0x00d20480, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000007e },
+   { 0x80a02042, 0x46404631, 0x00d20640, 0x00d20780 },
+   { 0x80a02042, 0x46804631, 0x00d20680, 0x00d207c0 },
+   { 0x80a02042, 0x46c04631, 0x00d206c0, 0x00d20480 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000076 },
+   { 0x01000010, 0x20003dac, 0x02000564, 0x00800080 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffe4 },
+   { 0x00200401, 0x256801ed, 0x00000000, 0x00200020 },
+   { 0x00200801, 0x256c01ed, 0x00000000, 0x00060006 },
+   { 0x00400001, 0x258001ed, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x45a001ad, 0x00058c00, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000028 },
+   { 0x02600005, 0x20006e28, 0x02008c0c, 0x88848421 },
+   { 0x0220000c, 0x25663dad, 0x00000458, 0x00010001 },
+   { 0x0000000c, 0x25643dad, 0x00000564, 0x00060006 },
+   { 0x00310001, 0x25c401ad, 0x00000458, 0x00000000 },
+   { 0x00210001, 0x25c00061, 0x00000000, 0x00010001 },
+   { 0x00310040, 0x25c03dad, 0x004545c4, 0x00010001 },
+   { 0x00610001, 0x25a00061, 0x02000000, 0x00000080 },
+   { 0x00710001, 0x45a002ad, 0x02ae8c00, 0x00000000 },
+   { 0x00710001, 0x45a202ad, 0x02ae8c01, 0x00000000 },
+   { 0x00800041, 0x25a035ad, 0x00b105a0, 0x000905c0 },
+   { 0x01200010, 0x20003e2c, 0x0245006e, 0x00000000 },
+   { 0x00310009, 0x244c45ad, 0x02000564, 0x0045006e },
+   { 0x00210001, 0x244c01ed, 0x02000000, 0x00000000 },
+   { 0x0031000c, 0x244c3dad, 0x0245044c, 0x00010001 },
+   { 0x00200009, 0x256835ad, 0x0045044c, 0x00000566 },
+   { 0x00210040, 0x256835ad, 0x02450568, 0x00000566 },
+   { 0x00200040, 0x256c362d, 0x0045006e, 0x00000566 },
+   { 0x00400040, 0x240035ac, 0x00ab05a2, 0x00ab05a6 },
+   { 0x00400040, 0x2400358c, 0x00690400, 0x00000566 },
+   { 0x0040000c, 0x2580358d, 0x00690400, 0x00000566 },
+   { 0x00800041, 0x2400362c, 0x00d20640, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d20660, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d20780, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207a0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x464035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x466035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d20680, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d206a0, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d207c0, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207e0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x468035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x46a035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d206c0, 0x000705a8 },
+   { 0x00800041, 0x2420362c, 0x00d206e0, 0x000705a8 },
+   { 0x00800048, 0x2400362c, 0x00d20480, 0x000705ac },
+   { 0x00800048, 0x2420362c, 0x00d204a0, 0x000705ac },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x0000056a },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x0000056a },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056e },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056e },
+   { 0x80800040, 0x46c035b1, 0x00b104c0, 0x00050582 },
+   { 0x80800040, 0x46e035b1, 0x00b104e0, 0x00050582 },
+   { 0x01800005, 0x20003d2c, 0x02000020, 0x00020002 },
+   { 0x80800040, 0xd00045b1, 0x00b19000, 0x00d20640 },
+   { 0x80800040, 0xd02045b1, 0x00b19020, 0x00d20660 },
+   { 0x80800040, 0xd04045b1, 0x00b19040, 0x00d20680 },
+   { 0x80800040, 0xd06045b1, 0x00b19060, 0x00d206a0 },
+   { 0x00000040, 0x22083d8c, 0x00000208, 0x00800080 },
+   { 0x80400040, 0xd40045b1, 0x00699400, 0x00ab06c0 },
+   { 0x80400040, 0xd48045b1, 0x00699480, 0x00ab06c2 },
+   { 0x80400040, 0xd42045b1, 0x00699420, 0x00ab06e0 },
+   { 0x80400040, 0xd4a045b1, 0x006994a0, 0x00ab06e2 },
+   { 0x80400040, 0xd41045b1, 0x00699410, 0x00ab06d0 },
+   { 0x80400040, 0xd49045b1, 0x00699490, 0x00ab06d2 },
+   { 0x80400040, 0xd43045b1, 0x00699430, 0x00ab06f0 },
+   { 0x80400040, 0xd4b045b1, 0x006994b0, 0x00ab06f2 },
+   { 0x00000040, 0x220a3d8c, 0x0000020a, 0x00080008 },
+   { 0x00110040, 0x220a3d8c, 0x0200020a, 0x00300030 },
+   { 0x01000010, 0x20003d2c, 0x02000020, 0x00060006 },
+   { 0x00000040, 0x20203d29, 0x00000020, 0x00020002 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00100001 },
+   { 0x00000040, 0x220c358c, 0x0000020c, 0x0000003a },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffc36 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x01400140 },
+   { 0x00000001, 0x220201ec, 0x00000000, 0x01c001c0 },
+   { 0x00800401, 0x20400236, 0x01ee8000, 0x00000000 },
+   { 0x00800801, 0x20500236, 0x01ee8010, 0x00000000 },
+   { 0x00800401, 0x20600236, 0x01ee8020, 0x00000000 },
+   { 0x00800801, 0x20700236, 0x01ee8030, 0x00000000 },
+   { 0x00800401, 0x20800236, 0x01ee8040, 0x00000000 },
+   { 0x00800801, 0x20900236, 0x01ee8050, 0x00000000 },
+   { 0x00800401, 0x20a00236, 0x01ee8060, 0x00000000 },
+   { 0x00800801, 0x20b00236, 0x01ee8070, 0x00000000 },
+   { 0x00800401, 0x20c00236, 0x01ee8100, 0x00000000 },
+   { 0x00800801, 0x20d00236, 0x01ee8110, 0x00000000 },
+   { 0x00800401, 0x20e00236, 0x01ee8120, 0x00000000 },
+   { 0x00800801, 0x20f00236, 0x01ee8130, 0x00000000 },
+   { 0x00800401, 0x21000236, 0x01ee8140, 0x00000000 },
+   { 0x00800801, 0x21100236, 0x01ee8150, 0x00000000 },
+   { 0x00800401, 0x21200236, 0x01ee8160, 0x00000000 },
+   { 0x00800801, 0x21300236, 0x01ee8170, 0x00000000 },
+   { 0x00200401, 0x204001a5, 0x00450064, 0x00000000 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x000f000f },
+   { 0x01600031, 0x21401c21, 0x508d0040, 0x1218a000 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x03400340 },
+   { 0x00800401, 0x41600236, 0x00d28000, 0x00000000 },
+   { 0x00800801, 0x41610236, 0x00d28080, 0x00000000 },
+   { 0x00800401, 0x41800236, 0x00d28020, 0x00000000 },
+   { 0x00800801, 0x41810236, 0x00d280a0, 0x00000000 },
+   { 0x00800401, 0x41a00236, 0x00d28040, 0x00000000 },
+   { 0x00800801, 0x41a10236, 0x00d280c0, 0x00000000 },
+   { 0x00800401, 0x41c00236, 0x00d28060, 0x00000000 },
+   { 0x00800801, 0x41c10236, 0x00d280e0, 0x00000000 },
+   { 0x0000040c, 0x20443ca5, 0x00000044, 0x00010001 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x0007000f },
+   { 0x0a600031, 0x21601c21, 0x508d0040, 0x0a18a001 },
+   { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
+   { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
+   { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
+   { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00000000 },
+   { 0x01000010, 0x20003dac, 0x00000020, 0x00160016 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x01200010, 0x20003e2c, 0x0200006c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000024 },
+   { 0x00800009, 0x25403dad, 0x00050064, 0x00020002 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00180018 },
+   { 0x00200040, 0x45483dad, 0x00660548, 0x00100010 },
+   { 0x00200040, 0x45523dad, 0x00660552, 0x00100010 },
+   { 0x00400040, 0x25583dad, 0x00690558, 0x00100010 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0x00200020 },
+   { 0x00800040, 0x20a035ad, 0x00b100a0, 0x00b10540 },
+   { 0x00600040, 0x45423dad, 0x00ae0542, 0x00200020 },
+   { 0x00800040, 0x20e035ad, 0x00b100e0, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0xffe0ffe0 },
+   { 0x00800040, 0x20c035ad, 0x00b100c0, 0x00b10540 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000014 },
+   { 0x06600010, 0x20003dac, 0x02000020, 0x00030003 },
+   { 0x00000001, 0x206c0171, 0x00000000, 0x00000000 },
+   { 0x00610001, 0x208000a5, 0x02050080, 0x00000000 },
+   { 0x00610001, 0x212000a5, 0x02090100, 0x00000000 },
+   { 0x00410001, 0x211000a5, 0x02690100, 0x00000000 },
+   { 0x00200009, 0x25403dad, 0x00450064, 0x00020002 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00050540 },
+   { 0x00200040, 0x40883dad, 0x00660088, 0x00200020 },
+   { 0x00200040, 0x40923dad, 0x00660092, 0x00200020 },
+   { 0x00400040, 0x20983dad, 0x00690098, 0x00200020 },
+   { 0x00200401, 0x22080060, 0x00000000, 0x03400140 },
+   { 0x00000c01, 0x220c0060, 0x00000000, 0x04400080 },
+   { 0x00000801, 0x22040060, 0x00000000, 0x01000070 },
+   { 0x00000005, 0x203e2e29, 0x00000063, 0x00010001 },
+   { 0x00000001, 0x20200169, 0x00000000, 0x00000000 },
+   { 0x0000000c, 0x2458262d, 0x0000006d, 0x00000020 },
+   { 0x00000001, 0x210e0169, 0x00000000, 0x00010001 },
+   { 0x00000005, 0x24583dad, 0x00000458, 0x00030003 },
+   { 0x01000010, 0x200035ac, 0x0000010e, 0x00000458 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000316 },
+   { 0x01000010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00010401, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00010805, 0x245a3e2d, 0x02008800, 0x007f007f },
+   { 0x00110401, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00110805, 0x245a3e2d, 0x02008804, 0x007f007f },
+   { 0x00010005, 0x245c3e2d, 0x02008800, 0x00800080 },
+   { 0x00110005, 0x245c3e2d, 0x02008804, 0x00800080 },
+   { 0x00000009, 0x245c3dad, 0x0000045c, 0x00010001 },
+   { 0x0000000c, 0x211e362d, 0x0000006c, 0x00000020 },
+   { 0x00000040, 0x240035ac, 0x0000003e, 0x0000045c },
+   { 0x01000010, 0x20003d8c, 0x00210400, 0x00010001 },
+   { 0x01000010, 0x20003d8c, 0x02210400, 0x01000100 },
+   { 0x00000001, 0x203c01ed, 0x00000000, 0x00000000 },
+   { 0x00010001, 0x203c01ed, 0x00000000, 0x00020002 },
+   { 0x00010001, 0x203c01ed, 0x02000000, 0xfffefffe },
+   { 0x00000040, 0x20780d21, 0x0000045a, 0x0208e602 },
+   { 0x00000040, 0x20782421, 0x00000078, 0x0000045c },
+   { 0x02000005, 0x20003dac, 0x0200011e, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000001c0 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000012 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00700000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c000c },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x0007000c },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00110040, 0x20441da5, 0x02000442, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c0007 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00070007 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x01600006, 0x20003dac, 0x0000044a, 0x00000000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00010040, 0x22000c20, 0x00000078, 0x00200010 },
+   { 0x00110040, 0x22000c20, 0x00000078, 0x00300010 },
+   { 0x00010001, 0x20480061, 0x00000000, 0x00030009 },
+   { 0x00110001, 0x20480061, 0x00000000, 0x00040009 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001a },
+   { 0x00600001, 0x26220231, 0x008d05c0, 0x00000000 },
+   { 0x00600401, 0x26120231, 0x008d05b8, 0x00000000 },
+   { 0x00600801, 0x26020231, 0x008d05b0, 0x00000000 },
+   { 0x00600401, 0x25f20231, 0x008d05a8, 0x00000000 },
+   { 0x00600801, 0x25e20231, 0x008d05a0, 0x00000000 },
+   { 0x00600401, 0x25d20231, 0x008d0598, 0x00000000 },
+   { 0x00600801, 0x25c20231, 0x008d0590, 0x00000000 },
+   { 0x00600401, 0x25b20231, 0x008d0588, 0x00000000 },
+   { 0x00600801, 0x25a20231, 0x008d0580, 0x00000000 },
+   { 0x00600401, 0x25920231, 0x008d0578, 0x00000000 },
+   { 0x00600801, 0x25820231, 0x008d0570, 0x00000000 },
+   { 0x00600001, 0x25720231, 0x008d0568, 0x00000000 },
+   { 0x00600001, 0x25620231, 0x008d0560, 0x00000000 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00810001, 0xbc000229, 0x02ad8000, 0x00000000 },
+   { 0x00810001, 0xbc200229, 0x02ad8020, 0x00000000 },
+   { 0x00810001, 0xbc400229, 0x02ad8040, 0x00000000 },
+   { 0x00810001, 0xbc600229, 0x02ad8060, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000012a },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000066 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000064 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04a00480 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00a02040, 0x2400462c, 0x00ad8000, 0x00ad8005 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8001, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8002, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8003, 0x00140014 },
+   { 0x00a02048, 0xb8003e2d, 0x00ad8004, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8040, 0x00ad8045 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8041, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8042, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8043, 0x00140014 },
+   { 0x00a02048, 0xb8403e2d, 0x00ad8044, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8080, 0x00ad8085 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8081, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8082, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8083, 0x00140014 },
+   { 0x00a02048, 0xb8803e2d, 0x00ad8084, 0xfffbfffb },
+   { 0x00600040, 0x2400462c, 0x008d80c0, 0x008d80c5 },
+   { 0x00600048, 0x24003e2c, 0x008d80c1, 0xfffbfffb },
+   { 0x00600048, 0x24003e2c, 0x008d80c2, 0x00140014 },
+   { 0x00600048, 0x24003e2c, 0x008d80c3, 0x00140014 },
+   { 0x00600048, 0xb8c03e2d, 0x008d80c4, 0xfffbfffb },
+   { 0x00000401, 0x22000060, 0x00000000, 0x04a00480 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04a00490 },
+   { 0x00010802, 0x220c2d28, 0x00000454, 0x04800480 },
+   { 0x00a02040, 0x24003dac, 0x00b18000, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8800, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8820, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18020, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8820, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8840, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18040, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x000a000a },
+   { 0x00a02040, 0x24003dac, 0x00b18040, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18060, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8860, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8880, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18080, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8880, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed88a0, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000000b4 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000004c },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000004a },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010002, 0x220c2d28, 0x02000454, 0x04800480 },
+   { 0x00800040, 0x24003e2c, 0x01ed8000, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8001, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8021, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8002, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8022, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8003, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8023, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8004, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8024, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8005, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8025, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00800040, 0x24003e2c, 0x01ed8040, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8060, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8041, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8061, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8042, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8062, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8043, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8063, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8044, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8064, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8045, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8065, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000005c },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000062 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000042 },
+   { 0x00200401, 0x22000128, 0x0066044c, 0x00000000 },
+   { 0x00200c01, 0x22040128, 0x0045044e, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x03400010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220c0168, 0x02000000, 0x04800480 },
+   { 0x00a02040, 0x24003e2c, 0x00ad83e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8be0, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8800, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8800, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8820, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00a02040, 0x24003e2c, 0x00ad8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8040, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8840, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8860, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8060, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8860, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8880, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001c },
+   { 0x00200401, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x01200010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01200010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00210040, 0x22002d08, 0x00450200, 0x00010001 },
+   { 0x00210040, 0x22002d08, 0x02450200, 0x00100010 },
+   { 0x00800001, 0xb8000229, 0x01ed8000, 0x00000000 },
+   { 0x00800001, 0xb8200229, 0x01ed8020, 0x00000000 },
+   { 0x00800001, 0xb8400229, 0x01ed8040, 0x00000000 },
+   { 0x00800001, 0xb8600229, 0x01ed8060, 0x00000000 },
+   { 0x80800042, 0xd8004631, 0x00d29800, 0x00d20480 },
+   { 0x80800042, 0xd8204631, 0x00d29820, 0x00d204a0 },
+   { 0x80800042, 0xd8404631, 0x00d29840, 0x00d204c0 },
+   { 0x80800042, 0xd8604631, 0x00d29860, 0x00d204e0 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000c01, 0x220201e8, 0x00000000, 0x07100710 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00200040, 0x244c3d09, 0x00450200, 0x00100010 },
+   { 0x00800041, 0x24002628, 0x00ad8000, 0x00000540 },
+   { 0x00800041, 0x24202628, 0x00ad8020, 0x00000540 },
+   { 0x00800048, 0x24002628, 0x00ad8002, 0x00000542 },
+   { 0x00800048, 0x24202628, 0x00ad8022, 0x00000542 },
+   { 0x00200001, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00800048, 0x24002628, 0x01ed8000, 0x00000544 },
+   { 0x00800048, 0x24202628, 0x01ed8020, 0x00000544 },
+   { 0x00800048, 0xbc002629, 0x01ed8002, 0x00000546 },
+   { 0x00801048, 0xbc202629, 0x01ed8022, 0x00000546 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000128 },
+   { 0x00000001, 0x20220169, 0x00000000, 0x00040004 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00500000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x00080008 },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00030008 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x00000040, 0x22000c20, 0x00000078, 0x00100010 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00000001, 0x20480061, 0x00000000, 0x00020005 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00410001, 0xbc000229, 0x02698000, 0x00000000 },
+   { 0x00410001, 0xbc100229, 0x02698010, 0x00000000 },
+   { 0x00410001, 0xbc200229, 0x02698020, 0x00000000 },
+   { 0x00410001, 0xbc300229, 0x02698030, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000be },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000042 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04d004c0 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00802040, 0x2400462c, 0x00a98000, 0x00a98005 },
+   { 0x00802048, 0x24003e2c, 0x00a98001, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98002, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98003, 0x00140014 },
+   { 0x00802048, 0xb8003e2d, 0x00a98004, 0xfffbfffb },
+   { 0x00802040, 0x2400462c, 0x00a98040, 0x00a98045 },
+   { 0x00802048, 0x24003e2c, 0x00a98041, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98042, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98043, 0x00140014 },
+   { 0x00802048, 0xb8203e2d, 0x00a98044, 0xfffbfffb },
+   { 0x00400040, 0x2400462c, 0x00698080, 0x00698085 },
+   { 0x00400048, 0x24003e2c, 0x00698081, 0xfffbfffb },
+   { 0x00400048, 0x24003e2c, 0x00698082, 0x00140014 },
+   { 0x00400048, 0x24003e2c, 0x00698083, 0x00140014 },
+   { 0x00400048, 0xb8403e2d, 0x00698084, 0xfffbfffb },
+   { 0x00000401, 0x220c0060, 0x00000000, 0x04e004d0 },
+   { 0x00000c01, 0x22000060, 0x00000000, 0x04d004c8 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04e004d8 },
+   { 0x00000801, 0x22080060, 0x00000000, 0x04f004e8 },
+   { 0x00800040, 0x24003dac, 0x00b104c0, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x00b104e0, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed9800, 0x00140014 },
+   { 0x00010001, 0x220c0168, 0x00000000, 0x05400540 },
+   { 0x00110001, 0x220c0168, 0x00000000, 0x04c004c0 },
+   { 0x00800048, 0x24003dac, 0x01e98000, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01e98020, 0x00010001 },
+   { 0x00800048, 0x24003dac, 0x01e98800, 0x00140014 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000022 },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010001, 0x220e0168, 0x02000000, 0x05400540 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x00800040, 0x24003e2c, 0x01e98000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98001, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98002, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98003, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98004, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98005, 0x00010001 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x03600010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x00400401, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x00400840, 0x22083d2c, 0x0069044c, 0x00100010 },
+   { 0x00610040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x01e983e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e993e0, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e99000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e99020, 0x00010001 },
+   { 0x00000001, 0x220e0168, 0x00000000, 0x05400540 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x00400001, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x01400010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01400010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00010001 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00100010 },
+   { 0x00800001, 0x25400229, 0x01e98000, 0x00000000 },
+   { 0x80800042, 0x45404631, 0x00d20540, 0x00d204c0 },
+   { 0x00000001, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x00400001, 0xd8000231, 0x008a0540, 0x00000000 },
+   { 0x00400001, 0xd8100231, 0x008a0548, 0x00000000 },
+   { 0x00400001, 0xd8200231, 0x008a0550, 0x00000000 },
+   { 0x00400001, 0xd8300231, 0x008a0558, 0x00000000 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00600041, 0x24002628, 0x00898000, 0x00000540 },
+   { 0x00600048, 0x24002628, 0x00898002, 0x00000542 },
+   { 0x00600048, 0x24002628, 0x00898008, 0x00000544 },
+   { 0x00600048, 0x2540262d, 0x0089800a, 0x00000546 },
+   { 0x00400401, 0xbc0001a9, 0x00690540, 0x00000000 },
+   { 0x00400801, 0xbc1001a9, 0x00690548, 0x00000000 },
+   { 0x01000010, 0x20003d2c, 0x00000022, 0x00030003 },
+   { 0x01000040, 0x20223d29, 0x02000022, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00080008 },
+   { 0x00110040, 0x24540c21, 0x00000454, 0x00080008 },
+   { 0x00010040, 0x24540c21, 0x00000454, 0x00180038 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffee4 },
+   { 0x01800010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0xffe0ffe0 },
+   { 0x00010001, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00110001, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00000001, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00800040, 0x24003dac, 0x00b19c00, 0x00200020 },
+   { 0x00800040, 0x24203dac, 0x00b19c20, 0x00200020 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00060006 },
+   { 0x8080000c, 0xdc203d91, 0x00b10420, 0x00060006 },
+   { 0x02000040, 0x210e3d29, 0x0200010e, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00040004 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffce0 },
+   { 0x01000005, 0x25643e2d, 0x00000060, 0x00c000c0 },
+   { 0x01000010, 0x20003dac, 0x02000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00a02001, 0x46400231, 0x00d20780, 0x00000000 },
+   { 0x00a02001, 0x46800231, 0x00d207c0, 0x00000000 },
+   { 0x00a02001, 0x46c00231, 0x00d20480, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000007e },
+   { 0x80a02042, 0x46404631, 0x00d20640, 0x00d20780 },
+   { 0x80a02042, 0x46804631, 0x00d20680, 0x00d207c0 },
+   { 0x80a02042, 0x46c04631, 0x00d206c0, 0x00d20480 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000076 },
+   { 0x01000010, 0x20003dac, 0x02000564, 0x00800080 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffe4 },
+   { 0x00200401, 0x256801ed, 0x00000000, 0x00200020 },
+   { 0x00200801, 0x256c01ed, 0x00000000, 0x00060006 },
+   { 0x00400001, 0x258001ed, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x45a001ad, 0x00058c00, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000028 },
+   { 0x02600005, 0x20006e28, 0x02008c0c, 0x88848421 },
+   { 0x0220000c, 0x25663dad, 0x00000458, 0x00010001 },
+   { 0x0000000c, 0x25643dad, 0x00000564, 0x00060006 },
+   { 0x00310001, 0x25c401ad, 0x00000458, 0x00000000 },
+   { 0x00210001, 0x25c00061, 0x00000000, 0x00010001 },
+   { 0x00310040, 0x25c03dad, 0x004545c4, 0x00010001 },
+   { 0x00610001, 0x25a00061, 0x02000000, 0x00000080 },
+   { 0x00710001, 0x45a002ad, 0x02ae8c00, 0x00000000 },
+   { 0x00710001, 0x45a202ad, 0x02ae8c01, 0x00000000 },
+   { 0x00800041, 0x25a035ad, 0x00b105a0, 0x000905c0 },
+   { 0x01200010, 0x20003e2c, 0x0245006e, 0x00000000 },
+   { 0x00310009, 0x244c45ad, 0x02000564, 0x0045006e },
+   { 0x00210001, 0x244c01ed, 0x02000000, 0x00000000 },
+   { 0x0031000c, 0x244c3dad, 0x0245044c, 0x00010001 },
+   { 0x00200009, 0x256835ad, 0x0045044c, 0x00000566 },
+   { 0x00210040, 0x256835ad, 0x02450568, 0x00000566 },
+   { 0x00200040, 0x256c362d, 0x0045006e, 0x00000566 },
+   { 0x00400040, 0x240035ac, 0x00ab05a2, 0x00ab05a6 },
+   { 0x00400040, 0x2400358c, 0x00690400, 0x00000566 },
+   { 0x0040000c, 0x2580358d, 0x00690400, 0x00000566 },
+   { 0x00800041, 0x2400362c, 0x00d20640, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d20660, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d20780, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207a0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x464035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x466035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d20680, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d206a0, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d207c0, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207e0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x468035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x46a035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d206c0, 0x000705a8 },
+   { 0x00800041, 0x2420362c, 0x00d206e0, 0x000705a8 },
+   { 0x00800048, 0x2400362c, 0x00d20480, 0x000705ac },
+   { 0x00800048, 0x2420362c, 0x00d204a0, 0x000705ac },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x0000056a },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x0000056a },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056e },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056e },
+   { 0x80800040, 0x46c035b1, 0x00b104c0, 0x00050582 },
+   { 0x80800040, 0x46e035b1, 0x00b104e0, 0x00050582 },
+   { 0x01800005, 0x20003d2c, 0x02000020, 0x00020002 },
+   { 0x80800040, 0xd00045b1, 0x00b19000, 0x00d20640 },
+   { 0x80800040, 0xd02045b1, 0x00b19020, 0x00d20660 },
+   { 0x80800040, 0xd04045b1, 0x00b19040, 0x00d20680 },
+   { 0x80800040, 0xd06045b1, 0x00b19060, 0x00d206a0 },
+   { 0x00000040, 0x22083d8c, 0x00000208, 0x00800080 },
+   { 0x80400040, 0xd40045b1, 0x00699400, 0x00ab06c0 },
+   { 0x80400040, 0xd48045b1, 0x00699480, 0x00ab06c2 },
+   { 0x80400040, 0xd42045b1, 0x00699420, 0x00ab06e0 },
+   { 0x80400040, 0xd4a045b1, 0x006994a0, 0x00ab06e2 },
+   { 0x80400040, 0xd41045b1, 0x00699410, 0x00ab06d0 },
+   { 0x80400040, 0xd49045b1, 0x00699490, 0x00ab06d2 },
+   { 0x80400040, 0xd43045b1, 0x00699430, 0x00ab06f0 },
+   { 0x80400040, 0xd4b045b1, 0x006994b0, 0x00ab06f2 },
+   { 0x00000040, 0x220a3d8c, 0x0000020a, 0x00080008 },
+   { 0x00110040, 0x220a3d8c, 0x0200020a, 0x00300030 },
+   { 0x01000010, 0x20003d2c, 0x02000020, 0x00060006 },
+   { 0x00000040, 0x20203d29, 0x00000020, 0x00020002 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00100001 },
+   { 0x00000040, 0x220c358c, 0x0000020c, 0x0000003a },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffc1e },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x01400140 },
+   { 0x00000001, 0x220201ec, 0x00000000, 0x01c001c0 },
+   { 0x00800401, 0x20400236, 0x01ee8000, 0x00000000 },
+   { 0x00800801, 0x20500236, 0x01ee8010, 0x00000000 },
+   { 0x00800401, 0x20600236, 0x01ee8020, 0x00000000 },
+   { 0x00800801, 0x20700236, 0x01ee8030, 0x00000000 },
+   { 0x00800401, 0x20800236, 0x01ee8040, 0x00000000 },
+   { 0x00800801, 0x20900236, 0x01ee8050, 0x00000000 },
+   { 0x00800401, 0x20a00236, 0x01ee8060, 0x00000000 },
+   { 0x00800801, 0x20b00236, 0x01ee8070, 0x00000000 },
+   { 0x00800401, 0x20c00236, 0x01ee8100, 0x00000000 },
+   { 0x00800801, 0x20d00236, 0x01ee8110, 0x00000000 },
+   { 0x00800401, 0x20e00236, 0x01ee8120, 0x00000000 },
+   { 0x00800801, 0x20f00236, 0x01ee8130, 0x00000000 },
+   { 0x00800401, 0x21000236, 0x01ee8140, 0x00000000 },
+   { 0x00800801, 0x21100236, 0x01ee8150, 0x00000000 },
+   { 0x00800401, 0x21200236, 0x01ee8160, 0x00000000 },
+   { 0x00800801, 0x21300236, 0x01ee8170, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x1218a600 },
+   { 0x00200401, 0x204001a5, 0x00450064, 0x00000000 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x000f000f },
+   { 0x01600031, 0x21400021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x03400340 },
+   { 0x00800401, 0x41600236, 0x00d28000, 0x00000000 },
+   { 0x00800801, 0x41610236, 0x00d28080, 0x00000000 },
+   { 0x00800401, 0x41800236, 0x00d28020, 0x00000000 },
+   { 0x00800801, 0x41810236, 0x00d280a0, 0x00000000 },
+   { 0x00800401, 0x41a00236, 0x00d28040, 0x00000000 },
+   { 0x00800801, 0x41a10236, 0x00d280c0, 0x00000000 },
+   { 0x00800401, 0x41c00236, 0x00d28060, 0x00000000 },
+   { 0x00800801, 0x41c10236, 0x00d280e0, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x0a18a601 },
+   { 0x0000040c, 0x20443ca5, 0x00000044, 0x00010001 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x0007000f },
+   { 0x0a600031, 0x21600021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
+   { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
+   { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
+   { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00000000 },
+   { 0x01000010, 0x20003dac, 0x00000020, 0x00160016 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x01200010, 0x20003e2c, 0x0200006c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000024 },
+   { 0x00800009, 0x25403dad, 0x00050064, 0x00020002 },
+   { 0x00000001, 0x203a01ed, 0x00000000, 0x00180018 },
+   { 0x00200040, 0x45483dad, 0x00660548, 0x00100010 },
+   { 0x00200040, 0x45523dad, 0x00660552, 0x00100010 },
+   { 0x00400040, 0x25583dad, 0x00690558, 0x00100010 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0x00200020 },
+   { 0x00800040, 0x20a035ad, 0x00b100a0, 0x00b10540 },
+   { 0x00600040, 0x45423dad, 0x00ae0542, 0x00200020 },
+   { 0x00800040, 0x20e035ad, 0x00b100e0, 0x00b10540 },
+   { 0x00600040, 0x45403dad, 0x00ae0540, 0xffe0ffe0 },
+   { 0x00800040, 0x20c035ad, 0x00b100c0, 0x00b10540 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000014 },
+   { 0x06600010, 0x20003dac, 0x02000020, 0x00030003 },
+   { 0x00000001, 0x206c0171, 0x00000000, 0x00000000 },
+   { 0x00610001, 0x208000a5, 0x02050080, 0x00000000 },
+   { 0x00610001, 0x212000a5, 0x02090100, 0x00000000 },
+   { 0x00410001, 0x211000a5, 0x02690100, 0x00000000 },
+   { 0x00200009, 0x25403dad, 0x00450064, 0x00020002 },
+   { 0x00800040, 0x208035ad, 0x00b10080, 0x00050540 },
+   { 0x00200040, 0x40883dad, 0x00660088, 0x00200020 },
+   { 0x00200040, 0x40923dad, 0x00660092, 0x00200020 },
+   { 0x00400040, 0x20983dad, 0x00690098, 0x00200020 },
+   { 0x00200401, 0x22080060, 0x00000000, 0x03400140 },
+   { 0x00000c01, 0x220c0060, 0x00000000, 0x04400080 },
+   { 0x00000801, 0x22040060, 0x00000000, 0x01000070 },
+   { 0x02000005, 0x20002e28, 0x00000061, 0x00400040 },
+   { 0x00010005, 0x203e2e29, 0x00000063, 0x00010001 },
+   { 0x00110001, 0x203e0169, 0x00000000, 0x00030003 },
+   { 0x00000001, 0x20200169, 0x00000000, 0x00000000 },
+   { 0x0000000c, 0x2458262d, 0x0000006d, 0x00000020 },
+   { 0x00000001, 0x210e0169, 0x00000000, 0x00010001 },
+   { 0x00000005, 0x24583dad, 0x00000458, 0x00030003 },
+   { 0x01000010, 0x200035ac, 0x0000010e, 0x00000458 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000031a },
+   { 0x01000010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00010401, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00010805, 0x245a3e2d, 0x02008800, 0x007f007f },
+   { 0x00110401, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00110805, 0x245a3e2d, 0x02008804, 0x007f007f },
+   { 0x00010005, 0x245c3e2d, 0x02008800, 0x00800080 },
+   { 0x00110005, 0x245c3e2d, 0x02008804, 0x00800080 },
+   { 0x00000009, 0x245c3dad, 0x0000045c, 0x00010001 },
+   { 0x0000000c, 0x211e362d, 0x0000006c, 0x00000020 },
+   { 0x00000040, 0x240035ac, 0x0000003e, 0x0000045c },
+   { 0x01000010, 0x20003d8c, 0x00210400, 0x00010001 },
+   { 0x01000010, 0x20003d8c, 0x02210400, 0x01000100 },
+   { 0x00000001, 0x203c01ed, 0x00000000, 0x00000000 },
+   { 0x00010001, 0x203c01ed, 0x00000000, 0x00020002 },
+   { 0x00010001, 0x203c01ed, 0x02000000, 0xfffefffe },
+   { 0x02000005, 0x20002e28, 0x00000061, 0x00400040 },
+   { 0x00010040, 0x244c0d21, 0x0000045a, 0x0208e602 },
+   { 0x00110040, 0x20780d21, 0x0000045a, 0x0208a002 },
+   { 0x00010040, 0x20782421, 0x0000044c, 0x0000045c },
+   { 0x02000005, 0x20003dac, 0x0200011e, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000001c0 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000012 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00700000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c000c },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x0007000c },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00400000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00110040, 0x20441da5, 0x02000442, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x000c0007 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00070007 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x01600006, 0x20003dac, 0x0000044a, 0x00000000 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00010040, 0x22000c20, 0x00000078, 0x00200010 },
+   { 0x00110040, 0x22000c20, 0x00000078, 0x00300010 },
+   { 0x00010001, 0x20480061, 0x00000000, 0x00030009 },
+   { 0x00110001, 0x20480061, 0x00000000, 0x00040009 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x01000005, 0x20003dac, 0x00009800, 0x00030003 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000001a },
+   { 0x00600001, 0x26220231, 0x008d05c0, 0x00000000 },
+   { 0x00600401, 0x26120231, 0x008d05b8, 0x00000000 },
+   { 0x00600801, 0x26020231, 0x008d05b0, 0x00000000 },
+   { 0x00600401, 0x25f20231, 0x008d05a8, 0x00000000 },
+   { 0x00600801, 0x25e20231, 0x008d05a0, 0x00000000 },
+   { 0x00600401, 0x25d20231, 0x008d0598, 0x00000000 },
+   { 0x00600801, 0x25c20231, 0x008d0590, 0x00000000 },
+   { 0x00600401, 0x25b20231, 0x008d0588, 0x00000000 },
+   { 0x00600801, 0x25a20231, 0x008d0580, 0x00000000 },
+   { 0x00600401, 0x25920231, 0x008d0578, 0x00000000 },
+   { 0x00600801, 0x25820231, 0x008d0570, 0x00000000 },
+   { 0x00600001, 0x25720231, 0x008d0568, 0x00000000 },
+   { 0x00600001, 0x25620231, 0x008d0560, 0x00000000 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00810001, 0xbc000229, 0x02ad8000, 0x00000000 },
+   { 0x00810001, 0xbc200229, 0x02ad8020, 0x00000000 },
+   { 0x00810001, 0xbc400229, 0x02ad8040, 0x00000000 },
+   { 0x00810001, 0xbc600229, 0x02ad8060, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000012a },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000066 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000064 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04a00480 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00a02040, 0x2400462c, 0x00ad8000, 0x00ad8005 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8001, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8002, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8003, 0x00140014 },
+   { 0x00a02048, 0xb8003e2d, 0x00ad8004, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8040, 0x00ad8045 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8041, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8042, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8043, 0x00140014 },
+   { 0x00a02048, 0xb8403e2d, 0x00ad8044, 0xfffbfffb },
+   { 0x00a02040, 0x2400462c, 0x00ad8080, 0x00ad8085 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8081, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8082, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8083, 0x00140014 },
+   { 0x00a02048, 0xb8803e2d, 0x00ad8084, 0xfffbfffb },
+   { 0x00600040, 0x2400462c, 0x008d80c0, 0x008d80c5 },
+   { 0x00600048, 0x24003e2c, 0x008d80c1, 0xfffbfffb },
+   { 0x00600048, 0x24003e2c, 0x008d80c2, 0x00140014 },
+   { 0x00600048, 0x24003e2c, 0x008d80c3, 0x00140014 },
+   { 0x00600048, 0xb8c03e2d, 0x008d80c4, 0xfffbfffb },
+   { 0x00000401, 0x22000060, 0x00000000, 0x04a00480 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04a00490 },
+   { 0x00010802, 0x220c2d28, 0x00000454, 0x04800480 },
+   { 0x00a02040, 0x24003dac, 0x00b18000, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8800, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8820, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18020, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8820, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8840, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18040, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x000a000a },
+   { 0x00a02040, 0x24003dac, 0x00b18040, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x01ed8840, 0xfffbfffb },
+   { 0x00800048, 0x24203dac, 0x01ed8860, 0xfffbfffb },
+   { 0x00a02048, 0x24003dac, 0x00b18060, 0x00140014 },
+   { 0x00800048, 0x24003dac, 0x01ed8860, 0x00140014 },
+   { 0x00800048, 0x24203dac, 0x01ed8880, 0x00140014 },
+   { 0x00a02048, 0x24003dac, 0x00b18080, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed8880, 0x00010001 },
+   { 0x00800048, 0x24203dac, 0x01ed88a0, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x000a000a },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x000000b4 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000004c },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000004a },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010002, 0x220c2d28, 0x02000454, 0x04800480 },
+   { 0x00800040, 0x24003e2c, 0x01ed8000, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8001, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8021, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8002, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8022, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8003, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8023, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8004, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8024, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8005, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8025, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00800040, 0x24003e2c, 0x01ed8040, 0x00100010 },
+   { 0x00800040, 0x24203e2c, 0x01ed8060, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8041, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8061, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8042, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8062, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8043, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8063, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8044, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8064, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8045, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8065, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000005c },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000062 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000042 },
+   { 0x00200401, 0x22000128, 0x0066044c, 0x00000000 },
+   { 0x00200c01, 0x22040128, 0x0045044e, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x03400010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220c0168, 0x02000000, 0x04800480 },
+   { 0x00a02040, 0x24003e2c, 0x00ad83e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8be0, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8800, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8800, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8820, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0x00010001 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8203d91, 0x00b10420, 0x00050005 },
+   { 0x00a02040, 0x24003e2c, 0x00ad8020, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01ed8820, 0xfffbfffb },
+   { 0x00800048, 0x24203e2c, 0x01ed8840, 0xfffbfffb },
+   { 0x00a02048, 0x24003e2c, 0x00ad8040, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01ed8840, 0x00140014 },
+   { 0x00800048, 0x24203e2c, 0x01ed8860, 0x00140014 },
+   { 0x00a02048, 0x24003e2c, 0x00ad8060, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01ed8860, 0x00010001 },
+   { 0x00800048, 0x24203e2c, 0x01ed8880, 0x00010001 },
+   { 0x8080000c, 0xd8403d91, 0x00b10400, 0x00050005 },
+   { 0x8080100c, 0xd8603d91, 0x00b10420, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001c },
+   { 0x00200401, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00000801, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x01200010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01200010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00210040, 0x22002d08, 0x00450200, 0x00010001 },
+   { 0x00210040, 0x22002d08, 0x02450200, 0x00100010 },
+   { 0x00800001, 0xb8000229, 0x01ed8000, 0x00000000 },
+   { 0x00800001, 0xb8200229, 0x01ed8020, 0x00000000 },
+   { 0x00800001, 0xb8400229, 0x01ed8040, 0x00000000 },
+   { 0x00800001, 0xb8600229, 0x01ed8060, 0x00000000 },
+   { 0x80800042, 0xd8004631, 0x00d29800, 0x00d20480 },
+   { 0x80800042, 0xd8204631, 0x00d29820, 0x00d204a0 },
+   { 0x80800042, 0xd8404631, 0x00d29840, 0x00d204c0 },
+   { 0x80800042, 0xd8604631, 0x00d29860, 0x00d204e0 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000c01, 0x220201e8, 0x00000000, 0x07100710 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00200040, 0x244c3d09, 0x00450200, 0x00100010 },
+   { 0x00800041, 0x24002628, 0x00ad8000, 0x00000540 },
+   { 0x00800041, 0x24202628, 0x00ad8020, 0x00000540 },
+   { 0x00800048, 0x24002628, 0x00ad8002, 0x00000542 },
+   { 0x00800048, 0x24202628, 0x00ad8022, 0x00000542 },
+   { 0x00200001, 0x22000128, 0x0045044c, 0x00000000 },
+   { 0x00800048, 0x24002628, 0x01ed8000, 0x00000544 },
+   { 0x00800048, 0x24202628, 0x01ed8020, 0x00000544 },
+   { 0x00800048, 0xbc002629, 0x01ed8002, 0x00000546 },
+   { 0x00801048, 0xbc202629, 0x01ed8022, 0x00000546 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000128 },
+   { 0x00000001, 0x20220169, 0x00000000, 0x00040004 },
+   { 0x00200005, 0x24443dad, 0x00459800, 0x00030003 },
+   { 0x0020000c, 0x24403dad, 0x00459800, 0x00020002 },
+   { 0x01600006, 0x20003dac, 0x02000446, 0x00000000 },
+   { 0x00010040, 0x22000c20, 0x02000078, 0x00200000 },
+   { 0x00110040, 0x22000c20, 0x02000078, 0x00500000 },
+   { 0x00310040, 0x20401da5, 0x02450440, 0xfffffffe },
+   { 0x00110001, 0x20480061, 0x02000000, 0x00080008 },
+   { 0x00010040, 0x20401da5, 0x02000440, 0xfffffffe },
+   { 0x00010001, 0x204401a5, 0x02000442, 0x00000000 },
+   { 0x00010001, 0x20480061, 0x02000000, 0x00030008 },
+   { 0x01600031, 0x25600021, 0x408d0040, 0x00000200 },
+   { 0x00000040, 0xb80235ad, 0x00009802, 0x0000003c },
+   { 0x00000040, 0x22000c20, 0x00000078, 0x00100010 },
+   { 0x0020040c, 0x24403dad, 0x00459800, 0x00030003 },
+   { 0x00200805, 0x24483dad, 0x00459800, 0x00070007 },
+   { 0x00200001, 0x204001a5, 0x00450440, 0x00000000 },
+   { 0x00000009, 0x20403ca5, 0x00000040, 0x00010001 },
+   { 0x00000001, 0x20480061, 0x00000000, 0x00020005 },
+   { 0x02600031, 0x27000021, 0x408d0040, 0x00000200 },
+   { 0x00110001, 0x220001ec, 0x02000000, 0x05820582 },
+   { 0x00010001, 0x220001ec, 0x02000000, 0x05620562 },
+   { 0x00000001, 0x220e0128, 0x00000454, 0x00000000 },
+   { 0x01800006, 0x200035ac, 0x02000446, 0x00000444 },
+   { 0x00410001, 0xbc000229, 0x02698000, 0x00000000 },
+   { 0x00410001, 0xbc100229, 0x02698010, 0x00000000 },
+   { 0x00410001, 0xbc200229, 0x02698020, 0x00000000 },
+   { 0x00410001, 0xbc300229, 0x02698030, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000be },
+   { 0x00600001, 0x2024018d, 0x008d0200, 0x00000000 },
+   { 0x01000041, 0x245e35ad, 0x00000446, 0x00000444 },
+   { 0x02000005, 0x20003db0, 0x0200045e, 0x00010001 },
+   { 0x00000040, 0x22022d08, 0x00000200, 0x00100010 },
+   { 0x00200040, 0x22042d08, 0x00450200, 0x00200020 },
+   { 0x00400001, 0x244c0109, 0x00690200, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000044 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000042 },
+   { 0x00000040, 0x22003d08, 0x00000200, 0xffdeffde },
+   { 0x00000440, 0x22023d08, 0x00000202, 0xffeeffee },
+   { 0x00000801, 0x220c0060, 0x00000000, 0x04d004c0 },
+   { 0x01000010, 0x20003db0, 0x0000045e, 0x00040004 },
+   { 0x00802040, 0x2400462c, 0x00a98000, 0x00a98005 },
+   { 0x00802048, 0x24003e2c, 0x00a98001, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98002, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98003, 0x00140014 },
+   { 0x00802048, 0xb8003e2d, 0x00a98004, 0xfffbfffb },
+   { 0x00802040, 0x2400462c, 0x00a98040, 0x00a98045 },
+   { 0x00802048, 0x24003e2c, 0x00a98041, 0xfffbfffb },
+   { 0x00802048, 0x24003e2c, 0x00a98042, 0x00140014 },
+   { 0x00802048, 0x24003e2c, 0x00a98043, 0x00140014 },
+   { 0x00802048, 0xb8203e2d, 0x00a98044, 0xfffbfffb },
+   { 0x00400040, 0x2400462c, 0x00698080, 0x00698085 },
+   { 0x00400048, 0x24003e2c, 0x00698081, 0xfffbfffb },
+   { 0x00400048, 0x24003e2c, 0x00698082, 0x00140014 },
+   { 0x00400048, 0x24003e2c, 0x00698083, 0x00140014 },
+   { 0x00400048, 0xb8403e2d, 0x00698084, 0xfffbfffb },
+   { 0x00000401, 0x220c0060, 0x00000000, 0x04e004d0 },
+   { 0x00000c01, 0x22000060, 0x00000000, 0x04d004c8 },
+   { 0x00000c01, 0x22040060, 0x00000000, 0x04e004d8 },
+   { 0x00000801, 0x22080060, 0x00000000, 0x04f004e8 },
+   { 0x00800040, 0x24003dac, 0x00b104c0, 0x02000200 },
+   { 0x00800048, 0x24003dac, 0x00b104e0, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01ed9800, 0x00140014 },
+   { 0x00010001, 0x220c0168, 0x00000000, 0x05400540 },
+   { 0x00110001, 0x220c0168, 0x00000000, 0x04c004c0 },
+   { 0x00800048, 0x24003dac, 0x01e98000, 0xfffbfffb },
+   { 0x00800048, 0x24003dac, 0x01e98020, 0x00010001 },
+   { 0x00800048, 0x24003dac, 0x01e98800, 0x00140014 },
+   { 0x8080000c, 0xd8003d91, 0x00b10400, 0x000a000a },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x01000010, 0x20003db0, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000022 },
+   { 0x00400040, 0x22003d28, 0x0069044c, 0xfffefffe },
+   { 0x03400010, 0x20003dac, 0x00000446, 0x00020002 },
+   { 0x01000010, 0x20003db0, 0x02000444, 0x00020002 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00100010 },
+   { 0x01000010, 0x20003dac, 0x00000446, 0x00000000 },
+   { 0x00010001, 0x220e0168, 0x02000000, 0x05400540 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x00800040, 0x24003e2c, 0x01e98000, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98001, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98002, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98003, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98004, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e98005, 0x00010001 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000004 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000036 },
+   { 0x01000010, 0x20003db0, 0x00000446, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x03600010, 0x20003dac, 0x02000444, 0x00020002 },
+   { 0x00400401, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x00400840, 0x22083d2c, 0x0069044c, 0x00100010 },
+   { 0x00610040, 0x22002d08, 0x02690200, 0x00010001 },
+   { 0x01000010, 0x20003dac, 0x00000444, 0x00000000 },
+   { 0x01000010, 0x20003db0, 0x02000446, 0x00020002 },
+   { 0x00800040, 0x24003e2c, 0x01e983e0, 0x00100010 },
+   { 0x00800048, 0x24003e2c, 0x01e98000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e98020, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e993e0, 0xfffbfffb },
+   { 0x00800048, 0x24003e2c, 0x01e99000, 0x00140014 },
+   { 0x00800048, 0x24003e2c, 0x01e99020, 0x00010001 },
+   { 0x00000001, 0x220e0168, 0x00000000, 0x05400540 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000002 },
+   { 0x00110001, 0x220e0168, 0x02000000, 0x04c004c0 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00050005 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x00400001, 0x22000128, 0x0069044c, 0x00000000 },
+   { 0x01400010, 0x20003dac, 0x00000444, 0x00030003 },
+   { 0x01400010, 0x20003dac, 0x02000446, 0x00030003 },
+   { 0x00410040, 0x22002d08, 0x00690200, 0x00010001 },
+   { 0x00410040, 0x22002d08, 0x02690200, 0x00100010 },
+   { 0x00800001, 0x25400229, 0x01e98000, 0x00000000 },
+   { 0x80800042, 0x45404631, 0x00d20540, 0x00d204c0 },
+   { 0x00000001, 0x220c0128, 0x00000454, 0x00000000 },
+   { 0x00400001, 0xd8000231, 0x008a0540, 0x00000000 },
+   { 0x00400001, 0xd8100231, 0x008a0548, 0x00000000 },
+   { 0x00400001, 0xd8200231, 0x008a0550, 0x00000000 },
+   { 0x00400001, 0xd8300231, 0x008a0558, 0x00000000 },
+   { 0x00600001, 0x220001ac, 0x008d0024, 0x00000000 },
+   { 0x00200040, 0x244c3dad, 0x00450448, 0xfff8fff8 },
+   { 0x00000401, 0x220001ec, 0x00000000, 0x07000700 },
+   { 0x00000801, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00000441, 0x254635ad, 0x00000448, 0x0000044a },
+   { 0x00000c41, 0x254025ad, 0x0000444c, 0x0000444e },
+   { 0x00000c41, 0x254225ad, 0x00000448, 0x0000444e },
+   { 0x00000841, 0x254435ad, 0x0000444c, 0x0000044a },
+   { 0x00600041, 0x24002628, 0x00898000, 0x00000540 },
+   { 0x00600048, 0x24002628, 0x00898002, 0x00000542 },
+   { 0x00600048, 0x24002628, 0x00898008, 0x00000544 },
+   { 0x00600048, 0x2540262d, 0x0089800a, 0x00000546 },
+   { 0x00400401, 0xbc0001a9, 0x00690540, 0x00000000 },
+   { 0x00400801, 0xbc1001a9, 0x00690548, 0x00000000 },
+   { 0x01000010, 0x20003d2c, 0x00000022, 0x00030003 },
+   { 0x01000040, 0x20223d29, 0x02000022, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00080008 },
+   { 0x00110040, 0x24540c21, 0x00000454, 0x00080008 },
+   { 0x00010040, 0x24540c21, 0x00000454, 0x00180038 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffee4 },
+   { 0x01800010, 0x20003dac, 0x0200010e, 0x00010001 },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0xffe0ffe0 },
+   { 0x00010001, 0x24540061, 0x02000000, 0x06c00640 },
+   { 0x00110001, 0x24540061, 0x02000000, 0x04800780 },
+   { 0x00000001, 0x220e0128, 0x00000456, 0x00000000 },
+   { 0x00800040, 0x24003dac, 0x00b19c00, 0x00200020 },
+   { 0x00800040, 0x24203dac, 0x00b19c20, 0x00200020 },
+   { 0x8080000c, 0xdc003d91, 0x00b10400, 0x00060006 },
+   { 0x8080000c, 0xdc203d91, 0x00b10420, 0x00060006 },
+   { 0x02000040, 0x210e3d29, 0x0200010e, 0xffffffff },
+   { 0x00000040, 0x220c3d8c, 0x0000020c, 0x00040004 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffcdc },
+   { 0x01000005, 0x25643e2d, 0x00000060, 0x00c000c0 },
+   { 0x01000010, 0x20003dac, 0x02000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000016 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000086 },
+   { 0x00a02001, 0x46400231, 0x00d20780, 0x00000000 },
+   { 0x00a02001, 0x46800231, 0x00d207c0, 0x00000000 },
+   { 0x00a02001, 0x46c00231, 0x00d20480, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000007e },
+   { 0x80a02042, 0x46404631, 0x00d20640, 0x00d20780 },
+   { 0x80a02042, 0x46804631, 0x00d20680, 0x00d207c0 },
+   { 0x80a02042, 0x46c04631, 0x00d206c0, 0x00d20480 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000076 },
+   { 0x01000010, 0x20003dac, 0x02000564, 0x00800080 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000000e },
+   { 0x01000010, 0x20003dac, 0x00000458, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffe4 },
+   { 0x00200401, 0x256801ed, 0x00000000, 0x00200020 },
+   { 0x00200801, 0x256c01ed, 0x00000000, 0x00060006 },
+   { 0x00400001, 0x258001ed, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x45a001ad, 0x00058c00, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000028 },
+   { 0x02600005, 0x20006e28, 0x02008c0c, 0x88848421 },
+   { 0x0220000c, 0x25663dad, 0x00000458, 0x00010001 },
+   { 0x0000000c, 0x25643dad, 0x00000564, 0x00060006 },
+   { 0x00310001, 0x25c401ad, 0x00000458, 0x00000000 },
+   { 0x00210001, 0x25c00061, 0x00000000, 0x00010001 },
+   { 0x00310040, 0x25c03dad, 0x004545c4, 0x00010001 },
+   { 0x00610001, 0x25a00061, 0x02000000, 0x00000080 },
+   { 0x00710001, 0x45a002ad, 0x02ae8c00, 0x00000000 },
+   { 0x00710001, 0x45a202ad, 0x02ae8c01, 0x00000000 },
+   { 0x00800041, 0x25a035ad, 0x00b105a0, 0x000905c0 },
+   { 0x01200010, 0x20003e2c, 0x0245006e, 0x00000000 },
+   { 0x00310009, 0x244c45ad, 0x02000564, 0x0045006e },
+   { 0x00210001, 0x244c01ed, 0x02000000, 0x00000000 },
+   { 0x0031000c, 0x244c3dad, 0x0245044c, 0x00010001 },
+   { 0x00200009, 0x256835ad, 0x0045044c, 0x00000566 },
+   { 0x00210040, 0x256835ad, 0x02450568, 0x00000566 },
+   { 0x00200040, 0x256c362d, 0x0045006e, 0x00000566 },
+   { 0x00400040, 0x240035ac, 0x00ab05a2, 0x00ab05a6 },
+   { 0x00400040, 0x2400358c, 0x00690400, 0x00000566 },
+   { 0x0040000c, 0x2580358d, 0x00690400, 0x00000566 },
+   { 0x00800041, 0x2400362c, 0x00d20640, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d20660, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d20780, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207a0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x464035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x466035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d20680, 0x000005a0 },
+   { 0x00800041, 0x2420362c, 0x00d206a0, 0x000005a0 },
+   { 0x00800048, 0x2400362c, 0x00d207c0, 0x000005a4 },
+   { 0x00800048, 0x2420362c, 0x00d207e0, 0x000005a4 },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x00000568 },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x00000568 },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056c },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056c },
+   { 0x80800040, 0x468035b1, 0x00b104c0, 0x00000580 },
+   { 0x80800040, 0x46a035b1, 0x00b104e0, 0x00000580 },
+   { 0x00800041, 0x2400362c, 0x00d206c0, 0x000705a8 },
+   { 0x00800041, 0x2420362c, 0x00d206e0, 0x000705a8 },
+   { 0x00800048, 0x2400362c, 0x00d20480, 0x000705ac },
+   { 0x00800048, 0x2420362c, 0x00d204a0, 0x000705ac },
+   { 0x00800040, 0x2400358c, 0x00b10400, 0x0000056a },
+   { 0x00800040, 0x2420358c, 0x00b10420, 0x0000056a },
+   { 0x0080000c, 0x24c0358d, 0x00b10400, 0x0000056e },
+   { 0x0080000c, 0x24e0358d, 0x00b10420, 0x0000056e },
+   { 0x80800040, 0x46c035b1, 0x00b104c0, 0x00050582 },
+   { 0x80800040, 0x46e035b1, 0x00b104e0, 0x00050582 },
+   { 0x01800005, 0x20003d2c, 0x02000020, 0x00020002 },
+   { 0x80800040, 0xd00045b1, 0x00b19000, 0x00d20640 },
+   { 0x80800040, 0xd02045b1, 0x00b19020, 0x00d20660 },
+   { 0x80800040, 0xd04045b1, 0x00b19040, 0x00d20680 },
+   { 0x80800040, 0xd06045b1, 0x00b19060, 0x00d206a0 },
+   { 0x00000040, 0x22083d8c, 0x00000208, 0x00800080 },
+   { 0x80400040, 0xd40045b1, 0x00699400, 0x00ab06c0 },
+   { 0x80400040, 0xd48045b1, 0x00699480, 0x00ab06c2 },
+   { 0x80400040, 0xd42045b1, 0x00699420, 0x00ab06e0 },
+   { 0x80400040, 0xd4a045b1, 0x006994a0, 0x00ab06e2 },
+   { 0x80400040, 0xd41045b1, 0x00699410, 0x00ab06d0 },
+   { 0x80400040, 0xd49045b1, 0x00699490, 0x00ab06d2 },
+   { 0x80400040, 0xd43045b1, 0x00699430, 0x00ab06f0 },
+   { 0x80400040, 0xd4b045b1, 0x006994b0, 0x00ab06f2 },
+   { 0x00000040, 0x220a3d8c, 0x0000020a, 0x00080008 },
+   { 0x00110040, 0x220a3d8c, 0x0200020a, 0x00300030 },
+   { 0x01000010, 0x20003d2c, 0x02000020, 0x00060006 },
+   { 0x00000040, 0x20203d29, 0x00000020, 0x00020002 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00100001 },
+   { 0x00000040, 0x220c358c, 0x0000020c, 0x0000003a },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xfffffc1a },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x01400140 },
+   { 0x00000001, 0x220201ec, 0x00000000, 0x01c001c0 },
+   { 0x00800401, 0x20400236, 0x01ee8000, 0x00000000 },
+   { 0x00800801, 0x20500236, 0x01ee8010, 0x00000000 },
+   { 0x00800401, 0x20600236, 0x01ee8020, 0x00000000 },
+   { 0x00800801, 0x20700236, 0x01ee8030, 0x00000000 },
+   { 0x00800401, 0x20800236, 0x01ee8040, 0x00000000 },
+   { 0x00800801, 0x20900236, 0x01ee8050, 0x00000000 },
+   { 0x00800401, 0x20a00236, 0x01ee8060, 0x00000000 },
+   { 0x00800801, 0x20b00236, 0x01ee8070, 0x00000000 },
+   { 0x00800401, 0x20c00236, 0x01ee8100, 0x00000000 },
+   { 0x00800801, 0x20d00236, 0x01ee8110, 0x00000000 },
+   { 0x00800401, 0x20e00236, 0x01ee8120, 0x00000000 },
+   { 0x00800801, 0x20f00236, 0x01ee8130, 0x00000000 },
+   { 0x00800401, 0x21000236, 0x01ee8140, 0x00000000 },
+   { 0x00800801, 0x21100236, 0x01ee8150, 0x00000000 },
+   { 0x00800401, 0x21200236, 0x01ee8160, 0x00000000 },
+   { 0x00800801, 0x21300236, 0x01ee8170, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x1218a000 },
+   { 0x00200401, 0x204001a5, 0x00450064, 0x00000000 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x000f000f },
+   { 0x01600031, 0x21400021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x220001ec, 0x00000000, 0x03400340 },
+   { 0x00800401, 0x41600236, 0x00d28000, 0x00000000 },
+   { 0x00800801, 0x41610236, 0x00d28080, 0x00000000 },
+   { 0x00800401, 0x41800236, 0x00d28020, 0x00000000 },
+   { 0x00800801, 0x41810236, 0x00d280a0, 0x00000000 },
+   { 0x00800401, 0x41a00236, 0x00d28040, 0x00000000 },
+   { 0x00800801, 0x41a10236, 0x00d280c0, 0x00000000 },
+   { 0x00800401, 0x41c00236, 0x00d28060, 0x00000000 },
+   { 0x00800801, 0x41c10236, 0x00d280e0, 0x00000000 },
+   { 0x00000040, 0x22000d20, 0x00000062, 0x0a18a001 },
+   { 0x0000040c, 0x20443ca5, 0x00000044, 0x00010001 },
+   { 0x00000801, 0x20480061, 0x00000000, 0x0007000f },
+   { 0x0a600031, 0x21600021, 0x508d0040, 0x00000200 },
+   { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
+   { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
+   { 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
+   { 0x00000001, 0x202001e9, 0x00000000, 0x100c100c },
+   { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
+   { 0x00000009, 0x27e82d21, 0x00000040, 0x00060006 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000100 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x02600005, 0x20000c20, 0x02000090, 0x00002000 },
+   { 0x00000006, 0x20880c21, 0x00000088, 0x00200000 },
+   { 0x00200009, 0x20845529, 0x0000009c, 0x00450020 },
+   { 0x00200001, 0x40840231, 0x00450094, 0x00000000 },
+   { 0x00310001, 0x20840229, 0x02450094, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000095, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000094 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x20800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x020000d0, 0x00002000 },
+   { 0x00000006, 0x20c80c21, 0x000000c8, 0x00200000 },
+   { 0x00200009, 0x20c45529, 0x000000dc, 0x00450020 },
+   { 0x00200001, 0x40c40231, 0x004500d4, 0x00000000 },
+   { 0x00310001, 0x20c40229, 0x024500d4, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x000000d5, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x000000d4 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d00c0, 0x00000000 },
+   { 0x00802001, 0x20c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000110, 0x00002000 },
+   { 0x00000006, 0x21080c21, 0x00000108, 0x00200000 },
+   { 0x00200009, 0x21045529, 0x0000011c, 0x00450020 },
+   { 0x00200001, 0x41040231, 0x00450114, 0x00000000 },
+   { 0x00310001, 0x21040229, 0x02450114, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000115, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000114 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000150, 0x00002000 },
+   { 0x00000006, 0x21480c21, 0x00000148, 0x00200000 },
+   { 0x00200009, 0x21445529, 0x0000015c, 0x00450020 },
+   { 0x00200001, 0x41440231, 0x00450154, 0x00000000 },
+   { 0x00310001, 0x21440229, 0x02450154, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000155, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000154 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0140, 0x00000000 },
+   { 0x00802001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000190, 0x00002000 },
+   { 0x00000006, 0x21880c21, 0x00000188, 0x00200000 },
+   { 0x00200009, 0x21845529, 0x0000019c, 0x00450020 },
+   { 0x00200001, 0x41840231, 0x00450194, 0x00000000 },
+   { 0x00310001, 0x21840229, 0x02450194, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000195, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000194 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x020001d0, 0x00002000 },
+   { 0x00000006, 0x21c80c21, 0x000001c8, 0x00200000 },
+   { 0x00200009, 0x21c45529, 0x000001dc, 0x00450020 },
+   { 0x00200001, 0x41c40231, 0x004501d4, 0x00000000 },
+   { 0x00310001, 0x21c40229, 0x024501d4, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x000001d5, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x000001d4 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d01c0, 0x00000000 },
+   { 0x00802001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000210, 0x00002000 },
+   { 0x00000006, 0x22080c21, 0x00000208, 0x00200000 },
+   { 0x00200009, 0x22045529, 0x0000021c, 0x00450020 },
+   { 0x00200001, 0x42040231, 0x00450214, 0x00000000 },
+   { 0x00310001, 0x22040229, 0x02450214, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000215, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000214 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x02600005, 0x20000c20, 0x02000250, 0x00002000 },
+   { 0x00000006, 0x22480c21, 0x00000248, 0x00200000 },
+   { 0x00200009, 0x22445529, 0x0000025c, 0x00450020 },
+   { 0x00200001, 0x42440231, 0x00450254, 0x00000000 },
+   { 0x00310001, 0x22440229, 0x02450254, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00000255, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00000254 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0240, 0x00000000 },
+   { 0x00802001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x01000040, 0x20423dad, 0x00000042, 0xfff8fff8 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d00c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000080 },
+   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0140, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x21680c22, 0x000007e8, 0x00000100 },
+   { 0x00802001, 0x21800022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d01c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000180 },
+   { 0x00802001, 0x20400022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0240, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000200 },
+   { 0x01000010, 0x20002da0, 0x02000060, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffff00 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000040 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x00000001, 0x220400e0, 0x00000000, 0x00a00080 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02008810, 0x00002000 },
+   { 0x01000040, 0x20603dad, 0x00000060, 0xffffffff },
+   { 0x00000006, 0xa8080c21, 0x00008808, 0x00200000 },
+   { 0x00200009, 0xa8045529, 0x0000881c, 0x00450020 },
+   { 0x00200001, 0xc8040231, 0x00458814, 0x00000000 },
+   { 0x00310001, 0xa8040229, 0x02458814, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000000c },
+   { 0x00800041, 0x24002628, 0x00008815, 0x00000044 },
+   { 0x00800040, 0x24004508, 0x008d0400, 0x00008814 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d8800, 0x00000000 },
+   { 0x00802001, 0xa8000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d8800, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000040 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xffffffda },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
+   { 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
+   { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
+   { 0x00000009, 0x27e82d21, 0x00000040, 0x00060006 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000260 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x00200001, 0x20640229, 0x00450094, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000090, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000090, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000095, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000095, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000090, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x20881c21, 0x00000088, 0x00200000 },
+   { 0x00200001, 0x20840129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x20800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000090, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000009c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000009c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200009c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000090, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200009c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200009c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x20843d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x20862d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x40840231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x004500d4, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x020000d0, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x000000d0, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x000000d5, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x000000d5, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x000000d0, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x20c81c21, 0x000000c8, 0x00200000 },
+   { 0x00200001, 0x20c40129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d00c0, 0x00000000 },
+   { 0x00802001, 0x20c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x000000d0, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x000000dc, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x000000dc, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x020000dc, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x020000d0, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x020000dc, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x020000dc, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x20c43d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x20c62d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x40c40231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450114, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000110, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000110, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000115, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000115, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000110, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21081c21, 0x00000108, 0x00200000 },
+   { 0x00200001, 0x21040129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000110, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000011c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000011c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200011c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000110, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200011c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200011c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21043d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21062d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41040231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450154, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000150, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000150, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000155, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000155, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000150, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21481c21, 0x00000148, 0x00200000 },
+   { 0x00200001, 0x21440129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0140, 0x00000000 },
+   { 0x00802001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000150, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000015c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000015c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200015c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000150, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200015c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200015c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21443d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21462d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41440231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450194, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000190, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000190, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000195, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000195, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000190, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21881c21, 0x00000188, 0x00200000 },
+   { 0x00200001, 0x21840129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000190, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000019c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000019c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200019c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000190, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200019c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200019c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21843d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21862d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41840231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x004501d4, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x020001d0, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x000001d0, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x000001d5, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x000001d5, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x000001d0, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x21c81c21, 0x000001c8, 0x00200000 },
+   { 0x00200001, 0x21c40129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d01c0, 0x00000000 },
+   { 0x00802001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x000001d0, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x000001dc, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x000001dc, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x020001dc, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x020001d0, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x020001dc, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x020001dc, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x21c43d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x21c62d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x41c40231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450214, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000210, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000210, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000215, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000215, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000210, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x22081c21, 0x00000208, 0x00200000 },
+   { 0x00200001, 0x22040129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000210, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000021c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000021c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200021c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000210, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200021c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200021c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x22043d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x22062d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x42040231, 0x00660064, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00450254, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02000250, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00000250, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00000255, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00000255, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00000250, 0x00002000 },
+   { 0x02600005, 0x20001d20, 0x02000066, 0x00000001 },
+   { 0x00000006, 0x22481c21, 0x00000248, 0x00200000 },
+   { 0x00200001, 0x22440129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d0240, 0x00000000 },
+   { 0x00802001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000001e },
+   { 0x02600005, 0x20000c20, 0x00000250, 0x00004000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00000001, 0x20680129, 0x0000025c, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000025c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200025c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02000250, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200025c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200025c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x02000068, 0x000d0070 },
+   { 0x00000009, 0x22443d09, 0x00000602, 0x000c000c },
+   { 0x00000005, 0x22462d09, 0x00000602, 0xf000f000 },
+   { 0x00200001, 0x42440231, 0x00660064, 0x00000000 },
+   { 0x01000040, 0x20423dad, 0x00000042, 0xfff8fff8 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d0080, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d00c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000080 },
+   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0140, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x21680c22, 0x000007e8, 0x00000100 },
+   { 0x00802001, 0x21800022, 0x008d0180, 0x00000000 },
+   { 0x00802001, 0x21c00022, 0x008d01c0, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x20280c22, 0x000007e8, 0x00000180 },
+   { 0x00802001, 0x20400022, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x20800022, 0x008d0240, 0x00000000 },
+   { 0x01800031, 0x20001d0c, 0x508d0000, 0x0a080400 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000200 },
+   { 0x01000010, 0x20002da0, 0x02000060, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0xfffffda0 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000006e },
+   { 0x00600001, 0x20200022, 0x008d07e0, 0x00000000 },
+   { 0x01800031, 0x20801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20400022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20480c22, 0x000007e8, 0x00000080 },
+   { 0x02800031, 0x21001d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20600022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20680c22, 0x000007e8, 0x00000100 },
+   { 0x03800031, 0x21801d09, 0x408d0000, 0x02488400 },
+   { 0x00600001, 0x20800022, 0x008d07e0, 0x00000000 },
+   { 0x00000040, 0x20880c22, 0x000007e8, 0x00000180 },
+   { 0x04800031, 0x22001d09, 0x408d0000, 0x02488400 },
+   { 0x00000001, 0x220400e0, 0x00000000, 0x00a00080 },
+   { 0x00600001, 0x20a00022, 0x008d0000, 0x00000000 },
+   { 0x00200001, 0x20640229, 0x00458814, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x02008810, 0x01000000 },
+   { 0x02600005, 0x20000c20, 0x00008810, 0x00004000 },
+   { 0x00600041, 0x24003e2c, 0x00008815, 0x00020002 },
+   { 0x00110001, 0x2066018d, 0x02000400, 0x00000000 },
+   { 0x00010040, 0x20663d8d, 0x02000400, 0x00010001 },
+   { 0x00110001, 0x2066022d, 0x00008815, 0x00000000 },
+   { 0x02600005, 0x20000c20, 0x00008810, 0x00002000 },
+   { 0x01000040, 0x20603dad, 0x02000060, 0xffffffff },
+   { 0x00000006, 0xa8080c21, 0x00008808, 0x00200000 },
+   { 0x00200001, 0xa8040129, 0x00450064, 0x00000000 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00800041, 0x24002528, 0x00000066, 0x00000044 },
+   { 0x00800040, 0x24002508, 0x008d0400, 0x00000064 },
+   { 0x00000009, 0x20a82d02, 0x00000404, 0x00060006 },
+   { 0x00802001, 0x20c00022, 0x008d8800, 0x00000000 },
+   { 0x00802001, 0xa8000061, 0x00000000, 0x00000000 },
+   { 0x05800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000022 },
+   { 0x02600005, 0x20000d20, 0x02000066, 0x00000001 },
+   { 0x02600005, 0x20000c20, 0x00008810, 0x00004000 },
+   { 0x00000001, 0x20680129, 0x0000881c, 0x00000000 },
+   { 0x00210002, 0x20700421, 0x02450038, 0x00450030 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x02600005, 0x20001d20, 0x0000881c, 0x00000010 },
+   { 0x00210002, 0x20700421, 0x02450028, 0x00450020 },
+   { 0x01600005, 0x20001d20, 0x0200881c, 0x00000001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x02610005, 0x20001c20, 0x02008810, 0x00008000 },
+   { 0x02610005, 0x20001d20, 0x0200881c, 0x00000010 },
+   { 0x00010006, 0x20681d29, 0x0200881c, 0x00000010 },
+   { 0x02800005, 0x2000452c, 0x00000068, 0x000d0070 },
+   { 0x01000040, 0x20603dad, 0x02000060, 0x00000000 },
+   { 0x00000009, 0xa8043d09, 0x00000600, 0x000c000c },
+   { 0x00000005, 0xa8062d09, 0x00000600, 0xf000f000 },
+   { 0x00200001, 0xc8040231, 0x00660064, 0x00000000 },
+   { 0x00600001, 0x21600022, 0x008d07e0, 0x00000000 },
+   { 0x00802001, 0x21800022, 0x008d8800, 0x00000000 },
+   { 0x0b800031, 0x20001d0c, 0x508d0000, 0x06080300 },
+   { 0x00000040, 0x27e80c21, 0x000007e8, 0x00000040 },
+   { 0x00000040, 0x22040c00, 0x00000204, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0xffffffac },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/i965_drv_video/shaders/h264/mc/chromaMVAdjust.asm b/i965_drv_video/shaders/h264/mc/chromaMVAdjust.asm
new file mode 100644 (file)
index 0000000..063f554
--- /dev/null
@@ -0,0 +1,27 @@
+/*\r
+ * Adjust chroma MV\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: ChromaMVAdjust.asm\r
+//\r
+//\r
+\r
+\r
+//#if !defined(__ChromaMVAdjust__)             // Make sure this is only included once\r
+//#define __ChromaMVAdjust__\r
+\r
+\r
+       // Chroma MV adjustment\r
+       // Sets gCHRMVADJ to 0, +2 or -2 depending on the word sum gPARITY + gREFPARITY:\r
+       //   sum == 0x1    ->  gCHRMVADJ =  2\r
+       //   sum == 0x100  ->  gCHRMVADJ = -2\r
+       //   anything else ->  gCHRMVADJ =  0\r
+       // The unconditional mov must stay ahead of the two predicated movs: they only\r
+       // overwrite the default of 0 when their flag (f0.0 / f0.1) was set by the cmp.\r
+       // NOTE(review): presumably gPARITY and gREFPARITY flag the bottom field in different\r
+       // bit positions (0x1 vs 0x100), so only a parity mismatch between the current and the\r
+       // reference field gives a non-zero adjustment (cf. the H.264 vertical chroma vector\r
+       // offset for field references) -- confirm against the definitions of both symbols.\r
+       add (1)         acc0:w                          gPARITY:w                                               gREFPARITY:w\r
+       cmp.e.f0.0 (1) null:w                   acc0:w                                                  0x1:w\r
+       cmp.e.f0.1 (1) null:w                   acc0:w                                                  0x100:w\r
+       mov (1)         gCHRMVADJ:w                     0:w\r
+       (f0.0) mov (1)  gCHRMVADJ:w             2:w     \r
+       (f0.1) mov (1)  gCHRMVADJ:w             -2:w\r
+        \r
+//#endif       // !defined(__ChromaMVAdjust__)\r
diff --git a/i965_drv_video/shaders/h264/mc/export.inc b/i965_drv_video/shaders/h264/mc/export.inc
new file mode 100644 (file)
index 0000000..1113841
--- /dev/null
@@ -0,0 +1,209 @@
+#define INTRA_16x16_IP 0 // NOTE(review): *_IP values look like label offsets into the assembled AVC kernel image; presumably generated -- confirm before hand-editing
+#define INTRA_16x16_VERTICAL_IP 112
+#define INTRA_16x16_HORIZONTAL_IP 256
+#define INTRA_16x16_DC_IP 416
+#define INTRA_16x16_PLANE_IP 784
+#define End_intra_Pred_16x16_Y_IP 1328
+#define End_add_Error_16x16_Y_IP 1632
+#define load_Intra_Ref_Y_IP 1760
+#define decode_Chroma_Intra_IP 1904
+#define INTRA_CHROMA_DC_IP 2080
+#define INTRA_CHROMA_HORIZONTAL_IP 2560
+#define INTRA_CHROMA_VERTICAL_IP 2656
+#define INTRA_Chroma_PLANE_IP 2736
+#define End_of_intra_Pred_Chroma_IP 3136
+#define save_16x16_Y_IP 3488
+#define INTRA_8x8_IP 3712 // first of the intra 8x8 entries
+#define INTRA_8x8_BLK2_IP 4544
+#define intra_Pred_8x8_Y_IP 5120
+#define INTRA_8X8_VERTICAL_IP 5376
+#define INTRA_8X8_HORIZONTAL_IP 5456
+#define INTRA_8X8_DC_IP 5536
+#define INTRA_8X8_DIAG_DOWN_LEFT_IP 5792
+#define INTRA_8X8_DIAG_DOWN_RIGHT_IP 5952
+#define INTRA_8X8_VERT_RIGHT_IP 6176
+#define INTRA_8X8_HOR_DOWN_IP 6464
+#define INTRA_8X8_VERT_LEFT_IP 6736
+#define INTRA_8X8_HOR_UP_IP 6896
+#define save_8x8_Y_IP 7088
+#define INTRA_4x4_IP 7424 // first of the intra 4x4 entries
+#define intra_Pred_4x4_Y_4_IP 8496
+#define ADD_ERROR_SB0_IP 8592
+#define ADD_ERROR_SB1_IP 8704
+#define ADD_ERROR_SB2_IP 8864
+#define ADD_ERROR_SB3_IP 8992
+#define intra_Pred_4x4_Y_IP 9040
+#define INTRA_4X4_VERTICAL_IP 9040 // same value as intra_Pred_4x4_Y_IP
+#define INTRA_4X4_HORIZONTAL_IP 9072
+#define INTRA_4X4_DC_IP 9104
+#define INTRA_4X4_DIAG_DOWN_LEFT_IP 9280
+#define INTRA_4X4_DIAG_DOWN_RIGHT_IP 9392
+#define INTRA_4X4_VERT_RIGHT_IP 9536
+#define INTRA_4X4_HOR_DOWN_IP 9744
+#define INTRA_4X4_VERT_LEFT_IP 9968
+#define INTRA_4X4_HOR_UP_IP 10080
+#define save_4x4_Y_IP 10208
+#define INTRA_PCM_IP 10560
+#define FRAME_MB_IP 11072 // first of the *_FRM entries; the same label set recurs further down with _FLD and _MBF suffixes
+#define INIT_MBPARA_FRM_IP 11120
+#define NOT_8x8_MODE_FRM_IP 11408
+#define CONVERT_MVS_FRM_IP 11488
+#define INIT_ADDRESS_REGS_FRM_IP 11568
+#define LOOP_SUBMB_FRM_IP 11632
+#define LOOP_DIR_FRM_IP 11680
+#define LOADREF_MVXZERO_FRM_IP 12080
+#define EXIT_LOADREF_Y_16x13_FRM_IP 12192
+#define Interpolate_Y_8x8_Func_FRM_IP 12352
+#define Interpolate_Y_8x8_Func2_FRM_IP 12592
+#define Interpolate_Y_H_8x8_FRM_IP 13664
+#define Interpolate_Y_V_8x8_FRM_IP 14320
+#define VFILTER_8x8_FRM_IP 14496
+#define Interpolate_Y_I_8x8_FRM_IP 14880
+#define Average_8x8_FRM_IP 15040
+#define Return_Interpolate_Y_8x8_FRM_IP 15104
+#define Exit_Interpolate_Y_8x8_FRM_IP 15120
+#define Interpolate_C_4x4_Func_FRM_IP 15120 // same value as Exit_Interpolate_Y_8x8_FRM_IP
+#define PROCESS4x4_FRM_IP 15424
+#define LOOP_SUBMBPT_FRM_IP 15440
+#define Interpolate_Y_H_4x4_FRM_IP 16528
+#define Interpolate_Y_V_4x4_FRM_IP 16864
+#define VFILTER_4x4_FRM_IP 17136
+#define Interpolate_Y_I_4x4_FRM_IP 17184
+#define Average_4x4_FRM_IP 17280
+#define Return_Interpolate_Y_4x4_FRM_IP 17296
+#define Exit_Interpolate_Y_4x4_FRM_IP 17392
+#define ROUND_SHIFT_C_FRM_IP 17776
+#define LOOP_DIR_CONTINUE_FRM_IP 17840
+#define Weighted_Prediction_FRM_IP 17888
+#define DefaultWeightedPred_UniPred_FRM_IP 17952
+#define DefaultWeightedPred_BiPred_FRM_IP 18048
+#define WeightedPred_FRM_IP 18112
+#define WeightedPred_Explicit_FRM_IP 18256
+#define WeightedPred_LOOP_FRM_IP 18576
+#define Return_WeightedPred_FRM_IP 19056
+#define EXIT_LOOP_FRM_IP 19392
+#define FIELD_MB_IP 19968 // first of the *_FLD entries; label names parallel the *_FRM entries above
+#define INIT_MBPARA_FLD_IP 20016
+#define NOT_8x8_MODE_FLD_IP 20304
+#define CONVERT_MVS_FLD_IP 20384
+#define INIT_ADDRESS_REGS_FLD_IP 20464
+#define LOOP_SUBMB_FLD_IP 20544
+#define LOOP_DIR_FLD_IP 20592
+#define LOADREF_MVXZERO_FLD_IP 21152
+#define EXIT_LOADREF_Y_16x13_FLD_IP 21264
+#define Interpolate_Y_8x8_Func_FLD_IP 21440
+#define Interpolate_Y_8x8_Func2_FLD_IP 21680
+#define Interpolate_Y_H_8x8_FLD_IP 22752
+#define Interpolate_Y_V_8x8_FLD_IP 23408
+#define VFILTER_8x8_FLD_IP 23584
+#define Interpolate_Y_I_8x8_FLD_IP 23968
+#define Average_8x8_FLD_IP 24128
+#define Return_Interpolate_Y_8x8_FLD_IP 24192
+#define Exit_Interpolate_Y_8x8_FLD_IP 24208
+#define Interpolate_C_4x4_Func_FLD_IP 24208 // same value as Exit_Interpolate_Y_8x8_FLD_IP
+#define PROCESS4x4_FLD_IP 24512
+#define LOOP_SUBMBPT_FLD_IP 24528
+#define Interpolate_Y_H_4x4_FLD_IP 25632
+#define Interpolate_Y_V_4x4_FLD_IP 25968
+#define VFILTER_4x4_FLD_IP 26240
+#define Interpolate_Y_I_4x4_FLD_IP 26288
+#define Average_4x4_FLD_IP 26384
+#define Return_Interpolate_Y_4x4_FLD_IP 26400
+#define Exit_Interpolate_Y_4x4_FLD_IP 26496
+#define ROUND_SHIFT_C_FLD_IP 26880
+#define LOOP_DIR_CONTINUE_FLD_IP 26944
+#define Weighted_Prediction_FLD_IP 26992
+#define DefaultWeightedPred_UniPred_FLD_IP 27056
+#define DefaultWeightedPred_BiPred_FLD_IP 27152
+#define WeightedPred_FLD_IP 27216
+#define WeightedPred_Explicit_FLD_IP 27360
+#define WeightedPred_LOOP_FLD_IP 27680
+#define Return_WeightedPred_FLD_IP 28160
+#define EXIT_LOOP_FLD_IP 28496
+#define MBAFF_MB_IP 29120 // first of the *_MBF entries; label names parallel the *_FRM and *_FLD entries above
+#define INIT_MBPARA_MBF_IP 29168
+#define NOT_8x8_MODE_MBF_IP 29456
+#define CONVERT_MVS_MBF_IP 29536
+#define INIT_ADDRESS_REGS_MBF_IP 29616
+#define LOOP_SUBMB_MBF_IP 29728
+#define LOOP_DIR_MBF_IP 29776
+#define LOADREF_MVXZERO_MBF_IP 30368
+#define EXIT_LOADREF_Y_16x13_MBF_IP 30480
+#define Interpolate_Y_8x8_Func_MBF_IP 30656
+#define Interpolate_Y_8x8_Func2_MBF_IP 30896
+#define Interpolate_Y_H_8x8_MBF_IP 31968
+#define Interpolate_Y_V_8x8_MBF_IP 32624
+#define VFILTER_8x8_MBF_IP 32800
+#define Interpolate_Y_I_8x8_MBF_IP 33184
+#define Average_8x8_MBF_IP 33344
+#define Return_Interpolate_Y_8x8_MBF_IP 33408
+#define Exit_Interpolate_Y_8x8_MBF_IP 33424
+#define Interpolate_C_4x4_Func_MBF_IP 33424 // same value as Exit_Interpolate_Y_8x8_MBF_IP
+#define PROCESS4x4_MBF_IP 33728
+#define LOOP_SUBMBPT_MBF_IP 33744
+#define Interpolate_Y_H_4x4_MBF_IP 34848
+#define Interpolate_Y_V_4x4_MBF_IP 35184
+#define VFILTER_4x4_MBF_IP 35456
+#define Interpolate_Y_I_4x4_MBF_IP 35504
+#define Average_4x4_MBF_IP 35600
+#define Return_Interpolate_Y_4x4_MBF_IP 35616
+#define Exit_Interpolate_Y_4x4_MBF_IP 35712
+#define ROUND_SHIFT_C_MBF_IP 36096
+#define LOOP_DIR_CONTINUE_MBF_IP 36160
+#define Weighted_Prediction_MBF_IP 36208
+#define DefaultWeightedPred_UniPred_MBF_IP 36272
+#define DefaultWeightedPred_BiPred_MBF_IP 36368
+#define WeightedPred_MBF_IP 36432
+#define WeightedPred_Explicit_MBF_IP 36576
+#define WeightedPred_LOOP_MBF_IP 36896
+#define Return_WeightedPred_MBF_IP 37376
+#define EXIT_LOOP_MBF_IP 37712
+#define SETHWSCOREBOARD_IP 38336 // first of the SetHWScoreboard entries (non-MBAFF variant)
+#define SetHWScoreboard_Loop_IP 38448
+#define Parse_8_Loop_0_IP 38816
+#define Parse_8_Loop_2_IP 39008
+#define Parse_8_Loop_4_IP 39200
+#define Parse_8_Loop_6_IP 39392
+#define Parse_8_Loop_8_IP 39584
+#define Parse_8_Loop_10_IP 39776
+#define Parse_8_Loop_12_IP 39968
+#define Parse_8_Loop_14_IP 40160
+#define SetHWScoreboard_Remainder_IP 40496
+#define SetHWScoreboard_Remainder_Loop_IP 40720
+#define Output_Remainder_Intra_IP 40928
+#define SetHWScoreboard_Done_IP 41024
+#define SETHWSCOREBOARD_MBAFF_IP 41088 // first of the SetHWScoreboard MBAFF entries
+#define SetHWScoreboard_MBAFF_Loop_IP 41184
+#define SET_SB_MBAFF_INTRA_0_IP 41664
+#define SET_SB_MBAFF_0_IP 41840
+#define NEXT_MB_MBAFF_0_IP 41904
+#define SET_SB_MBAFF_INTRA_2_IP 42208
+#define SET_SB_MBAFF_2_IP 42384
+#define NEXT_MB_MBAFF_2_IP 42448
+#define SET_SB_MBAFF_INTRA_4_IP 42752
+#define SET_SB_MBAFF_4_IP 42928
+#define NEXT_MB_MBAFF_4_IP 42992
+#define SET_SB_MBAFF_INTRA_6_IP 43296
+#define SET_SB_MBAFF_6_IP 43472
+#define NEXT_MB_MBAFF_6_IP 43536
+#define SET_SB_MBAFF_INTRA_8_IP 43840
+#define SET_SB_MBAFF_8_IP 44016
+#define NEXT_MB_MBAFF_8_IP 44080
+#define SET_SB_MBAFF_INTRA_10_IP 44384
+#define SET_SB_MBAFF_10_IP 44560
+#define NEXT_MB_MBAFF_10_IP 44624
+#define SET_SB_MBAFF_INTRA_12_IP 44928
+#define SET_SB_MBAFF_12_IP 45104
+#define NEXT_MB_MBAFF_12_IP 45168
+#define SET_SB_MBAFF_INTRA_14_IP 45472
+#define SET_SB_MBAFF_14_IP 45648
+#define NEXT_MB_MBAFF_14_IP 45712
+#define SetHWScoreboard_MBAFF_Remainder_IP 46048
+#define SetHWScoreboard_MBAFF_Remainder_Loop_IP 46272
+#define SET_SB_MBAFF_REM_INTRA_IP 46576
+#define SET_SB_MBAFF_REM_IP 46768
+#define Output_MBAFF_Remainder_Intra_IP 46848
+#define SetHWScoreboard_MBAFF_Done_IP 46944
+#define BSDRESET_IP 46960
+#define DCRESETDUMMY_IP 46976
+#define AllAVC_END_IP 46992 // last and largest value in this list; presumably marks the end of the combined kernel -- TODO confirm
diff --git a/i965_drv_video/shaders/h264/mc/export.inc.gen5 b/i965_drv_video/shaders/h264/mc/export.inc.gen5
new file mode 100644 (file)
index 0000000..63accd9
--- /dev/null
@@ -0,0 +1,209 @@
+#define INTRA_16x16_IP_GEN5 0 // NOTE(review): *_IP_GEN5 values look like label offsets into the assembled GEN5 AVC kernel image; presumably generated -- confirm before hand-editing
+#define INTRA_16x16_VERTICAL_IP_GEN5 112
+#define INTRA_16x16_HORIZONTAL_IP_GEN5 256
+#define INTRA_16x16_DC_IP_GEN5 416
+#define INTRA_16x16_PLANE_IP_GEN5 784
+#define End_intra_Pred_16x16_Y_IP_GEN5 1328
+#define End_add_Error_16x16_Y_IP_GEN5 1632
+#define load_Intra_Ref_Y_IP_GEN5 1760
+#define decode_Chroma_Intra_IP_GEN5 1904
+#define INTRA_CHROMA_DC_IP_GEN5 2080
+#define INTRA_CHROMA_HORIZONTAL_IP_GEN5 2560
+#define INTRA_CHROMA_VERTICAL_IP_GEN5 2656
+#define INTRA_Chroma_PLANE_IP_GEN5 2736
+#define End_of_intra_Pred_Chroma_IP_GEN5 3136
+#define save_16x16_Y_IP_GEN5 3488
+#define INTRA_8x8_IP_GEN5 3712 // first of the intra 8x8 entries
+#define INTRA_8x8_BLK2_IP_GEN5 4544
+#define intra_Pred_8x8_Y_IP_GEN5 5120
+#define INTRA_8X8_VERTICAL_IP_GEN5 5376
+#define INTRA_8X8_HORIZONTAL_IP_GEN5 5456
+#define INTRA_8X8_DC_IP_GEN5 5536
+#define INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 5792
+#define INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 5952
+#define INTRA_8X8_VERT_RIGHT_IP_GEN5 6176
+#define INTRA_8X8_HOR_DOWN_IP_GEN5 6464
+#define INTRA_8X8_VERT_LEFT_IP_GEN5 6736
+#define INTRA_8X8_HOR_UP_IP_GEN5 6896
+#define save_8x8_Y_IP_GEN5 7088
+#define INTRA_4x4_IP_GEN5 7424 // first of the intra 4x4 entries
+#define intra_Pred_4x4_Y_4_IP_GEN5 8496
+#define ADD_ERROR_SB0_IP_GEN5 8592
+#define ADD_ERROR_SB1_IP_GEN5 8704
+#define ADD_ERROR_SB2_IP_GEN5 8864
+#define ADD_ERROR_SB3_IP_GEN5 8992
+#define intra_Pred_4x4_Y_IP_GEN5 9040
+#define INTRA_4X4_VERTICAL_IP_GEN5 9040 // same value as intra_Pred_4x4_Y_IP_GEN5
+#define INTRA_4X4_HORIZONTAL_IP_GEN5 9072
+#define INTRA_4X4_DC_IP_GEN5 9104
+#define INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 9280
+#define INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 9392
+#define INTRA_4X4_VERT_RIGHT_IP_GEN5 9536
+#define INTRA_4X4_HOR_DOWN_IP_GEN5 9744
+#define INTRA_4X4_VERT_LEFT_IP_GEN5 9968
+#define INTRA_4X4_HOR_UP_IP_GEN5 10080
+#define save_4x4_Y_IP_GEN5 10208
+#define INTRA_PCM_IP_GEN5 10560
+#define FRAME_MB_IP_GEN5 11072 // first of the *_FRM entries; the same label set recurs further down with _FLD and _MBF infixes
+#define INIT_MBPARA_FRM_IP_GEN5 11120
+#define NOT_8x8_MODE_FRM_IP_GEN5 11408
+#define CONVERT_MVS_FRM_IP_GEN5 11488
+#define INIT_ADDRESS_REGS_FRM_IP_GEN5 11568
+#define LOOP_SUBMB_FRM_IP_GEN5 11632
+#define LOOP_DIR_FRM_IP_GEN5 11680
+#define LOADREF_MVXZERO_FRM_IP_GEN5 12080
+#define EXIT_LOADREF_Y_16x13_FRM_IP_GEN5 12192
+#define Interpolate_Y_8x8_Func_FRM_IP_GEN5 12352
+#define Interpolate_Y_8x8_Func2_FRM_IP_GEN5 12592
+#define Interpolate_Y_H_8x8_FRM_IP_GEN5 13664
+#define Interpolate_Y_V_8x8_FRM_IP_GEN5 14320
+#define VFILTER_8x8_FRM_IP_GEN5 14496
+#define Interpolate_Y_I_8x8_FRM_IP_GEN5 14880
+#define Average_8x8_FRM_IP_GEN5 15040
+#define Return_Interpolate_Y_8x8_FRM_IP_GEN5 15104
+#define Exit_Interpolate_Y_8x8_FRM_IP_GEN5 15120
+#define Interpolate_C_4x4_Func_FRM_IP_GEN5 15120 // same value as Exit_Interpolate_Y_8x8_FRM_IP_GEN5
+#define PROCESS4x4_FRM_IP_GEN5 15424
+#define LOOP_SUBMBPT_FRM_IP_GEN5 15440
+#define Interpolate_Y_H_4x4_FRM_IP_GEN5 16528
+#define Interpolate_Y_V_4x4_FRM_IP_GEN5 16864
+#define VFILTER_4x4_FRM_IP_GEN5 17136
+#define Interpolate_Y_I_4x4_FRM_IP_GEN5 17184
+#define Average_4x4_FRM_IP_GEN5 17280
+#define Return_Interpolate_Y_4x4_FRM_IP_GEN5 17296
+#define Exit_Interpolate_Y_4x4_FRM_IP_GEN5 17392
+#define ROUND_SHIFT_C_FRM_IP_GEN5 17776
+#define LOOP_DIR_CONTINUE_FRM_IP_GEN5 17840
+#define Weighted_Prediction_FRM_IP_GEN5 17888
+#define DefaultWeightedPred_UniPred_FRM_IP_GEN5 17952
+#define DefaultWeightedPred_BiPred_FRM_IP_GEN5 18048
+#define WeightedPred_FRM_IP_GEN5 18112
+#define WeightedPred_Explicit_FRM_IP_GEN5 18256
+#define WeightedPred_LOOP_FRM_IP_GEN5 18576
+#define Return_WeightedPred_FRM_IP_GEN5 19056
+#define EXIT_LOOP_FRM_IP_GEN5 19392
+#define FIELD_MB_IP_GEN5 19968 // first of the *_FLD entries; label names parallel the *_FRM entries above
+#define INIT_MBPARA_FLD_IP_GEN5 20016
+#define NOT_8x8_MODE_FLD_IP_GEN5 20304
+#define CONVERT_MVS_FLD_IP_GEN5 20384
+#define INIT_ADDRESS_REGS_FLD_IP_GEN5 20464
+#define LOOP_SUBMB_FLD_IP_GEN5 20544
+#define LOOP_DIR_FLD_IP_GEN5 20592
+#define LOADREF_MVXZERO_FLD_IP_GEN5 21152
+#define EXIT_LOADREF_Y_16x13_FLD_IP_GEN5 21264
+#define Interpolate_Y_8x8_Func_FLD_IP_GEN5 21440
+#define Interpolate_Y_8x8_Func2_FLD_IP_GEN5 21680
+#define Interpolate_Y_H_8x8_FLD_IP_GEN5 22752
+#define Interpolate_Y_V_8x8_FLD_IP_GEN5 23408
+#define VFILTER_8x8_FLD_IP_GEN5 23584
+#define Interpolate_Y_I_8x8_FLD_IP_GEN5 23968
+#define Average_8x8_FLD_IP_GEN5 24128
+#define Return_Interpolate_Y_8x8_FLD_IP_GEN5 24192
+#define Exit_Interpolate_Y_8x8_FLD_IP_GEN5 24208
+#define Interpolate_C_4x4_Func_FLD_IP_GEN5 24208 // same value as Exit_Interpolate_Y_8x8_FLD_IP_GEN5
+#define PROCESS4x4_FLD_IP_GEN5 24512
+#define LOOP_SUBMBPT_FLD_IP_GEN5 24528
+#define Interpolate_Y_H_4x4_FLD_IP_GEN5 25632
+#define Interpolate_Y_V_4x4_FLD_IP_GEN5 25968
+#define VFILTER_4x4_FLD_IP_GEN5 26240
+#define Interpolate_Y_I_4x4_FLD_IP_GEN5 26288
+#define Average_4x4_FLD_IP_GEN5 26384
+#define Return_Interpolate_Y_4x4_FLD_IP_GEN5 26400
+#define Exit_Interpolate_Y_4x4_FLD_IP_GEN5 26496
+#define ROUND_SHIFT_C_FLD_IP_GEN5 26880
+#define LOOP_DIR_CONTINUE_FLD_IP_GEN5 26944
+#define Weighted_Prediction_FLD_IP_GEN5 26992
+#define DefaultWeightedPred_UniPred_FLD_IP_GEN5 27056
+#define DefaultWeightedPred_BiPred_FLD_IP_GEN5 27152
+#define WeightedPred_FLD_IP_GEN5 27216
+#define WeightedPred_Explicit_FLD_IP_GEN5 27360
+#define WeightedPred_LOOP_FLD_IP_GEN5 27680
+#define Return_WeightedPred_FLD_IP_GEN5 28160
+#define EXIT_LOOP_FLD_IP_GEN5 28496
+#define MBAFF_MB_IP_GEN5 29120 // first of the *_MBF entries; label names parallel the *_FRM and *_FLD entries above
+#define INIT_MBPARA_MBF_IP_GEN5 29168
+#define NOT_8x8_MODE_MBF_IP_GEN5 29456
+#define CONVERT_MVS_MBF_IP_GEN5 29536
+#define INIT_ADDRESS_REGS_MBF_IP_GEN5 29616
+#define LOOP_SUBMB_MBF_IP_GEN5 29728
+#define LOOP_DIR_MBF_IP_GEN5 29776
+#define LOADREF_MVXZERO_MBF_IP_GEN5 30368
+#define EXIT_LOADREF_Y_16x13_MBF_IP_GEN5 30480
+#define Interpolate_Y_8x8_Func_MBF_IP_GEN5 30656
+#define Interpolate_Y_8x8_Func2_MBF_IP_GEN5 30896
+#define Interpolate_Y_H_8x8_MBF_IP_GEN5 31968
+#define Interpolate_Y_V_8x8_MBF_IP_GEN5 32624
+#define VFILTER_8x8_MBF_IP_GEN5 32800
+#define Interpolate_Y_I_8x8_MBF_IP_GEN5 33184
+#define Average_8x8_MBF_IP_GEN5 33344
+#define Return_Interpolate_Y_8x8_MBF_IP_GEN5 33408
+#define Exit_Interpolate_Y_8x8_MBF_IP_GEN5 33424
+#define Interpolate_C_4x4_Func_MBF_IP_GEN5 33424 // same value as Exit_Interpolate_Y_8x8_MBF_IP_GEN5
+#define PROCESS4x4_MBF_IP_GEN5 33728
+#define LOOP_SUBMBPT_MBF_IP_GEN5 33744
+#define Interpolate_Y_H_4x4_MBF_IP_GEN5 34848
+#define Interpolate_Y_V_4x4_MBF_IP_GEN5 35184
+#define VFILTER_4x4_MBF_IP_GEN5 35456
+#define Interpolate_Y_I_4x4_MBF_IP_GEN5 35504
+#define Average_4x4_MBF_IP_GEN5 35600
+#define Return_Interpolate_Y_4x4_MBF_IP_GEN5 35616
+#define Exit_Interpolate_Y_4x4_MBF_IP_GEN5 35712
+#define ROUND_SHIFT_C_MBF_IP_GEN5 36096
+#define LOOP_DIR_CONTINUE_MBF_IP_GEN5 36160
+#define Weighted_Prediction_MBF_IP_GEN5 36208
+#define DefaultWeightedPred_UniPred_MBF_IP_GEN5 36272
+#define DefaultWeightedPred_BiPred_MBF_IP_GEN5 36368
+#define WeightedPred_MBF_IP_GEN5 36432
+#define WeightedPred_Explicit_MBF_IP_GEN5 36576
+#define WeightedPred_LOOP_MBF_IP_GEN5 36896
+#define Return_WeightedPred_MBF_IP_GEN5 37376
+#define EXIT_LOOP_MBF_IP_GEN5 37712
+#define SETHWSCOREBOARD_IP_GEN5 38336 // first of the SetHWScoreboard entries (non-MBAFF variant)
+#define SetHWScoreboard_Loop_IP_GEN5 38448
+#define Parse_8_Loop_0_IP_GEN5 38816
+#define Parse_8_Loop_2_IP_GEN5 39008
+#define Parse_8_Loop_4_IP_GEN5 39200
+#define Parse_8_Loop_6_IP_GEN5 39392
+#define Parse_8_Loop_8_IP_GEN5 39584
+#define Parse_8_Loop_10_IP_GEN5 39776
+#define Parse_8_Loop_12_IP_GEN5 39968
+#define Parse_8_Loop_14_IP_GEN5 40160
+#define SetHWScoreboard_Remainder_IP_GEN5 40496
+#define SetHWScoreboard_Remainder_Loop_IP_GEN5 40720
+#define Output_Remainder_Intra_IP_GEN5 40928
+#define SetHWScoreboard_Done_IP_GEN5 41024
+#define SETHWSCOREBOARD_MBAFF_IP_GEN5 41088 // first of the SetHWScoreboard MBAFF entries
+#define SetHWScoreboard_MBAFF_Loop_IP_GEN5 41184
+#define SET_SB_MBAFF_INTRA_0_IP_GEN5 41664
+#define SET_SB_MBAFF_0_IP_GEN5 41840
+#define NEXT_MB_MBAFF_0_IP_GEN5 41904
+#define SET_SB_MBAFF_INTRA_2_IP_GEN5 42208
+#define SET_SB_MBAFF_2_IP_GEN5 42384
+#define NEXT_MB_MBAFF_2_IP_GEN5 42448
+#define SET_SB_MBAFF_INTRA_4_IP_GEN5 42752
+#define SET_SB_MBAFF_4_IP_GEN5 42928
+#define NEXT_MB_MBAFF_4_IP_GEN5 42992
+#define SET_SB_MBAFF_INTRA_6_IP_GEN5 43296
+#define SET_SB_MBAFF_6_IP_GEN5 43472
+#define NEXT_MB_MBAFF_6_IP_GEN5 43536
+#define SET_SB_MBAFF_INTRA_8_IP_GEN5 43840
+#define SET_SB_MBAFF_8_IP_GEN5 44016
+#define NEXT_MB_MBAFF_8_IP_GEN5 44080
+#define SET_SB_MBAFF_INTRA_10_IP_GEN5 44384
+#define SET_SB_MBAFF_10_IP_GEN5 44560
+#define NEXT_MB_MBAFF_10_IP_GEN5 44624
+#define SET_SB_MBAFF_INTRA_12_IP_GEN5 44928
+#define SET_SB_MBAFF_12_IP_GEN5 45104
+#define NEXT_MB_MBAFF_12_IP_GEN5 45168
+#define SET_SB_MBAFF_INTRA_14_IP_GEN5 45472
+#define SET_SB_MBAFF_14_IP_GEN5 45648
+#define NEXT_MB_MBAFF_14_IP_GEN5 45712
+#define SetHWScoreboard_MBAFF_Remainder_IP_GEN5 46048
+#define SetHWScoreboard_MBAFF_Remainder_Loop_IP_GEN5 46272
+#define SET_SB_MBAFF_REM_INTRA_IP_GEN5 46576
+#define SET_SB_MBAFF_REM_IP_GEN5 46768
+#define Output_MBAFF_Remainder_Intra_IP_GEN5 46848
+#define SetHWScoreboard_MBAFF_Done_IP_GEN5 46944
+#define BSDRESET_IP_GEN5 46960
+#define DCRESETDUMMY_IP_GEN5 46976
+#define AllAVC_END_IP_GEN5 46992 // last and largest value in this list; presumably marks the end of the combined kernel -- TODO confirm
diff --git a/i965_drv_video/shaders/h264/mc/header.inc b/i965_drv_video/shaders/h264/mc/header.inc
new file mode 100644 (file)
index 0000000..4a0eecf
--- /dev/null
@@ -0,0 +1,303 @@
+/*\r
+ * Common header file for all AVC MC kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__HEADER__)       // Make sure this file is only included once\r
+#define __HEADER__\r
+\r
+// Module name: header.inc\r
+//\r
+// Common header file for all AVC MC kernels\r
+//\r
+\r
+#ifndef        COMBINED_KERNEL\r
+#ifdef DEV_CTG\r
+  #define SW_SCOREBOARD                // SW Scoreboard should be enabled for CTG and earlier\r
+  #undef HW_SCOREBOARD         // HW Scoreboard should be disabled for CTG and earlier\r
+#else\r
+  #define HW_SCOREBOARD                // HW Scoreboard should be enabled for ILK and beyond\r
+  #undef SW_SCOREBOARD         // SW Scoreboard should be disabled for ILK and beyond\r
+#endif // DEV_CTG\r
+#endif // COMBINED_KERNEL\r
+\r
+//#define MONO                         // Build Monochrome kernels\r
+\r
+//  Surface state definition\r
+//\r
+#define        DESTY           0\r
+#define        DESTUV          1\r
+#define        REFYFM0         2\r
+#define        REFYFM1         3\r
+#define        REFYFM2         4\r
+#define        REFYFM3         5\r
+#define        REFYFM4         6\r
+#define        REFYFM5         7\r
+#define        REFYFM6         8\r
+#define        REFYFM7         9\r
+#define        REFYFM8         10\r
+#define        REFYFM9         11\r
+#define        REFYFM10        12\r
+#define        REFYFM11        13\r
+#define        REFYFM12        14\r
+#define        REFYFM13        15\r
+#define        REFYFM14        16\r
+#define        REFYFM15        17\r
+#define        REFUVFM0        18\r
+#define        REFUVFM1        19\r
+#define        REFUVFM2        20\r
+#define        REFUVFM3        21\r
+#define        REFUVFM4        22\r
+#define        REFUVFM5        23\r
+#define        REFUVFM6        24\r
+#define        REFUVFM7        25\r
+#define        REFUVFM8        26\r
+#define        REFUVFM9        27\r
+#define        REFUVFM10       28\r
+#define        REFUVFM11       29\r
+#define        REFUVFM12       30\r
+#define        REFUVFM13       31\r
+#define        REFUVFM14       32\r
+#define        REFUVFM15       33\r
+\r
+.default_execution_size        (16)\r
+.default_register_type :ub\r
+\r
+//  ----------- Common constant definitions ------------\r
+//\r
+//  Bit position constants\r
+//\r
+#define BIT0   0x01\r
+#define BIT1   0x02\r
+#define BIT2   0x04\r
+#define BIT3   0x08\r
+#define BIT4   0x10\r
+#define BIT5   0x20\r
+#define BIT6   0x40\r
+#define BIT7   0x80\r
+#define BIT8   0x0100\r
+#define BIT9   0x0200\r
+#define BIT10  0x0400\r
+#define BIT11  0x0800\r
+#define BIT12  0x1000\r
+#define BIT13  0x2000\r
+#define BIT14  0x4000\r
+#define BIT15  0x8000\r
+#define BIT16  0x00010000\r
+#define BIT17  0x00020000\r
+#define BIT18  0x00040000\r
+#define BIT19  0x00080000\r
+#define BIT20  0x00100000\r
+#define BIT21  0x00200000\r
+#define BIT22  0x00400000\r
+#define BIT23  0x00800000\r
+#define BIT24  0x01000000\r
+#define BIT25  0x02000000\r
+#define BIT26  0x04000000\r
+#define BIT27  0x08000000\r
+#define BIT28  0x10000000\r
+#define BIT29  0x20000000\r
+#define BIT30  0x40000000\r
+#define BIT31  0x80000000\r
+\r
+#define        GRFWIB  32              // GRF register width in byte\r
+#define        GRFWIW  16              // GRF register width in word\r
+#define        GRFWID  8               // GRF register width in dword\r
+\r
+#define INST_SIZE   16         // Instruction size = 128b = 16 Bytes\r
+\r
+#define REGION(Width,HStride) <Width*HStride;Width,HStride>\r
+\r
+#define NULLREG                null<1>:ud\r
+#define NULLREGW       null<1>:w\r
+\r
+#define TOP_FIELD              0\r
+#define BOTTOM_FIELD   1\r
+\r
+//  M2 - M9 for data writing message payload\r
+.declare    MSGPAYLOAD Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+.declare    MSGPAYLOADB        Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+.declare    MSGPAYLOADW        Base=m2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw\r
+.declare    MSGPAYLOADD        Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+\r
+//  ----------- Common Message Descriptor ------------\r
+//  Send message descriptor constants: DEV_ILK selects the ILK encodings, otherwise the pre-ILK encodings are used.\r
+#ifdef DEV_ILK\r
+#define MSG_GW         0x03            // Message Gateway Extended Message Descriptor,\r
+#define DAPREAD                0x04            // Data Port Read Extended Message Descriptor,\r
+#define DAPWRITE       0x05            // Data Port Write Extended Message Descriptor,\r
+#define TS                     0x07            // Thread Spawner Extended Message Descriptor\r
+#define TS_EOT         0x27            // End of Thread Extended Message Descriptor\r
+\r
+#define EOTMSGDSC      0x02000010      // End of Thread Message Descriptor, don't dereference URB handle\r
+\r
+// Data Port Message Descriptor\r
+#define DWBRMSGDSC_RC   0x02086000     // DWORD Block Read Message Descriptor, reading from render cache = 6.\r
+#define DWBRMSGDSC_RC_TF 0x02086600    // DWORD Block Read Message Descriptor, reading top field (0x600) from render cache = 6.\r
+#define DWBRMSGDSC_RC_BF 0x02086700    // DWORD Block Read Message Descriptor, reading bottom field (0x700) from render cache = 6.\r
+#define DWBRMSGDSC_SC   0x0208A000     // DWORD Block Read Message Descriptor, reading from sampler cache = A.\r
+#define DWBRMSGDSC_SC_TF 0x0208E600    // DWORD Block Read Message Descriptor, reading top field from field mode sampler cache. NOTE(review): nibble is 0xE here but 0xA in DWBRMSGDSC_SC - confirm.\r
+#define DWBRMSGDSC_SC_BF 0x0208E700    // DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache. NOTE(review): nibble is 0xE here but 0xA in DWBRMSGDSC_SC - confirm.\r
+\r
+#define DWBWMSGDSC              0x02082000     // DWORD Block Write Message Descriptor\r
+#define DWBWMSGDSC_WC   0x0218A000     // DWORD Block Write Message Descriptor + write commit\r
+\r
+// Enable Write Commit writeback message\r
+#define        ENWRCOM         0x00108000      // Enable "write commit" and set response length = 1\r
+\r
+// Thread Spawner Message Descriptor\r
+#define        TSMSGDSC        0x02000011\r
+\r
+// Message Gateway Message Descriptors\r
+#define OGWMSGDSC      0x02000000      // OpenGateway Message Descriptor\r
+#define CGWMSGDSC      0x02000001      // CloseGateway Message Descriptor\r
+#define FWDMSGDSC      0x02000002      // ForwardMsg Message Descriptor\r
+\r
+#define        NOTIFYMSG       0x00008000      // Send notification with ForwardMsg message\r
+\r
+#define        RESP_LEN(len)   0x100000*(len)  // Response length field; argument is parenthesized so expressions such as 1+1 scale correctly\r
+#define MSG_LEN(len)   0x2000000*(len) // Message length field; argument is parenthesized so expressions such as 1+1 scale correctly\r
+\r
+#else  // Pre DEV_ILK\r
+\r
+#define MSG_GW\r
+#define DAPREAD\r
+#define DAPWRITE\r
+#define TS\r
+#define TS_EOT\r
+\r
+#define EOTMSGDSC      0x87100010      // End of Thread Message Descriptor, don't dereference URB handle\r
+\r
+// Data Port Message Descriptor\r
+#define DWBRMSGDSC_RC   0x04106000     // DWORD Block Read Message Descriptor, reading from render cache = 6.\r
+#define DWBRMSGDSC_RC_TF 0x04106600    // DWORD Block Read Message Descriptor, reading top field (0x600) from render cache = 6.\r
+#define DWBRMSGDSC_RC_BF 0x04106700    // DWORD Block Read Message Descriptor, reading bottom field (0x700) from render cache = 6.\r
+#define DWBRMSGDSC_SC   0x0410A000     // DWORD Block Read Message Descriptor, reading from sampler cache = A.\r
+#define DWBRMSGDSC_SC_TF 0x0410A600    // DWORD Block Read Message Descriptor, reading top field from field mode sampler cache.\r
+#define DWBRMSGDSC_SC_BF 0x0410A700    // DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache.\r
+\r
+#define DWBWMSGDSC              0x05102000     // DWORD Block Write Message Descriptor\r
+#define DWBWMSGDSC_WC   0x0511A000     // DWORD Block Write Message Descriptor + write commit\r
+\r
+// Enable Write Commit writeback message\r
+#define        ENWRCOM         0x00018000      // Enable "write commit" and set response length = 1\r
+\r
+// Thread Spawner Message Descriptor\r
+#define        TSMSGDSC        0x07100011\r
+\r
+// Message Gateway Message Descriptors\r
+#define OGWMSGDSC      0x03100000      // OpenGateway Message Descriptor\r
+#define CGWMSGDSC      0x03100001      // CloseGateway Message Descriptor\r
+#define FWDMSGDSC      0x03100002      // ForwardMsg Message Descriptor\r
+\r
+#define        NOTIFYMSG       0x00008000      // Send notification with ForwardMsg message\r
+#define        ACKREQMSG       0x00014000      // Acknowledgement required so response length should be 1. NOTE(review): no DEV_ILK counterpart is defined above - confirm.\r
+\r
+#define        RESP_LEN(len)   0x10000*(len)   // Response length field; argument is parenthesized so expressions such as 1+1 scale correctly\r
+#define MSG_LEN(len)   0x100000*(len)  // Message length field; argument is parenthesized so expressions such as 1+1 scale correctly\r
+\r
+#endif // DEV_ILK\r
+\r
+// Enable frame/field selection in message descriptor\r
+#define ENMSGDSCFM     0x400           // Enable MSGDSC to select frame surface\r
+#define ENMSGDSCTF     0x600           // Enable MSGDSC to select top field surface\r
+#define ENMSGDSCBF     0x700           // Enable MSGDSC to select bottom field surface\r
+\r
+//  ----------- Message related register ------------\r
+//\r
+#define MSGHDR         m1              // Message Payload Header\r
+#define MSGHDRY                m1              // Message Payload Header register for Y data\r
+#define MSGHDRY0       m1              // Message Payload Header register for Y data\r
+#define MSGHDRY1       m2              // Message Payload Header register for Y data\r
+#define MSGHDRY2       m3              // Message Payload Header register for Y data\r
+#define MSGHDRY3       m4              // Message Payload Header register for Y data\r
+#define MSGHDRUV       m5              // Message Payload Header register for U/V data\r
+#define MSGSRC         r62             // Message source register, should never be used for other purposes\r
+#define MSGDSC         a0.0:ud // Message Descriptor register (type DWORD)\r
+\r
+#define MH_ORI         MSGSRC.0        // DWORD block R/W message header block offset\r
+#define MH_ORIX                MSGSRC.0        // DWORD block R/W message header X offset\r
+#define MH_ORIY                MSGSRC.1        // DWORD block R/W message header Y offset\r
+#define MH_SIZE                MSGSRC.2        // DWORD block R/W message header block width & height\r
+\r
+// Data necessary for kernel operations\r
+//\r
+//  Address registers used as pointers\r
+//\r
+//  Note: Please keep the register order as is since they are used in compressed instructions\r
+//\r
+#define            PPREDBUF_Y          a0.4    // Pointer to predicted Y picture\r
+#define            PPREDBUF_Y1         a0.5    // Pointer to predicted Y picture for extended instruction\r
+\r
+#define            PPREDBUF_UV         a0.4    // Pointer to predicted U/V picture\r
+#define            PPREDBUF_UV1        a0.5    // Pointer to predicted U/V picture for extended instruction\r
+\r
+#define            PDECBUF             a0.4    // Pointer to decoded picture data\r
+#define            PDECBUF_UD  a0.2    // Pointer to decoded picture data in DWORD unit\r
+\r
+//  ----------- R63 is reserved for global variables ------------\r
+//\r
+//  Note: Don't program it with values other than what are defined here. \r
+\r
+#define G_REG          r63\r
+\r
+#define RETURN_REG     G_REG.0         // Return pointer for all sub-routine calls (type DWORD)\r
+#define RETURN_REG1    G_REG.1         // Return pointer for second-level calls\r
+\r
+#define I_ORIX         G_REG.13        // :uw, H. origin of the macroblock in pixel unit, don't overwrite in-line data\r
+#define I_ORIY         G_REG.14        // :uw, V. origin of the macroblock in pixel unit, don't overwrite in-line data\r
+\r
+//  Macros\r
+//\r
+//  Note: For macros that require multiple line expansion, insert "\n" at the end of each line.\r
+//\r
+#define        GRF(reg)        r##reg\r
+#ifdef DEV_ILK\r
+#define END_THREAD                     send (8) NULLREG MSGHDR r0:ud TS_EOT    EOTMSGDSC\r
+#else\r
+#define END_THREAD                     send (8) NULLREG MSGHDR r0:ud EOTMSGDSC\r
+#endif // DEV_ILK\r
+\r
+#define CALL(subFunc, skipInst)        add (1) RETURN_REG<1>:ud   ip:ud        (1+skipInst)*INST_SIZE \n\\r
+                               jmpi (1) subFunc\r
+\r
+#define CALL_1(subFunc, skipInst)      add (1) RETURN_REG1<1>:ud   ip:ud       (1+skipInst)*INST_SIZE \n\\r
+                               jmpi (1) subFunc\r
+\r
+#define        RETURN          mov (1) ip:ud   RETURN_REG<0;1,0>:ud            // Return to calling module\r
+#define        RETURN_1        mov (1) ip:ud   RETURN_REG1<0;1,0>:ud           // Return to second-level calling module\r
+                                                                                                                               // To support iterative calling\r
+#ifdef SW_SCOREBOARD    \r
+\r
+#ifdef DEV_CTG_A\r
+  #define LEADING_THREAD       1               // For CTG A, no SRT is needed. Only PRT is necessary\r
+#else\r
+  #define LEADING_THREAD       0               // For CTG B0 and beyond, PRT doesn't take into debug count\r
+  #define DOUBLE_SB                                    // Scoreboard size needs to be doubled\r
+#endif\r
+\r
+#ifdef DOUBLE_SB                                       // Scoreboard size needs to be doubled\r
+  #define SB_MASK              0x1ff           // Scoreboard wrap-around mask (for 512 entries)\r
+#else\r
+  #define SB_MASK              0xff            // Scoreboard wrap-around mask (for 256 entries)\r
+#endif // defined(DOUBLE_SB)\r
+\r
+// Scoreboard related definitions\r
+\r
+#define        TEMPX           r50\r
+#define        TEMPY           r51\r
+#define        DELTA           r52\r
+\r
+#define M05_STORE      r0.13           // :uw, reuse r0.6:ud upper-word to store M0.5 header information for scoreboard\r
+\r
+\r
+#endif // SW_SCOREBOARD\r
+\r
+// End of header.inc\r
+\r
+#endif // !defined(__HEADER__)\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/initialize_MBPara.asm b/i965_drv_video/shaders/h264/mc/initialize_MBPara.asm
new file mode 100644 (file)
index 0000000..bd651cf
--- /dev/null
@@ -0,0 +1,125 @@
+/*\r
+ * Initialize parameters\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Initialize_MBPara.asm\r
+//\r
+\r
+\r
+//#if !defined(__INITIALIZE_MBPARA__)          // Make sure this is only included once\r
+//#define __INITIALIZE_MBPARA__\r
+\r
+\r
+// WA for weighted prediction - 2007/09/06             // shlee\r
+//     mov (1)         guwW128(0)<1>                   guwR1(0)<0;1,0>         // Copy the unique number indicating weight/offset=(128,0)\r
+\r
+\r
+\r
+//     MB Type         Category\r
+//     1                       B_L0_16x16\r
+//     2                       B_L1_16x16\r
+//     3                       B_Bi_16x16\r
+//     4                       B_L0_L0_16x8\r
+//     5                       B_L0_L0_8x16\r
+//     6                       B_L1_L1_16x8\r
+//     7                       B_L1_L1_8x16\r
+//     8                       B_L0_L1_16x8\r
+//     9                       B_L0_L1_8x16\r
+//     10                      B_L1_L0_16x8\r
+//     11                      B_L1_L0_8x16\r
+//     12                      B_L0_Bi_16x8\r
+//     13                      B_L0_Bi_8x16\r
+//     14                      B_L1_Bi_16x8\r
+//     15                      B_L1_Bi_8x16\r
+//     16                      B_Bi_L0_16x8\r
+//     17                      B_Bi_L0_8x16\r
+//     18                      B_Bi_L1_16x8\r
+//     19                      B_Bi_L1_8x16\r
+//     20                      B_Bi_Bi_16x8\r
+//     21                      B_Bi_Bi_8x16\r
+//     22                      B_8x8\r
+\r
+       // TODO:\r
+       // Initialize interpolation area to eliminate uninitialized registers making the results of mac instructions XX.\r
+       // This issue was reported by Sharath on 5/25/2006, and why multiplication by zero still yields XX has not been understood yet.\r
+#if 0\r
+       mov (16)        gudINTPY0(0)<1>         0:ud    {Compr}\r
+       mov (16)        gudINTPY0(2)<1>         0:ud    {Compr}\r
+       mov (16)        gudINTPY1(0)<1>         0:ud    {Compr}\r
+       mov (16)        gudINTPY1(2)<1>         0:ud    {Compr}\r
+       mov (16)        gudINTPC0(0)<1>         0:ud    {Compr}\r
+       mov (16)        gudINTPC1(0)<1>         0:ud    {Compr}\r
+#endif\r
+\r
+       mov (1)         gMVSTEP:w                       0:w                                                             // Address increment for MV read\r
+\r
+       cmp.e.f0.0 (1) null:w                   gwMBTYPE<0;1,0>                                 22:w    // MB type 22 = B_8x8 in the MB type table above\r
+       (-f0.0)         jmpi                            INTERLABEL(NOT_8x8_MODE)\r
+\r
+       //--- 8x8 mode\r
+               \r
+       // If the sub-MB shape byte is 0, skip the per-block conversion below and take the common CONVERT_MVS path\r
+       cmp.e.f0.1 (2) null<1>:w                gSUBMB_SHAPE<0;1,0>:ub                  0:w\r
+       (f0.1) jmpi INTERLABEL(CONVERT_MVS)\r
+\r
+       // Note: MVs and Weights/Offsets are already expanded by HW or driver\r
+       \r
+       // MV conversion - Convert each MV to absolute coord. (= MV + MB org. + block offset) \r
+       shl (16)        gwTEMP(0)<1>            gX<0;2,1>:w                                             2:w // Convert MB origin to 1/4-pel unit\r
+       mov (1)         gMVSTEP:w                       24:w                                                    // Address increment for MV read\r
+       add (2)         gwTEMP(0,4)<2>          gwTEMP(0,4)<4;2,2>                              16:w\r
+       add (2)         gwTEMP(0,9)<2>          gwTEMP(0,9)<4;2,2>                              16:w\r
+       add (4)         gwTEMP(0,12)<1>         gwTEMP(0,12)<4;4,1>                             16:w\r
+       \r
+       add (16)        gMV<1>:w                        gMV<16;16,1>:w                                  gwTEMP(0)<16;16,1>\r
+       add (8)         gwTEMP(0)<2>            gwTEMP(0)<16;8,2>                               32:w\r
+       add (16)        gwMV(1,0)<1>            gwMV(1,0)<16;16,1>                              gwTEMP(0)<16;16,1>      \r
+       add (8)         gwTEMP(0,1)<2>          gwTEMP(0,1)<16;8,2>                             32:w\r
+       add (16)        gwMV(3,0)<1>            gwMV(3,0)<16;16,1>                              gwTEMP(0)<16;16,1>      \r
+       add (8)         gwTEMP(0)<2>            gwTEMP(0)<16;8,2>                               -32:w\r
+       add (16)        gwMV(2,0)<1>            gwMV(2,0)<16;16,1>                              gwTEMP(0)<16;16,1>      \r
+\r
+       jmpi INTERLABEL(INIT_ADDRESS_REGS)\r
+\r
+INTERLABEL(NOT_8x8_MODE):\r
+\r
+       //--- !8x8 mode (16x16, 16x8, 8x16)\r
+\r
+       // MVs and Weights/Offsets are expanded\r
+       cmp.le.f0.1 (8) null<1>:w               gwMBTYPE<0;1,0>                                 3:w // Check 16x16\r
+       mov (1)         gSUBMB_SHAPE:ub         0:uw                                                    // subMB shape\r
+       (f0.1) mov (8)  gMV<1>:d                gMV<0;2,1>:d                                    // 16x16 only: <0;2,1> repeats the first two dwords of gMV across all 8 dwords\r
+       (f0.1) mov (8)  gdWGT(1,0)<1>   gWGT<0;4,1>:d                                   // 16x16 only: <0;4,1> repeats the first four dwords of gWGT into gdWGT(1,0)..\r
+       (f0.1) mov (4)  gdWGT(0,4)<1>   gWGT<4;4,1>:d                                   // 16x16 only: copy the first four dwords of gWGT to gdWGT(0,4)..(0,7)\r
+       \r
+INTERLABEL(CONVERT_MVS):\r
+       // MV conversion - Convert each MV to absolute coord. (= MV + MB org. + block offset)\r
+       shl (2)         gwTEMP(0)<1>            gX<2;2,1>:w                                             2:w // Convert MB origin to 1/4-pel unit\r
+       add (16)        gMV<1>:w                        gMV<16;16,1>:w                                  gwTEMP(0)<0;2,1>\r
+       add (2)         gwMV(0,4)<2>            gwMV(0,4)<4;2,2>                                32:w    //{NoDDClr}\r
+       add (2)         gwMV(0,9)<2>            gwMV(0,9)<4;2,2>                                32:w    //{NoDDChk,NoDDClr}\r
+       add (4)         gwMV(0,12)<1>           gwMV(0,12)<4;4,1>                               32:w    //{NoDDChk}\r
+               \r
+INTERLABEL(INIT_ADDRESS_REGS):\r
+       // Initialize the address registers\r
+       mov (2)         pERRORYC:ud                     nOFFSET_ERROR:ud                                {NoDDClr} // Address of Y and C error blocks\r
+       mov (1)         pRECON_MV:ud            nOFFSET_RECON_MV:ud                             {NoDDChk,NoDDClr} // Address of recon area and motion vectors\r
+       mov (1)         pWGT_BIDX:ud            nOFFSET_WGT_BIDX:ud                             {NoDDChk} // Address of weights/offsets and binding tbl idx\r
+       \r
+       // Read the parity of the current field (gPARITY - 0:top, 1:bottom, 3:frame)\r
+       // and set message descriptor for frame/field write\r
+#if defined(MBAFF)\r
+       and.nz.f0.0 (1) null:uw                 gFIELDMBFLAG:ub                                 nFIELDMB_MASK:uw\r
+       (f0.0) and (1)  gPARITY:uw              gMBPARITY:ub                                    nMBPARITY_MASK:uw\r
+       (-f0.0) mov (1) gPARITY:uw              3:uw\r
+#elif defined(FIELD)\r
+       and (1)         gPARITY:uw                      gMBPARITY:ub                                    nMBPARITY_MASK:uw\r
+#endif\r
+       \r
+        \r
+//#endif       // !defined(__INITIALIZE_MBPARA__)\r
diff --git a/i965_drv_video/shaders/h264/mc/inter_Header.inc b/i965_drv_video/shaders/h264/mc/inter_Header.inc
new file mode 100644 (file)
index 0000000..bd10c22
--- /dev/null
@@ -0,0 +1,371 @@
+/*\r
+ * Header file for all AVC INTER prediction kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__INTER_HEADER__) // Make sure this file is only included once\r
+#define __INTER_HEADER__\r
+\r
+// Module name: inter_header.inc\r
+//\r
+// Header file for all AVC INTER prediction kernels\r
+//\r
+\r
+#define INTER_KERNEL\r
+\r
+//-------------------------------------------------------------------------------------------\r
+// TODO: The followings will be merged with the above definitions later\r
+//-------------------------------------------------------------------------------------------\r
+\r
+\r
+//------------ Input parameters & bit masks\r
+\r
+// SW WA for weighted prediction - 2007/09/06  \r
+//.declare     guwR1                   Base=r1 ElementSize=2 Type=uw   \r
+//.declare     guwW128                 Base=r63.13 ElementSize=2 Type=uw\r
+\r
+#ifdef DEV_ILK\r
+// #define SW_W_128            // Enable SW WA for special Weight=128 case. Can be commented to disable it\r
+#else  // Pre DEV_ILK\r
+#define SW_W_128               // Enable SW WA for special Weight=128 case.\r
+#endif // DEV_ILK\r
+\r
+#ifdef SW_W_128\r
+.declare       gudW128                 Base=r1.0 ElementSize=4 Type=ud\r
+#else\r
+#endif // SW_W_128\r
+\r
+#define                gORIX                   r3.4                            // :ub, X origin\r
+#define                gORIY                   r3.5                            // :ub, Y origin\r
+\r
+#define                gCBP                    r3.9                            // :ub, CBP (0, 0, Y0, Y1, Y2, Y3, Cb, Cr)\r
+#define                nCBPY_MASK              0x3c\r
+#define                nCBPU_MASK              0x2\r
+#define                nCBPV_MASK              0x1\r
+\r
+#define                gFIELDFLAGS             r3.1                            // :uw - To compute message descriptor for write\r
+\r
+#define                gMBTYPE                 r3.1                            // :ub, MB type\r
+#define                nMBTYPE_MASK    0x1f\r
+#define                gFIELDMBFLAG    r3.1                            // :ub, Field MB flag\r
+#define                nFIELDMB_MASK   0x40\r
+#define                gMBPARITY               r3.3                            // :ub, Bottom field flag\r
+#define                nMBPARITY_MASK  0x01\r
+\r
+#define                gWPREDFLAG              r3.0                            // :ub, Weighted pred flag\r
+#define                nWBIDIR_MASK    0xc0\r
+\r
+#define                gSUBMB_SHAPE    r3.12                           // :ub, Sub-MB shape\r
+#define                gSUBMB_MODE             r3.13                           // :ub, Sub-MB prediction mode\r
+.declare       guwSUBMB_SHAPE_MODE     Base=r3.6 ElementSize=2 Type=uw\r
+\r
+#define                gYWDENOM                r3.14                           // :ub, Luma log2 weight denom\r
+#define                gCWDENOM                r3.15                           // :ub, Chroma log2 weight denom\r
+\r
+#define                gADDR                   r3.24                           // :ub, Register addresses of error data / MV\r
+\r
+.declare       gubBIDX                 Base=r3.16 ElementSize=1 Type=ub\r
+\r
+#define                gWGT                    r8                                      // Weights/offsets\r
+.declare    gdWGT                      Base=r8  ElementSize=4 Type=d\r
+.declare    gwWGT                      Base=r8  ElementSize=2 Type=w\r
+#define                gMV                             r4                                      // MVs\r
+.declare    gwMV                       Base=r4  ElementSize=2 Type=w\r
+.declare    gdMV                       Base=r4  ElementSize=4 Type=d\r
+\r
+.declare       gwERRORY                Base=r10 ElementSize=2 Type=w           // 16 GRFs\r
+.declare       gubERRORY               Base=r10 ElementSize=1 Type=ub\r
+.declare       gwERRORC                Base=r26 ElementSize=2 Type=w           // 8 GRFs\r
+.declare       gubERRORC               Base=r26 ElementSize=2 Type=ub          // NOTE(review): ElementSize=2 with Type=ub, unlike gubERRORY (ElementSize=1) - confirm intended\r
+\r
+//------------ Address registers\r
+#define                pMSGDSC                 a0.0                            // ud: Must be the leading dword of the address register\r
+#define                pREF                    a0.0\r
+\r
+#define                pBIDX                   a0.2                            \r
+#define                pWGT                    a0.3\r
+#define                pERRORYC                a0.2                            // :ud  \r
+#define                pERRORY                 a0.4\r
+#define                pERRORC                 a0.5\r
+#define                pMV                             a0.6    \r
+\r
+#define                pWGT_BIDX               a0.1                            // :ud, WGT & BIDX\r
+#define                pRECON_MV               a0.3                            // :ud, RECON & MV\r
+\r
+#define                pREF0                   a0.0                            // :uw\r
+#define                pREF0D                  a0.0                            // :ud\r
+#define                pREF1                   a0.1\r
+#define                pREF2                   a0.2\r
+#define                pREF2D                  a0.1                            // :ud\r
+#define                pREF3                   a0.3\r
+#define                pREF4                   a0.4\r
+#define                pREF4D                  a0.2                            // :ud\r
+#define                pREF5                   a0.5\r
+#define                pREF6                   a0.6\r
+#define                pREF6D                  a0.3                            // :ud\r
+#define                pREF7                   a0.7\r
+\r
+#define                pRES                    a0.6\r
+#define                pRESD                   a0.3                            // :ud\r
+#define                pRESULT                 a0.7\r
+\r
+#define                p0                              a0.0\r
+#define                p1                              a0.1\r
+\r
+//------------ Constants for static/inline/indirect\r
+#define                nOFFSET_BIDX    112                                     // = 32*3+4*4\r
+\r
+#define                nOFFSET_WGT             256                                     // = 32*8\r
+#define                nOFFSET_WGT_BIDX 0x01000070                     // = (256<<16)+112\r
+#define                nOFFSET_ERROR   0x03400140                      // = ((320+128*4)<<16)+320, i.e. (nOFFSET_ERRORC<<16)+nOFFSET_ERRORY\r
+#define                nOFFSET_ERRORY  0x0140                          // = 32*10 = 320\r
+#define                nOFFSET_ERRORC  0x0340                          // = 32*26 = 320+128*4\r
+#define                nOFFSET_MV              128                                     // = 32*4\r
+#define                nOFFSET_RECON_MV 0x04400080                     // = (1088<<16)+128             // TODO: OFFSET_RECON is obsolete\r
+\r
+//------------ Constants for kernel internal variables\r
+#define                nOFFSET_INTPY0  0x0640                          // = 32*50\r
+#define                nOFFSET_INTPY1  0x0780                          // = 32*60\r
+#define                nOFFSET_INTPC0  0x06c0                          // = 32*54\r
+#define                nOFFSET_INTPC1  0x0480                          // = 32*36\r
+#define                nOFFSET_INTP0   0x06c00640                      // = (nOFFSET_INTPC0<<16)|nOFFSET_INTPY0\r
+#define                nOFFSET_INTP1   0x04800780                      // = (nOFFSET_INTPC1<<16)|nOFFSET_INTPY1\r
+\r
+#define                nOFFSET_INTERIM         0x0480                          // = 32*36\r
+#define                nOFFSET_INTERIM2        0x04A00480                      // = ((32*37)<<16)|(32*36)\r
+#define                nOFFSET_INTERIM3        0x04A00480                      // = ((32*36+32)<<16)|(32*36)\r
+#define                nOFFSET_INTERIM4        0x04A00490                      // = ((32*37)<<16)|(32*36+16)\r
+\r
+#define                nOFFSET_INTERIM4x4              0x04C0                  // = 32*38\r
+#define                nOFFSET_INTERIM4x4_4    0x04E004D0              // = ((32*38+32)<<16)|(32*38+16)\r
+#define                nOFFSET_INTERIM4x4_5    0x04D004C0              // = ((32*38+16)<<16)|(32*38)\r
+#define                nOFFSET_INTERIM4x4_6    0x04E004C0              // = ((32*38+32)<<16)|(32*38)\r
+#define                nOFFSET_INTERIM4x4_7    0x04D004C8              // = ((32*38+16)<<16)|(32*38+8)\r
+#define                nOFFSET_INTERIM4x4_8    0x04E004D8              // = ((32*38+32)<<16)|(32*38+24)\r
+#define                nOFFSET_INTERIM4x4_9    0x04F004E8              // = ((32*38+48)<<16)|(32*38+40)\r
+\r
+#define                nOFFSET_RES             0x540                           // = 32*42\r
+#define                nOFFSET_REF             0x560                           // = 32*43\r
+#define                nOFFSET_REFC    0x700                           // = 32*56\r
+\r
+                       // Binding table index\r
+#define                nBDIX_DESTY             0\r
+#define                nBDIX_DESTC             1\r
+#define                nBI_LC_DIFF             0x10                            // Binding table index diff between luma and chroma\r
+\r
+#define                nGRFWIB                 32\r
+#define                nGRFHWIB                16\r
+\r
+//------------ Regions\r
+\r
+.declare    gudREF                     Base=r43 ElementSize=4 SrcRegion=<16;16,1> Type=ud\r
+.declare    gubREF                     Base=r43 ElementSize=1 Type=ub\r
+.declare    gudREFC                    Base=r56 ElementSize=4 SrcRegion=<16;16,1> Type=ud\r
+\r
+// 16x16 handling\r
+.declare    gudREF21x21                Base=r58 ElementSize=4 SrcRegion=<16;16,1> Type=ud\r
+.declare    gudREF18x10                Base=r66 ElementSize=4 SrcRegion=<16;16,1> Type=ud\r
+.declare    gubREF18x10                Base=r66 ElementSize=1 SrcRegion=<16;16,1> Type=ub\r
+\r
+\r
+\r
+.declare    gudREF16x16                Base=r38 ElementSize=4 Type=ud                  // 8 GRFs\r
+.declare    gubREF16x16                Base=r38 ElementSize=1 Type=ub\r
+.declare    gudREFC16x8                Base=r46 ElementSize=4 Type=ud                  // 4 GRFs\r
+.declare    gubREFC16x8                Base=r46 ElementSize=1 Type=ub\r
+\r
+// TODO\r
+.declare    gubAVG                     Base=r56 ElementSize=1 Type=ub\r
+.declare       gubREFY_BWD             Base=r64 ElementSize=1 Type=ub\r
+.declare       gubREFC_BWD             Base=r72 ElementSize=1 Type=ub\r
+\r
+\r
+.declare    guwINTPY0          Base=r50 ElementSize=2 SrcRegion=<16;16,1> Type=uw\r
+.declare       gudINTPY0               Base=r50 ElementSize=4 Type=ud\r
+.declare    gubINTPY0          Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
+.declare    guwINTPY1          Base=r60 ElementSize=2 SrcRegion=<16;16,1> Type=uw\r
+.declare       gudINTPY1               Base=r60 ElementSize=4 Type=ud\r
+.declare    gubINTPY1          Base=r60 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
+.declare    guwYPRED           Base=r50 ElementSize=2 SrcRegion=<8;8,1> Type=uw\r
+.declare    gubYPRED           Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
+\r
+.declare    guwINTPC0          Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=uw\r
+.declare    gwINTPC0           Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=w\r
+.declare       gudINTPC0               Base=r54 ElementSize=4 Type=ud\r
+.declare    gubINTPC0          Base=r54 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
+.declare    guwINTPC1          Base=r36 ElementSize=2 SrcRegion=<16;16,1> Type=uw\r
+.declare       gudINTPC1               Base=r36 ElementSize=4 Type=ud\r
+.declare    gubINTPC1          Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
+.declare    guwCPRED           Base=r54 ElementSize=2 SrcRegion=<16;8,2> Type=uw\r
+.declare    gubCPRED           Base=r54 ElementSize=1 SrcRegion=<32;8,4> Type=ub\r
+\r
+#define                gINTERIM                r36\r
+.declare       gubINTERIM_BUF  Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
+#define                gINTERIM4x4             r38\r
+.declare       gubINTERIM4x4_BUF Base=r38 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
+.declare       gwINTERIM4x4_BUF Base=r38 ElementSize=2  Type=w\r
+\r
+.declare       gubINTERIM_BUF2 Base=r42 ElementSize=1 SrcRegion=<8;4,2> Type=ub\r
+.declare       gwINTERIM_BUF2  Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w       \r
+.declare       guwINTERIM_BUF2 Base=r42 ElementSize=2 Type=uw  \r
+\r
+.declare       gwINTERIM_BUF3  Base=r38 ElementSize=2 SrcRegion=<16;16,1> Type=w               // 2 GRFs\r
+.declare       gubINTERIM_BUF3 Base=r38 ElementSize=1 Type=ub                                                  \r
+\r
+.declare       gwTEMP                  Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w\r
+\r
+//------------ General registers\r
+\r
+#define                gX                              r3.2                            // w\r
+#define                gY                              r3.3                            // w\r
+\r
+#define                gMSGDSC_R               r3.6                            // ud\r
+#define                gMSGDSC_W               r3.7                            // ud\r
+\r
+#ifdef SW_W_128\r
+.declare       gwMBTYPE                Base=r8.6 ElementSize=2 Type=w                  // Shared with gLOOP_SUBMB\r
+\r
+// TODO\r
+#define                gLOOP_SUBMB             r8.6\r
+#define                gLOOP_SUBMBPT   r8.7\r
+#define                gLOOP_DIR               r9.6\r
+#define                gLOOPCNT                r9.7                            // Loop counter for submodules\r
+#else\r
+.declare       gwMBTYPE                Base=r1.0 ElementSize=2 Type=w                  // Shared with gLOOP_SUBMB\r
+\r
+// TODO\r
+#define                gLOOP_SUBMB             r1.0\r
+#define                gLOOP_SUBMBPT   r1.1\r
+#define                gLOOP_DIR               r8.7\r
+#define                gLOOPCNT                r9.7                            // Loop counter for submodules\r
+#endif // SW_W_128\r
+\r
+#define                gW0                             r34.6                           // Temporary WORD \r
+#define                gW1                             r34.7                           // Temporary WORD \r
+#define                gW2                             r34.8                           // Temporary WORD \r
+#define                gW3                             r34.9                           // Temporary WORD \r
+#define                gD0                             r34.3                           // Temporary DWORD\r
+\r
+#define                gW4                             r34.15\r
+\r
+//\r
+\r
+#define                gMVX_INT                r34.0                           // :w\r
+#define                gMVY_INT                r34.1                           // :w\r
+#define                gMVX_FRAC               r34.2                           // :w\r
+#define                gMVY_FRAC               r34.3                           // :w\r
+#define                gMVX_FRACC              r34.4                           // :w\r
+#define                gMVY_FRACC              r34.5                           // :w\r
+\r
+#define                gpINTPY                 r34.10\r
+#define                gpINTPC                 r34.11\r
+#define                gpINTP                  r34.5                           // DW\r
+\r
+#define                gPREDFLAG               r34.12\r
+#define                gBIDX                   r34.13\r
+#define                gREFPARITY              r34.14\r
+#define                gCHRMVADJ               r1.14\r
+#define                gPARITY                 r1.15\r
+#define                gCBP_MASK               r1.1\r
+\r
+#define                gMVSTEP                 r1.13\r
+\r
+#define                gpADDR                  r1.2                            // :uw (8 words)\r
+\r
+#define                gSHAPETEMP              r8.15                           // :uw\r
+\r
+#define                gCOEFA                  r42.0                           \r
+#define                gCOEFB                  r42.1                           \r
+#define                gCOEFC                  r42.2                           \r
+#define                gCOEFD                  r42.3\r
+\r
+// Weighted prediction\r
+#define                gPREDFLAG0              r46.0\r
+#define                gPREDFLAG1              r46.2\r
+\r
+#define                gWEIGHTFLAG             r43.2\r
+#define                gBIPRED                 r43.3\r
+#define                gYADD                   r43.4\r
+#define                gCADD                   r43.5\r
+#define                gYSHIFT                 r43.6\r
+#define                gCSHIFT                 r43.7\r
+\r
+#define                gOFFSET                 r44.0\r
+#define                gUOFFSET                r44.1\r
+#define                gVOFFSET                r44.2\r
+\r
+#define                gWT0                    r45.0\r
+#define                gO0                             r45.1\r
+#define                gWT1                    r45.2\r
+#define                gO1                             r45.3\r
+#define                gUW0                    r45.4\r
+#define                gUO0                    r45.5\r
+#define                gUW1                    r45.6\r
+#define                gUO1                    r45.7   \r
+#define                gVW0                    r45.8   \r
+#define                gVO0                    r45.9   \r
+#define                gVW1                    r45.10  \r
+#define                gVO1                    r45.11\r
+\r
+#define                gWT0_D                  r45.0   \r
+#define                gUW0_D                  r45.2   \r
+\r
+//------------ Message-related Registers & constants\r
+#define                gMSGSRC                 r2                                      // Message Source\r
+\r
+#define                mMSGHDR                 m1              \r
+#define                mMSGHDRY                m1              \r
+#define                mMSGHDRC                m2              \r
+#define                mMSGHDR1                m1              \r
+#define                mMSGHDR2                m2              \r
+#define                mMSGHDR3                m3              \r
+#define                mMSGHDR4                m4              \r
+#define                mMSGHDRYW               m1              \r
+#define                mMSGHDRCW               m10             \r
+\r
+#ifdef DEV_ILK\r
+       // NOTE(review): this bit-layout line is identical to the one in the pre-ILK branch, but the DEV_ILK constants are packed differently (e.g. 0x0208A002 here vs 0x0410A002 there) - confirm against the ILK descriptor format. Layout as originally written: 0000 0100(read)  0001(msg len) xxxx(resp len) 1010 (sampler cache) xxxx (field/frame) xxxx xxxx (bidx)\r
+#define                nDWBRMSGDSC_SC          0x0208A002      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache (field/frame nibble = 0)\r
+#define                nDWBRMSGDSC_SC_TF       0x0208E602      // Same read descriptor with 6 in the field/frame nibble (presumably top field - confirm). NOTE(review): bits 15:12 are 0xE here but 0xA in the plain variant and in the pre-ILK _TF value\r
+#define                nDWBRMSGDSC_SC_BF       0x0208E702      // Same read descriptor with 7 in the field/frame nibble (presumably bottom field - confirm); bits 15:12 are 0xE as for _TF\r
+       // NOTE(review): layout line identical to the pre-ILK branch although the DEV_ILK write constants differ (0x02082000 vs 0x05102000) - confirm. Layout as originally written: 0000 0101(write) 0001(msg len) xxxx(resp len) 0010 (render cache)  xxxx (field/frame) xxxx xxxx (bidx)\r
+#define                nDWBWMSGDSC             0x02082000  // DWORD Block Write Message Descriptor through Data Port, Render Cache (field/frame nibble = 0)\r
+#define                nDWBWMSGDSC_TF  0x02082600  // Same write descriptor with 6 in the field/frame nibble (presumably top field - confirm)\r
+#define                nDWBWMSGDSC_BF  0x02082700  // Same write descriptor with 7 in the field/frame nibble (presumably bottom field - confirm)\r
+\r
+#else  // Pre DEV_ILK\r
+       // 0000 0100(read)  0001(msg len) xxxx(resp len) 1010 (sampler cache) xxxx (field/frame) xxxx xxxx (bidx)\r
+#define                nDWBRMSGDSC_SC          0x0410A002      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache (field/frame nibble = 0)\r
+#define                nDWBRMSGDSC_SC_TF       0x0410A602      // Same read descriptor with 6 in the field/frame nibble (presumably top field - confirm)\r
+#define                nDWBRMSGDSC_SC_BF       0x0410A702      // Same read descriptor with 7 in the field/frame nibble (presumably bottom field - confirm)\r
+       // 0000 0101(write) 0001(msg len) xxxx(resp len) 0010 (render cache)  xxxx (field/frame) xxxx xxxx (bidx)\r
+#define                nDWBWMSGDSC             0x05102000  // DWORD Block Write Message Descriptor through Data Port, Render Cache (field/frame nibble = 0)\r
+#define                nDWBWMSGDSC_TF  0x05102600  // Same write descriptor with 6 in the field/frame nibble (presumably top field - confirm)\r
+#define                nDWBWMSGDSC_BF  0x05102700  // Same write descriptor with 7 in the field/frame nibble (presumably bottom field - confirm)\r
+#endif // DEV_ILK\r
+\r
+#define                nDWB_FIELD_MASK 0x0600\r
+                                                                               \r
+// message data payload\r
+.declare    mbMSGPAYLOADY      Base=m2  ElementSize=1 SrcRegion=REGION(16,1) Type=b\r
+.declare    mbMSGPAYLOADC      Base=m11 ElementSize=1 SrcRegion=REGION(16,1) Type=b\r
+\r
+// Destination registers for write commit\r
+#define                gREG_WRITE_COMMIT_Y             r10.0\r
+#define                gREG_WRITE_COMMIT_UV    r11.0\r
+\r
+#define RETURN_REG_INTER       r1.5            // Return pointer for all sub-routine calls (type DWORD); written by CALL_INTER from ip, copied back into ip by RETURN_INTER\r
+\r
+#define CALL_INTER(subFunc, skipInst)  add (1) RETURN_REG_INTER<1>:ud   ip:ud  1+skipInst*INST_SIZE \n\\r
+                               jmpi (1) subFunc\r
+#define        RETURN_INTER            mov (1) ip:ud   RETURN_REG_INTER<0;1,0>:ud              // Return to calling module by loading ip from RETURN_REG_INTER. NOTE(review): CALL_INTER's offset expands textually as 1 + (skipInst*INST_SIZE), with skipInst unparenthesized and INST_SIZE defined outside this view - confirm that is the intended return offset\r
+\r
+\r
+// End of inter_header.inc\r
+\r
+#endif // !defined(__INTER_HEADER__)\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/interpolate_C_2x2.asm b/i965_drv_video/shaders/h264/mc/interpolate_C_2x2.asm
new file mode 100644 (file)
index 0000000..ffa65cf
--- /dev/null
@@ -0,0 +1,57 @@
+/*\r
+ * Interpolation kernel for chrominance 2x2 motion compensation\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ * Overview (derived from the instructions in this file):\r
+ *   Inputs : gMVX_FRACC / gMVY_FRACC (:w)  chroma fractional MV components (adjacent words)\r
+ *            gpINTPC (:uw)                 destination GRF byte address\r
+ *            reference bytes addressed from GRF byte offset nOFFSET_REFC\r
+ *   Output : 4 words at r[pRESULT] and 4 words at r[pRESULT,16], holding the weighted sum\r
+ *            A*ref[+0] + B*ref[+2] + C*ref[+8] + D*ref[+10]; no rounding or shift is applied here.\r
+ *   Scratch: gW0, gW1, gCOEFA..gCOEFD, acc0, gwINTERIM_BUF2(0), pREF0, pRESULT.\r
+ *   Notes  : - gCOEFA..gCOEFD (r42.0..r42.3) share r42 with gwINTERIM_BUF2, so the final mac\r
+ *              overwrites the coefficients with the result.\r
+ *            - The byte offsets 2 (horizontal neighbour) and 8 (next row) suggest interleaved\r
+ *              U/V samples in 8-byte rows - presumably; confirm against the reference loader.\r
+ *            - This file has no label and no return; it is straight-line code, presumably\r
+ *              included inline by its user.\r
+ */\r
+//     Kernel name: Interpolate_C_2x2.asm\r
+//\r
+//     Interpolation kernel for chrominance 2x2 motion compensation\r
+//\r
+//  $Revision: 8 $\r
+//  $Date: 10/09/06 4:00p $\r
+//\r
+\r
+\r
+//#if !defined(__Interpolate_C_2x2__)          // Make sure this is only included once\r
+//#define __Interpolate_C_2x2__\r
+\r
+       \r
+       // gW0 = xFrac-8 and gW1 = yFrac-8 (one 2-wide add); they are negated at use below to give (8-xFrac) and (8-yFrac)\r
+    add (2)            gW0<1>:w                        gMVX_FRACC<2;2,1>:w                             -0x08:w\r
+    \r
+       // Compute the GRF address of the starting position of the reference area (pREF0) and load the destination address (pRESULT) from gpINTPC\r
+    mov (1)            pREF0:w                         nOFFSET_REFC:w          {NoDDClr}\r
+       mov (1)         pRESULT:uw                      gpINTPC:uw                      {NoDDChk}\r
+\r
+       // gCOEFA = (8-xFrac)*(8-yFrac)   -> (-gW0)*(-gW1)\r
+    // gCOEFB = xFrac*(8-yFrac)       -> xFrac*(-gW1)\r
+    // gCOEFC = (8-xFrac)*yFrac       -> (-gW0)*yFrac\r
+    // gCOEFD = xFrac*yFrac \r
+    mul (1)            gCOEFD:w                gMVX_FRACC:w                                    gMVY_FRACC:w    {NoDDClr}\r
+    mul (1)            gCOEFA:w                        -gW0:w                                                  -gW1:uw         {NoDDClr,NoDDChk}\r
+    mul (1)            gCOEFB:w                        gMVX_FRACC:w                                    -gW1:uw         {NoDDClr,NoDDChk}\r
+    mul (1)            gCOEFC:w                    -gW0:w                                                      gMVY_FRACC:w {NoDDChk}\r
+    \r
+    // (8-xFrac)*(8-yFrac)*A  - starts the accumulation in acc0; region <8;4,1> reads 2 rows of 4 bytes, 8 bytes apart\r
+    // ---------------------\r
+    mul (8)            acc0<1>:uw                      r[pREF0,0]<8;4,1>:ub                    gCOEFA:uw\r
+        \r
+    // xFrac*(8-yFrac)*B  - same region shifted by 2 bytes\r
+    // -------------------\r
+    mac (8)            acc0<1>:uw                      r[pREF0,2]<8;4,1>:ub                    gCOEFB:uw\r
+          \r
+    // (8-xFrac)*yFrac*C  - same region shifted by 8 bytes\r
+    // -------------------\r
+    mac (8)            acc0<1>:uw                      r[pREF0,8]<8;4,1>:ub                    gCOEFC:uw\r
+            \r
+    // xFrac*yFrac*D  - region shifted by 10 bytes; this last mac writes the 8 sums to gwINTERIM_BUF2(0), which are then copied as two groups of 4 words to r[pRESULT] and r[pRESULT,16]\r
+    // -----------------\r
+    mac (8)            gwINTERIM_BUF2(0)<1>    r[pREF0,10]<8;4,1>:ub           gCOEFD:uw\r
+    mov (4)            r[pRESULT]<1>:uw                gwINTERIM_BUF2(0)<4;4,1>                {NoDDClr}\r
+       mov (4)         r[pRESULT,16]<1>:uw             gwINTERIM_BUF2(0,4)<4;4,1>              {NoDDChk}\r
+    \r
+//#endif       // !defined(__Interpolate_C_2x2__)\r
diff --git a/i965_drv_video/shaders/h264/mc/interpolate_C_4x4.asm b/i965_drv_video/shaders/h264/mc/interpolate_C_4x4.asm
new file mode 100644 (file)
index 0000000..ea23b11
--- /dev/null
@@ -0,0 +1,67 @@
+/*\r
+ * Interpolation kernel for chrominance 4x4 motion compensation\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ * Overview (derived from the instructions in this file):\r
+ *   Inputs : gMVX_FRACC / gMVY_FRACC (:w)  chroma fractional MV components (adjacent words)\r
+ *            gpINTPC (:uw)                 destination GRF byte address\r
+ *            reference bytes addressed from GRF byte offsets nOFFSET_REFC and nOFFSET_REFC+16\r
+ *   Output : 16 words at r[pRESULT] and 16 words at the second destination of the final mac pair,\r
+ *            holding the weighted sum A*a + B*b + C*c + D*d; no rounding or shift is applied here.\r
+ *   Scratch: gW0, gW1 (first the xFrac-8 / yFrac-8 temporaries, later the saved pREF0+16 / pREF1+16),\r
+ *            gCOEFA..gCOEFD, acc0, acc1, pREF0, pREF1, pRESULT.\r
+ *   Notes  : - The A and B terms read through pREF0 only; before the C and D terms pREF0/pREF1 are\r
+ *              both advanced by 16 bytes and the <8,1> indirect regions are used.\r
+ *            - The code after the label ends without a return in this file; presumably the includer\r
+ *              supplies it - confirm.\r
+ */\r
+//     Kernel name: Interpolate_C_4x4_Func.asm\r
+//\r
+//     Interpolation kernel for chrominance 4x4 motion compensation\r
+//\r
+//  $Revision: 8 $\r
+//  $Date: 10/09/06 4:00p $\r
+//\r
+\r
+\r
+//#if !defined(__Interpolate_C_4x4_Func__)             // Make sure this is only included once\r
+//#define __Interpolate_C_4x4_Func__\r
+\r
+\r
+INTERLABEL(Interpolate_C_4x4_Func):\r
+\r
+\r
+       // gW0 = xFrac-8 and gW1 = yFrac-8 (one 2-wide add); they are negated at use below to give (8-xFrac) and (8-yFrac)\r
+    add (2)            gW0<1>:w                        gMVX_FRACC<2;2,1>:w                             -0x08:w\r
+\r
+       // Compute the GRF addresses of the starting positions of the reference area (pREF0, and pREF1 16 bytes further) and load the destination address (pRESULT) from gpINTPC\r
+    mov (1)            pREF0:w                         nOFFSET_REFC:w          {NoDDClr}\r
+       mov (1)         pREF1:uw                        nOFFSET_REFC+16:w       {NoDDChk,NoDDClr}\r
+       mov (1)         pRESULT:uw                      gpINTPC:uw                      {NoDDChk}\r
+\r
+       // gCOEFA = (8-xFrac)*(8-yFrac)   -> (-gW0)*(-gW1)\r
+    // gCOEFB = xFrac*(8-yFrac)       -> xFrac*(-gW1)\r
+    // gCOEFC = (8-xFrac)*yFrac       -> (-gW0)*yFrac\r
+    // gCOEFD = xFrac*yFrac; after these four multiplies gW0/gW1 are free and are reused to hold pREF0+16 / pREF1+16\r
+    mul (1)            gCOEFD:w                gMVX_FRACC:w                                    gMVY_FRACC:w    {NoDDClr}\r
+    mul (1)            gCOEFA:w                        -gW0:w                                                  -gW1:uw         {NoDDClr,NoDDChk}\r
+    mul (1)            gCOEFB:w                        gMVX_FRACC:w                                    -gW1:uw         {NoDDClr,NoDDChk}\r
+    mul (1)            gCOEFC:w                    -gW0:w                                                      gMVY_FRACC:w {NoDDChk}\r
+\r
+       add (2)         gW0<1>:uw                       pREF0<2;2,1>:uw                                 16:w\r
+\r
+    // (8-xFrac)*(8-yFrac)*A  - starts the accumulation: acc0 from r[pREF0,0], acc1 from one GRF (nGRFWIB bytes) further\r
+    // ---------------------\r
+    mul (16)   acc0<1>:uw                      r[pREF0,0]<16;8,1>:ub                   gCOEFA:uw\r
+    mul (16)   acc1<1>:uw                      r[pREF0,nGRFWIB]<16;8,1>:ub             gCOEFA:uw\r
+        \r
+    // xFrac*(8-yFrac)*B  - same regions shifted by 2 bytes\r
+    // -------------------\r
+    mac (16)   acc0<1>:uw          r[pREF0,2]<16;8,1>:ub                       gCOEFB:uw\r
+    mac (16)   acc1<1>:uw          r[pREF0,nGRFWIB+2]<16;8,1>:ub       gCOEFB:uw\r
+\r
+    // (8-xFrac)*yFrac*C  - pREF0/pREF1 are first replaced by the saved +16 addresses in gW0/gW1\r
+    // -------------------\r
+    mov (2)            pREF0<1>:uw                     gW0<2;2,1>:uw\r
+    mac (16)   acc0<1>:uw          r[pREF0,0]<8,1>:ub                          gCOEFC:uw\r
+    mac (16)   acc1<1>:uw          r[pREF0,nGRFWIB]<8,1>:ub            gCOEFC:uw\r
+            \r
+    // xFrac*yFrac*D  - the final mac pair writes the sums to the destination. NOTE(review): the second destination offset is spelled GRFWIB while every other offset in this file uses nGRFWIB; GRFWIB is not defined in the visible part of inter_header.inc - confirm it resolves to the same value\r
+    // -----------------\r
+    mac (16)   r[pRESULT]<1>:uw        r[pREF0,2]<8,1>:ub                              gCOEFD:uw\r
+    mac (16)   r[pRESULT,GRFWIB]<1>:uw r[pREF0,nGRFWIB+2]<8,1>:ub gCOEFD:uw {SecHalf}\r
+\r
+   \r
+//#endif       // !defined(__Interpolate_C_4x4_Func__)\r
diff --git a/i965_drv_video/shaders/h264/mc/interpolate_Y_4x4.asm b/i965_drv_video/shaders/h264/mc/interpolate_Y_4x4.asm
new file mode 100644 (file)
index 0000000..dbb5733
--- /dev/null
@@ -0,0 +1,217 @@
+/*\r
+ * Interpolation kernel for luminance motion compensation\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ * Overview (derived from the instructions in this file):\r
+ *   Inputs : gMVX_FRAC / gMVY_FRAC (:w)  luma fractional MV components\r
+ *            gpINTPY (:uw)               destination GRF byte address\r
+ *            f0.1                        read by the first two instructions before anything here writes it\r
+ *            reference bytes addressed from nOFFSET_REF\r
+ *   Output : 4 rows of 4 samples at destination offsets 0/16/32/48; written as words on the\r
+ *            Case 0 path and to every other byte (<2>:ub) on every other path.\r
+ *   a0.0-a0.7 are copied to gpADDR after the Case 0 test and copied back just before\r
+ *   Exit_Interpolate_Y_4x4; the Case 0 path jumps straight to the exit label.\r
+ *   f0.0 and f0.1 are overwritten throughout.\r
+ */\r
+// Kernel name: Interpolate_Y_4x4.asm\r
+//\r
+// Interpolation kernel for luminance motion compensation\r
+//\r
+//  $Revision: 10 $\r
+//  $Date: 10/09/06 4:00p $\r
+//\r
+\r
+\r
+       // Compute the GRF address of the starting position of the reference area: one extra GRF row (nGRFWIB) is skipped when f0.1 is clear. NOTE(review): the address is written as pREF but read below as pREF0 - presumably aliases defined elsewhere; confirm\r
+#if 1\r
+    (-f0.1) mov (1)    pREF:w                  nOFFSET_REF+2+nGRFWIB:w\r
+    (f0.1) mov (1)     pREF:w                  nOFFSET_REF+2:w         \r
+       mov (1)         pRESULT:uw                      gpINTPY:uw                                                                                      \r
+#else\r
+    mov (1)            pREF:w                          nOFFSET_REF+2+nGRFWIB:w {NoDDClr}\r
+       mov (1)         pRESULT:uw                      gpINTPY:uw                              {NoDDChk}\r
+#endif\r
+       \r
+       /*\r
+        *                       |               |\r
+        *               - - 0 1 2 3 + - \r
+        *                       4 5 6 7\r
+        *                       8 9 A B\r
+        *                       C D E F\r
+        *               - - + - - - + -\r
+     *                  |               |\r
+        *\r
+        * The hex digit names the fractional position; the branch conditions in this file are\r
+        * consistent with digit = 4*gMVY_FRAC + gMVX_FRAC (e.g. 0 = both zero, A = both 2).\r
+        *\r
+        * Dispatch after Case 0: gW4 = gMVY_FRAC*gMVX_FRAC.\r
+        *   gW4 == 0 (f0.0) or gW4 odd (f0.1)  -> Interpolate_Y_H_4x4\r
+        *   otherwise (non-zero and even)      -> falls through to the A69BE path\r
+        * Before branching, pREF1 = pREF0 + nGRFWIB/2, pREF2/pREF3 = pREF0/pREF1 + nGRFWIB, and\r
+        * pREF0..pREF3 are saved in gW0..gW3 for the later sections.\r
+        */\r
+       \r
+       // Case 0: both fractional components are zero (OR of the two is zero) - widen the 4x4 reference bytes to words in the destination and jump to the exit label\r
+       or.z.f0.1 (16) null:w                   gMVY_FRAC<0;1,0>:w                              gMVX_FRAC<0;1,0>:w      \r
+       (f0.1) mov (4)  r[pRESULT]<1>:uw        r[pREF0]<4;4,1>:ub\r
+       (f0.1) mov (4)  r[pRESULT,16]<1>:uw     r[pREF0,16]<4;4,1>:ub\r
+       (f0.1) mov (4)  r[pRESULT,32]<1>:uw r[pREF0,32]<4;4,1>:ub\r
+       (f0.1) mov (4)  r[pRESULT,48]<1>:uw r[pREF0,48]<4;4,1>:ub\r
+       (f0.1) jmpi INTERLABEL(Exit_Interpolate_Y_4x4)\r
+       \r
+       // Store all address registers (a0.0-a0.7) in gpADDR; they are copied back at Return_Interpolate_Y_4x4\r
+       mov (8)         gpADDR<1>:w                     a0<8;8,1>:w\r
+       \r
+       mul.z.f0.0 (1) gW4:w                    gMVY_FRAC:w                                             gMVX_FRAC:w\r
+       and.nz.f0.1 (1) null                    gW4:w                                                   1:w\r
+\r
+       add (1)         pREF1:uw                        pREF0:uw                                                nGRFWIB/2:uw\r
+       add (2)         pREF2<1>:uw                     pREF0<2;2,1>:uw                                 nGRFWIB:uw\r
+       mov (4)         gW0<1>:uw                       pREF0<4;4,1>:uw\r
+\r
+       (f0.0) jmpi INTERLABEL(Interpolate_Y_H_4x4)\r
+       (f0.1) jmpi INTERLABEL(Interpolate_Y_H_4x4)     \r
+       \r
+       //-----------------------------------------------------------------------\r
+       // CASE: A69BE (H/V interpolation) - reached when xFrac*yFrac is non-zero and even\r
+       //-----------------------------------------------------------------------\r
+\r
+       // Compute interim horizontal interpolation: taps (1,-5,20,20,-5,1) over bytes +0..+5, kept unscaled as words at r[pRES,...]; pREF0/pREF1 are first moved back by 34/18 bytes\r
+       add (1)         pREF0<1>:uw                     pREF0<0;1,0>:uw                                 -34:w \r
+       add (1)         pREF1<1>:uw                     pREF1<0;1,0>:uw                                 -18:w {NoDDClr}\r
+       mov (1)         pRESD:ud                        nOFFSET_INTERIM4x4_5:ud                         {NoDDChk} // Case 69be\r
+       \r
+       // Check whether this position is 'A' (gW4 == 4, i.e. xFrac == yFrac == 2); f0.0 stays live until the jmpi at the end of this section\r
+       cmp.e.f0.0 (1) null                             gW4:w                                                   4:w\r
+\r
+    $for(0;<2;1) {\r
+       add (16)        acc0<1>:w                       r[pREF0,nGRFWIB*2*%1]<16;4,1>:ub                        r[pREF0,nGRFWIB*2*%1+5]<16;4,1>:ub              {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*2*%1+1]<16;4,1>:ub                      -5:w    {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*2*%1+2]<16;4,1>:ub                      20:w    {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*2*%1+3]<16;4,1>:ub                      20:w    {Compr}\r
+       mac (16)        r[pRES,nGRFWIB*%1]<1>:w         r[pREF0,nGRFWIB*2*%1+4]<16;4,1>:ub      -5:w    {Compr}\r
+       }\r
+       // last line: the same six taps applied to one more group of 4 samples, stored at r[pRES,nGRFWIB*2]\r
+       add (4)         acc0<1>:w                       r[pREF0,nGRFWIB*2*2]<4;4,1>:ub                          r[pREF0,nGRFWIB*2*2+5]<4;4,1>:ub\r
+       mac (4)         acc0<1>:w                       r[pREF0,nGRFWIB*2*2+1]<4;4,1>:ub                        -5:w\r
+       mac (4)         acc0<1>:w                       r[pREF0,nGRFWIB*2*2+2]<4;4,1>:ub                        20:w\r
+       mac (4)         acc0<1>:w                       r[pREF0,nGRFWIB*2*2+3]<4;4,1>:ub                        20:w\r
+       mac (4)         r[pRES,nGRFWIB*2]<1>:w          r[pREF0,nGRFWIB*2*2+4]<4;4,1>:ub        -5:w\r
+       \r
+    // Compute interim/output vertical interpolation on the word intermediates: six weighted terms (1,-5,20,-5,1,20 in issue order) plus 512, then >>10 with saturation to every other destination byte\r
+       mov (1)         pREF6D:ud                       nOFFSET_INTERIM4x4_4:ud         {NoDDClr}\r
+       mov (1)         pREF0D:ud                       nOFFSET_INTERIM4x4_7:ud         {NoDDChk,NoDDClr}\r
+       mov (1)         pREF2D:ud                       nOFFSET_INTERIM4x4_8:ud         {NoDDChk,NoDDClr}\r
+       mov (1)         pREF4D:ud                       nOFFSET_INTERIM4x4_9:ud         {NoDDChk}\r
+\r
+       add (16)        acc0<1>:w                       gwINTERIM4x4_BUF(0)<16;16,1>            512:w\r
+       mac (16)        acc0<1>:w                       gwINTERIM4x4_BUF(1)<16;16,1>            -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF6,0]<8,1>:w                               20:w\r
+       \r
+       (f0.0) mov (1) pRES:uw                  nOFFSET_RES:uw                                  // Case a: result is written to nOFFSET_RES and the code jumps to Return_Interpolate_Y_4x4\r
+       (-f0.0) mov (1) pRES:uw                 nOFFSET_INTERIM4x4:uw                           // Case 69be: result is written to nOFFSET_INTERIM4x4 and execution falls through to Interpolate_Y_H_4x4\r
+       \r
+       mac (16)        acc0<1>:w                       r[pREF0,0]<4,1>:w                               -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB]<4,1>:w                 1:w\r
+       mac (16)        acc0<1>:w                       r[pREF2,0]<4,1>:w                               20:w    \r
+       asr.sat (16) r[pRES]<2>:ub              acc0<16;16,1>:w                                 10:w  \r
+       \r
+       (f0.0) jmpi INTERLABEL(Return_Interpolate_Y_4x4)\r
+\r
+INTERLABEL(Interpolate_Y_H_4x4):\r
+       \r
+       cmp.e.f0.0 (1) null                             gMVX_FRAC:w                                             0:w\r
+       cmp.e.f0.1 (1) null                             gMVY_FRAC:w                                             2:w\r
+       (f0.0) jmpi INTERLABEL(Interpolate_Y_V_4x4)\r
+       (f0.1) jmpi INTERLABEL(Interpolate_Y_V_4x4)\r
+\r
+       //-----------------------------------------------------------------------\r
+       // CASE: 123567DEF (H interpolation) - reached when xFrac != 0 and yFrac != 2 (both other combinations jump to Interpolate_Y_V_4x4 above)\r
+       //-----------------------------------------------------------------------\r
+       \r
+       add (4)         pREF0<1>:uw                     gW0<4;4,1>:uw                                   -2:w            \r
+       cmp.g.f0.0 (4) null:w                   gMVY_FRAC<0;1,0>:w                              2:w\r
+       cmp.e.f0.1 (1) null                             gMVX_FRAC:w                                             2:w\r
+       (f0.0) add (4) pREF0<1>:uw              pREF0<4;4,1>:uw                                 nGRFWIB/2:uw\r
+       \r
+       cmp.e.f0.0 (1) null:w                   gMVY_FRAC<0;1,0>:w                              0:w\r
+\r
+       (f0.1) mov (1) pRESULT:uw               nOFFSET_RES:uw                                  // Case 26E (xFrac == 2): filter output goes to nOFFSET_RES\r
+       (-f0.1) mov (1) pRESULT:uw              nOFFSET_INTERIM4x4:uw                   // Case 1357DF (xFrac != 2): filter output goes to nOFFSET_INTERIM4x4\r
+       \r
+       // Compute interim/output horizontal interpolation: taps (1,-5,20,20,-5,1) over bytes +0..+5 of pREF0..pREF3 (restored from gW0..gW3 minus 2, plus nGRFWIB/2 when yFrac > 2), with +16 rounding, >>5 and saturation to every other destination byte\r
+       add (16)        acc0<1>:w                       r[pREF0]<4,1>:ub                                16:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,1]<4,1>:ub                              -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,2]<4,1>:ub                              20:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,3]<4,1>:ub                              20:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,4]<4,1>:ub                              -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,5]<4,1>:ub                              1:w\r
+       asr.sat (16) r[pRESULT]<2>:ub   acc0<16;16,1>:w                                 5:w\r
+       \r
+       (-f0.1) jmpi INTERLABEL(Interpolate_Y_V_4x4)\r
+       (-f0.0) jmpi INTERLABEL(Average_4x4)\r
+       \r
+       jmpi INTERLABEL(Return_Interpolate_Y_4x4)\r
+\r
+INTERLABEL(Interpolate_Y_V_4x4):\r
+\r
+       cmp.e.f0.0 (1) null                             gMVY_FRAC:w                                             0:w\r
+       (f0.0) jmpi INTERLABEL(Interpolate_Y_I_4x4)\r
+       \r
+       //-----------------------------------------------------------------------\r
+       // CASE: 48C59D7BF (V interpolation) - reached when yFrac != 0 (yFrac == 0 jumps to Interpolate_Y_I_4x4 above)\r
+       //-----------------------------------------------------------------------\r
+       \r
+       cmp.g.f0.1 (8) null:w                   gMVX_FRAC<0;1,0>:w                              2:w\r
+\r
+       mov (4)         pREF0<1>:uw                     gW0<4;4,1>:uw                                                   {NoDDClr}\r
+       add (4)         pREF4<1>:w                      gW0<4;4,1>:uw                                   16:w    {NoDDChk}\r
+       \r
+       (f0.1) add (8) pREF0<1>:uw              pREF0<4;4,1>:uw                                 1:uw\r
+\r
+       cmp.e.f0.0 (1) null:w                   gMVX_FRAC<0;1,0>:w                              0:w\r
+       cmp.e.f0.1 (1) null                             gMVY_FRAC:w                                             2:w\r
+\r
+       // Compute interim/output vertical interpolation: six weighted terms (1,20,-5,-5,20,1 in issue order) read through pREF0.. and pREF4.. at offsets -nGRFWIB/0/+nGRFWIB, plus 16; the >>5 and saturation happen at VFILTER_4x4. The destination is nOFFSET_RES unless xFrac == 0 and yFrac != 2, in which case it is nOFFSET_INTERIM4x4\r
+       add (16)        acc0<1>:w                       r[pREF0,-nGRFWIB]<4,1>:ub               16:w\r
+       mac (16)        acc0<1>:w                       r[pREF0]<4,1>:ub                                20:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB]<4,1>:ub                -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF4,-nGRFWIB]<4,1>:ub               -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF4]<4,1>:ub                                20:w    \r
+       mac (16)        acc0<1>:w                       r[pREF4,nGRFWIB]<4,1>:ub                1:w\r
+       \r
+       mov (1)         pRESULT:uw                      nOFFSET_RES:uw\r
+       (-f0.0) jmpi INTERLABEL(VFILTER_4x4)\r
+       (-f0.1) mov (1) pRESULT:uw              nOFFSET_INTERIM4x4:uw\r
\r
+ INTERLABEL(VFILTER_4x4):\r
\r
+       asr.sat (16) r[pRESULT]<2>:ub   acc0<16;16,1>:w                                 5:w\r
+       \r
+       (-f0.0) jmpi INTERLABEL(Average_4x4)\r
+       (f0.1) jmpi INTERLABEL(Return_Interpolate_Y_4x4 )\r
+\r
+INTERLABEL(Interpolate_Y_I_4x4):\r
+\r
+       //-----------------------------------------------------------------------\r
+       // CASE: 134C (Integer position) - restores pREF0..pREF3 from gW0..gW3, advances them by 1 byte when xFrac == 3 and by nGRFWIB/2 when yFrac == 3, then widens the 16 reference bytes to words in guwINTERIM_BUF2 before falling through to Average_4x4\r
+       //-----------------------------------------------------------------------\r
+       \r
+       mov (4)         pREF0<1>:uw                     gW0<4;4,1>:uw\r
+               \r
+       cmp.e.f0.0 (4) null:w                   gMVX_FRAC<0;1,0>:w                              3:w\r
+       cmp.e.f0.1 (4) null:w                   gMVY_FRAC<0;1,0>:w                              3:w\r
+       (f0.0) add (4) pREF0<1>:uw              pREF0<4;4,1>:uw                                 1:uw \r
+       (f0.1) add (4) pREF0<1>:uw              pREF0<4;4,1>:uw                                 nGRFWIB/2:uw\r
+       \r
+       mov (16)        guwINTERIM_BUF2(0)<1>   r[pREF0]<4,1>:ub\r
+       \r
+INTERLABEL(Average_4x4):\r
+\r
+       //-----------------------------------------------------------------------\r
+       // CASE: 13456789BCDEF (Average)\r
+       //-----------------------------------------------------------------------\r
+       \r
+       // Average two interim results: every other byte of gubINTERIM_BUF2 (r42) is averaged with every other byte of gINTERIM4x4 (r38), and the saturated result is written back in place to gubINTERIM_BUF2\r
+       avg.sat (16) gubINTERIM_BUF2(0)<2>      gubINTERIM_BUF2(0)<32;16,2>             gINTERIM4x4<32;16,2>:ub\r
+\r
+INTERLABEL(Return_Interpolate_Y_4x4):\r
+       // Move result: four groups of 4 bytes are read from gubINTERIM_BUF2 at byte offsets 0/8/16/24 (using its declared source region) and written to every other byte at destination offsets 0/16/32/48 from gpINTPY\r
+       mov (1)         pRES:uw                         gpINTPY:uw\r
+       mov (4)         r[pRES,0]<2>:ub         gubINTERIM_BUF2(0,0)\r
+       mov (4)         r[pRES,16]<2>:ub        gubINTERIM_BUF2(0,8)\r
+       mov (4)         r[pRES,32]<2>:ub        gubINTERIM_BUF2(0,16)\r
+       mov (4)         r[pRES,48]<2>:ub        gubINTERIM_BUF2(0,24)\r
+\r
+       // Restore all address registers (a0.0-a0.7) from the copy saved in gpADDR; the Case 0 path enters at the label below and skips this restore\r
+       mov (8)         a0<1>:w                                 gpADDR<8;8,1>:w\r
+       \r
+INTERLABEL(Exit_Interpolate_Y_4x4):\r
+       \r
+        \r
+// end of file\r
diff --git a/i965_drv_video/shaders/h264/mc/interpolate_Y_8x8.asm b/i965_drv_video/shaders/h264/mc/interpolate_Y_8x8.asm
new file mode 100644 (file)
index 0000000..e7e3ff9
--- /dev/null
@@ -0,0 +1,262 @@
+/*\r
+ * Interpolation kernel for luminance motion compensation\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Interpolate_Y_8x8.asm\r
+//\r
+// Interpolation kernel for luminance motion compensation\r
+//\r
+//  $Revision: 13 $\r
+//  $Date: 10/09/06 4:00p $\r
+//\r
+\r
+\r
+//---------------------------------------------------------------\r
+// In: pMV => Source address of MV\r
+// In: gMVX_FRAC<2;2,1>:w => MV fractional components\r
+// In: f0.1 (1) => If 1, vertical MV is integer\r
+// In: gpINTPY:uw => Destination address for interpolated result\r
+// In: Reference area starting from R43\r
+//             If horizontal/vertical MVs are all integer, 8x8 pixels are on R43~R44 (2 GRFs)\r
+//             If only horz MV is integer, 8x13 pixels are on R43~R46 (4 GRFs)\r
+//             If only vert MV is integer, 13x8 pixels are on R43~R46 (4 GRFs)\r
+//             If no MVs are integer, 13x13 pixels are on R43~R49 (7 GRFs)\r
+//---------------------------------------------------------------\r
+\r
+\r
+INTERLABEL(Interpolate_Y_8x8_Func):\r
+\r
+\r
+\r
+       // Check whether MVX is an integer MV: f0.0 is set when (MVX & 0x3) == 0; a fractional MVX skips the re-layout below\r
+       and.z.f0.0 (1) null:w                   r[pMV,0]<0;1,0>:w                               0x3:w\r
+       (-f0.0) jmpi (1) INTERLABEL(Interpolate_Y_8x8_Func2)\r
+               \r
+       // Integer MVX: the 13 moves below copy 8-byte rows that sit 8 bytes apart in gubREF out to a 16-byte pitch at byte offset 2, last row first. TODO: remove this back-to-back read - huge latency..\r
+       mov (8) gubREF(6,2)<1>  gubREF(3,0)<8;8,1>\r
+    mov (8)    gubREF(5,18)<1> gubREF(2,24)<8;8,1>             {NoDDClr}\r
+       mov (8) gubREF(5,2)<1>  gubREF(2,16)<8;8,1>             {NoDDChk}\r
+       mov (8) gubREF(4,18)<1> gubREF(2,8)<8;8,1>              {NoDDClr}\r
+       mov (8) gubREF(4,2)<1>  gubREF(2,0)<8;8,1>              {NoDDChk}\r
+       mov (8) gubREF(3,18)<1> gubREF(1,24)<8;8,1>             {NoDDClr}\r
+       mov (8) gubREF(3,2)<1>  gubREF(1,16)<8;8,1>             {NoDDChk}\r
+       mov (8) gubREF(2,18)<1> gubREF(1,8)<8;8,1>              {NoDDClr}\r
+       mov (8) gubREF(2,2)<1>  gubREF(1,0)<8;8,1>              {NoDDChk}\r
+       mov (8) gubREF(1,18)<1> gubREF(0,24)<8;8,1>             {NoDDClr}\r
+       mov (8) gubREF(1,2)<1>  gubREF(0,16)<8;8,1>             {NoDDChk}\r
+       mov (8) gubREF(0,18)<1> gubREF(0,8)<8;8,1>      \r
+    mov (8)    gubREF(0,2)<1>  gubREF(0,0)<8;8,1>\r
+\r
+INTERLABEL(Interpolate_Y_8x8_Func2):\r
+\r
+       // Compute the GRF address of the starting position of the reference area (nGRFWIB further on when f0.1 is clear, i.e. the vertical MV is fractional)\r
+    (-f0.1) mov (1)    pREF:w                  nOFFSET_REF+2+nGRFWIB:w \r
+    (f0.1) mov (1)     pREF:w                  nOFFSET_REF+2:w                 \r
+       mov (1)         pRESULT:uw                      gpINTPY:uw      \r
+       \r
+       /*\r
+        *                       |               |\r
+        *               - - 0 1 2 3 + - \r
+        *                       4 5 6 7\r
+        *                       8 9 A B\r
+        *                       C D E F\r
+        *               - - + - - - + -\r
+     *                  |               |\r
+        */\r
+       \r
+       // Case 0: f0.1 is set when (MVY_FRAC | MVX_FRAC) == 0; copy the 8x8 reference bytes to the result as words (two 8-pixel rows per mov) and exit\r
+       or.z.f0.1 (16) null:w                   gMVY_FRAC<0;1,0>:w                              gMVX_FRAC<0;1,0>:w      \r
+       (f0.1) mov (16) r[pRESULT]<1>:uw                                r[pREF]<16;8,1>:ub\r
+       (f0.1) mov (16) r[pRESULT,nGRFWIB]<1>:uw                r[pREF,nGRFWIB]<16;8,1>:ub\r
+       (f0.1) mov (16) r[pRESULT,nGRFWIB*2]<1>:uw              r[pREF,nGRFWIB*2]<16;8,1>:ub\r
+       (f0.1) mov (16) r[pRESULT,nGRFWIB*3]<1>:uw              r[pREF,nGRFWIB*3]<16;8,1>:ub\r
+       (f0.1) jmpi INTERLABEL(Exit_Interpolate_Y_8x8)\r
+       \r
+       // Save all eight address registers (a0.0-a0.7) to gpADDR; Return_Interpolate_Y_8x8 restores them\r
+       mov (8)         gpADDR<1>:w                     a0<8;8,1>:w\r
+       \r
+       mul.z.f0.0 (1) gW4:w                    gMVY_FRAC:w                                             gMVX_FRAC:w\r
+       add (1)         pREF1:uw                        pREF0:uw                                                nGRFWIB/2:uw\r
+       and.nz.f0.1 (1) null                    gW4:w                                                   1:w\r
+       add (2)         pREF2<1>:uw                     pREF0<2;2,1>:uw                                 nGRFWIB:uw\r
+       mov (4)         gW0<1>:uw                       pREF0<4;4,1>:uw\r
+\r
+       (f0.0) jmpi INTERLABEL(Interpolate_Y_H_8x8)\r
+       (f0.1) jmpi INTERLABEL(Interpolate_Y_H_8x8)\r
+       \r
+       //-----------------------------------------------------------------------\r
+       // CASE: A69BE (H/V interpolation) - reached when gW4 = MVY_FRAC*MVX_FRAC is neither zero (f0.0) nor odd (f0.1)\r
+       //-----------------------------------------------------------------------\r
+       \r
+       // Compute interim horizontal interpolation of 13 lines: 12 in the loop below (4 per iteration) plus the 'last line' block (not 9 lines)\r
+//     add (1)         pREF0<1>:ud                     pREF0<0;1,0>:ud                                 0xffeeffde:ud   // (-18<<16)|(-34)\r
+       add (1)         pREF0<1>:uw                     pREF0<0;1,0>:uw                                 -34:w   \r
+       add (1)         pREF1<1>:uw                     pREF1<0;1,0>:uw                                 -18:w {NoDDClr} \r
+       mov (1)         pRESD:ud                        nOFFSET_INTERIM3:ud                                     {NoDDChk}                       \r
+       \r
+       // Check whether this position is 'A': f0.0 is set when the product gW4 equals 4; f0.0 is reused by the sel and the final jmpi below\r
+       cmp.e.f0.0 (1) null                             gW4:w                                                   4:w\r
+       \r
+       $for(0;<6;2) {\r
+       add (32)        acc0<1>:w                       r[pREF,nGRFWIB*%1]<16;8,1>:ub                   r[pREF0,nGRFWIB*%1+5]<16;8,1>:ub                {Compr}\r
+       mac (32)        acc0<1>:w                       r[pREF,nGRFWIB*%1+1]<16;8,1>:ub                 -5:w    {Compr}\r
+       mac (32)        acc0<1>:w                       r[pREF,nGRFWIB*%1+2]<16;8,1>:ub                 20:w    {Compr}\r
+       mac (32)        acc0<1>:w                       r[pREF,nGRFWIB*%1+3]<16;8,1>:ub                 20:w    {Compr}\r
+       mac (32)        r[pRES,nGRFWIB*%1]<1>:w         r[pREF,nGRFWIB*%1+4]<16;8,1>:ub -5:w    {Compr}\r
+       }\r
+       // last line (13th row, 8 pixels only)\r
+       add (8)         acc0<1>:w                       r[pREF,nGRFWIB*6]<8;8,1>:ub                             r[pREF,nGRFWIB*6+5]<8;8,1>:ub\r
+       mac (8)         acc0<1>:w                       r[pREF,nGRFWIB*6+1]<8;8,1>:ub                   -5:w\r
+       mac (8)         acc0<1>:w                       r[pREF,nGRFWIB*6+2]<8;8,1>:ub                   20:w\r
+       mac (8)         acc0<1>:w                       r[pREF,nGRFWIB*6+3]<8;8,1>:ub                   20:w\r
+       mac (8)         r[pRES,nGRFWIB*6]<1>:w          r[pREF,nGRFWIB*6+4]<8;8,1>:ub   -5:w\r
+\r
+    // Compute interim/output vertical interpolation over the word-sized horizontal results (taps 1,-5,20,20,-5,1; +512 rounding, >>10 with saturation)\r
+    mov (1)            pREF0:ud                        nOFFSET_INTERIM2:ud     {NoDDClr}                       // set pREF0 and pREF1 at the same time\r
+       mov (1)         pREF2D:ud                       nOFFSET_INTERIM4:ud     {NoDDChk,NoDDClr}       // set pREF2 and pREF3 at the same time\r
+       (f0.0) sel (1) pRES:uw                  gpINTPY:uw      nOFFSET_INTERIM:uw {NoDDChk} // Case A (f0.0 set) writes to gpINTPY vs. 69BE write to nOFFSET_INTERIM\r
+    \r
+       $for(0;<4;2) {\r
+       add (32)        acc0<1>:w                       r[pREF0,nGRFWIB*%1]<16;16,1>:w                          512:w   {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF2,nGRFWIB*%1]<8,1>:w                                      -5:w\r
+       mac (16)        acc1<1>:w                       r[pREF2,nGRFWIB*%1+nGRFWIB]<8,1>:w                      -5:w\r
+       mac (32)        acc0<1>:w                       r[pREF0,nGRFWIB*%1+nGRFWIB]<16;16,1>:w          20:w    {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF2,nGRFWIB*%1+nGRFWIB]<8,1>:w                      20:w    \r
+       mac (16)        acc1<1>:w                       r[pREF2,nGRFWIB*%1+nGRFWIB+nGRFWIB]<8,1>:w      20:w    \r
+       mac (32)        acc0<1>:w                       r[pREF0,(2+%1)*nGRFWIB]<16;16,1>:w                      -5:w    {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF2,(2+%1)*nGRFWIB]<8,1>:w                          1:w\r
+       mac (16)        acc1<1>:w                       r[pREF2,(2+%1)*nGRFWIB+nGRFWIB]<8,1>:w          1:w\r
+       asr.sat (16) r[pRES,nGRFWIB*%1]<2>:ub                   acc0<16;16,1>:w                         10:w\r
+       asr.sat (16) r[pRES,nGRFWIB*%1+nGRFWIB]<2>:ub   acc1<16;16,1>:w                         10:w {SecHalf}\r
+       }\r
+       \r
+       (f0.0) jmpi INTERLABEL(Return_Interpolate_Y_8x8)\r
+       \r
+INTERLABEL(Interpolate_Y_H_8x8):\r
+       \r
+       cmp.e.f0.0 (1) null                             gMVX_FRAC:w                                             0:w\r
+       cmp.e.f0.1 (1) null                             gMVY_FRAC:w                                             2:w\r
+       (f0.0) jmpi INTERLABEL(Interpolate_Y_V_8x8)\r
+       (f0.1) jmpi INTERLABEL(Interpolate_Y_V_8x8)\r
+       \r
+       //-----------------------------------------------------------------------\r
+       // CASE: 123567DEF (H interpolation) - reached when MVX_FRAC != 0 and MVY_FRAC != 2\r
+       //-----------------------------------------------------------------------\r
+\r
+       add (4)         pREF0<1>:uw                     gW0<4;4,1>:uw                                   -2:w            \r
+       cmp.g.f0.0 (4) null:w                   gMVY_FRAC<0;1,0>:w                              2:w\r
+       cmp.e.f0.1 (1) null                             gMVX_FRAC:w                                             2:w\r
+       (f0.0) add (4) pREF0<1>:uw              pREF0<4;4,1>:uw                                 nGRFWIB/2:uw\r
+\r
+       cmp.e.f0.0 (1) null:w                   gMVY_FRAC<0;1,0>:w                              0:w\r
+\r
+       (f0.1) sel (1) pRES:uw                  gpINTPY:uw                                              nOFFSET_INTERIM:uw // Case 26E (MVX_FRAC == 2) write to gpINTPY vs. 1357DF write to nOFFSET_INTERIM\r
+       \r
+       // Compute interim/output horizontal interpolation (taps 1,-5,20,20,-5,1; +16 rounding, >>5 with saturation)\r
+       $for(0;<4;2) {\r
+       add (16)        acc0<1>:w                       r[pREF0,nGRFWIB*%1]<8,1>:ub                             16:w\r
+       add (16)        acc1<1>:w                       r[pREF0,nGRFWIB*%1+nGRFWIB]<8,1>:ub             16:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*%1+1]<8,1>:ub                   -5:w\r
+       mac (16)        acc1<1>:w                       r[pREF0,nGRFWIB*%1+1+nGRFWIB]<8,1>:ub   -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*%1+2]<8,1>:ub                   20:w\r
+       mac (16)        acc1<1>:w                       r[pREF0,nGRFWIB*%1+2+nGRFWIB]<8,1>:ub   20:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*%1+3]<8,1>:ub                   20:w\r
+       mac (16)        acc1<1>:w                       r[pREF0,nGRFWIB*%1+3+nGRFWIB]<8,1>:ub   20:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*%1+4]<8,1>:ub                   -5:w\r
+       mac (16)        acc1<1>:w                       r[pREF0,nGRFWIB*%1+4+nGRFWIB]<8,1>:ub   -5:w\r
+       mac (16)        acc0<1>:w                       r[pREF0,nGRFWIB*%1+5]<8,1>:ub                   1:w\r
+       mac (16)        acc1<1>:w                       r[pREF0,nGRFWIB*%1+5+nGRFWIB]<8,1>:ub   1:w\r
+       asr.sat (16) r[pRES,nGRFWIB*%1]<2>:ub                   acc0<16;16,1>:w         5:w\r
+       asr.sat (16) r[pRES,nGRFWIB*%1+nGRFWIB]<2>:ub   acc1<16;16,1>:w         5:w {SecHalf}\r
+    }\r
+    \r
+    (-f0.1) jmpi INTERLABEL(Interpolate_Y_V_8x8)\r
+       (-f0.0) jmpi INTERLABEL(Average_8x8)\r
+       \r
+       jmpi INTERLABEL(Return_Interpolate_Y_8x8)\r
+\r
+INTERLABEL(Interpolate_Y_V_8x8):\r
+\r
+       cmp.e.f0.0 (1) null                             gMVY_FRAC:w                                             0:w\r
+       (f0.0) jmpi INTERLABEL(Interpolate_Y_I_8x8)\r
+       \r
+       //-----------------------------------------------------------------------\r
+       // CASE: 48C59D7BF (V interpolation) - reached when MVY_FRAC != 0; the result goes to nOFFSET_INTERIM only for 4 and C (MVX_FRAC == 0, MVY_FRAC != 2), otherwise to gpINTPY\r
+       //-----------------------------------------------------------------------\r
+\r
+       mov (2)         pREF0<1>:uw                     gW0<4;2,2>:uw   {NoDDClr}\r
+       mov (2)         pREF2<1>:uw                     gW1<2;2,1>:uw   {NoDDChk,NoDDClr}\r
+       mov (1)         pRES:uw                         gpINTPY:uw              {NoDDChk}\r
+\r
+       cmp.g.f0.1 (4) null:w                   gMVX_FRAC<0;1,0>:w                              2:w\r
+       cmp.e.f0.0 (1) null:w                   gMVX_FRAC<0;1,0>:w                              0:w\r
+       (f0.1) add (4) pREF0<1>:uw              pREF0<4;4,1>:uw                                 1:uw\r
+\r
+       cmp.e.f0.1 (1) null                             gMVY_FRAC:w                                             2:w\r
+       (-f0.0) jmpi INTERLABEL(VFILTER_8x8)\r
+       (-f0.1) mov (1) pRES:uw         nOFFSET_INTERIM:uw\r
+       \r
+  INTERLABEL(VFILTER_8x8): \r
+\r
+       // Compute interim/output vertical interpolation (taps 1,-5,20,20,-5,1; +16 rounding, >>5 with saturation)\r
+       $for(0;<4;2) {\r
+       add (32)        acc0<1>:w                       r[pREF0,nGRFWIB*%1-nGRFWIB]<16;8,1>:ub                  16:w {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF2,nGRFWIB*%1-nGRFWIB]<8,1>:ub                             -5:w\r
+       mac (16)        acc1<1>:w                       r[pREF2,nGRFWIB*%1]<8,1>:ub                                             -5:w\r
+       mac (32)        acc0<1>:w                       r[pREF0,nGRFWIB*%1]<16;8,1>:ub                                  20:w {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF2,nGRFWIB*%1]<8,1>:ub                                             20:w    \r
+       mac (16)        acc1<1>:w                       r[pREF2,nGRFWIB*%1+nGRFWIB]<8,1>:ub                             20:w    \r
+       mac (32)        acc0<1>:w                       r[pREF0,nGRFWIB*%1+nGRFWIB]<16;8,1>:ub                  -5:w {Compr}\r
+       mac (16)        acc0<1>:w                       r[pREF2,nGRFWIB*%1+nGRFWIB]<8,1>:ub                             1:w\r
+       mac (16)        acc1<1>:w                       r[pREF2,nGRFWIB*%1+nGRFWIB+nGRFWIB]<8,1>:ub             1:w\r
+       asr.sat (16) r[pRES,nGRFWIB*%1]<2>:ub                   acc0<16;16,1>:w                                 5:w\r
+       asr.sat (16) r[pRES,nGRFWIB*%1+nGRFWIB]<2>:ub   acc1<16;16,1>:w                                 5:w     {SecHalf}\r
+       }\r
+\r
+       (-f0.0) jmpi INTERLABEL(Average_8x8)\r
+       (f0.1) jmpi INTERLABEL(Return_Interpolate_Y_8x8)\r
+\r
+INTERLABEL(Interpolate_Y_I_8x8):\r
+\r
+       //-----------------------------------------------------------------------\r
+       // CASE: 134C (Integer position) - copy 8x8 integer samples to gpINTPY, offset by +1 byte when MVX_FRAC == 3 and by +nGRFWIB/2 when MVY_FRAC == 3, then fall through to Average_8x8\r
+       //-----------------------------------------------------------------------\r
+       \r
+       mov (2)         pREF0<1>:uw                     gW0<2;2,1>:uw           {NoDDClr}\r
+                       \r
+       mov (1)         pRES:uw                         gpINTPY:uw                      {NoDDChk}\r
+\r
+       cmp.e.f0.0 (2) null:w                   gMVX_FRAC<0;1,0>:w                              3:w\r
+       cmp.e.f0.1 (2) null:w                   gMVY_FRAC<0;1,0>:w                              3:w\r
+       (f0.0) add (2) pREF0<1>:uw              pREF0<2;2,1>:uw                                 1:uw \r
+       (f0.1) add (2) pREF0<1>:uw              pREF0<2;2,1>:uw                                 nGRFWIB/2:uw\r
+       \r
+       mov (16)        r[pRES]<1>:uw                   r[pREF0]<8,1>:ub\r
+       mov (16)        r[pRES,nGRFWIB]<1>:uw   r[pREF0,nGRFWIB]<8,1>:ub\r
+       mov (16)        r[pRES,nGRFWIB*2]<1>:uw r[pREF0,nGRFWIB*2]<8,1>:ub\r
+       mov (16)        r[pRES,nGRFWIB*3]<1>:uw r[pREF0,nGRFWIB*3]<8,1>:ub\r
+       \r
+INTERLABEL(Average_8x8):\r
+\r
+       //-----------------------------------------------------------------------\r
+       // CASE: 1345679BCDEF (Average) - position 8 is not averaged: its V pass jumps straight to Return_Interpolate_Y_8x8\r
+       //-----------------------------------------------------------------------\r
+\r
+       // Average two interim results: the bytes at pRES (stride 2) with gubINTERIM_BUF, 16 pixels per instruction, written back in place at pRES\r
+       avg.sat (16) r[pRES,0]<2>:ub                    r[pRES,0]<32;16,2>:ub                   gubINTERIM_BUF(0)       \r
+       avg.sat (16) r[pRES,nGRFWIB]<2>:ub              r[pRES,nGRFWIB]<32;16,2>:ub             gubINTERIM_BUF(1)       \r
+       avg.sat (16) r[pRES,nGRFWIB*2]<2>:ub    r[pRES,nGRFWIB*2]<32;16,2>:ub   gubINTERIM_BUF(2)       \r
+       avg.sat (16) r[pRES,nGRFWIB*3]<2>:ub    r[pRES,nGRFWIB*3]<32;16,2>:ub   gubINTERIM_BUF(3)       \r
+\r
+INTERLABEL(Return_Interpolate_Y_8x8):\r
+       // Restore all address registers from gpADDR (saved after the Case 0 check; the Case 0 path jumps past this to Exit_Interpolate_Y_8x8)\r
+       mov (8)         a0<1>:w                                 gpADDR<8;8,1>:w\r
+       \r
+INTERLABEL(Exit_Interpolate_Y_8x8):\r
+               \r
+// end of file\r
diff --git a/i965_drv_video/shaders/h264/mc/intra_Header.inc b/i965_drv_video/shaders/h264/mc/intra_Header.inc
new file mode 100644 (file)
index 0000000..501c7a8
--- /dev/null
@@ -0,0 +1,276 @@
+/*\r
+ * Header file for all AVC intra prediction kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__INTRA_HEADER__) // Make sure this file is only included once\r
+#define __INTRA_HEADER__\r
+\r
+// Module name: intra_header.inc\r
+//\r
+// Header file for all AVC intra prediction kernels\r
+//\r
+//     This header file defines everything that's specific to intra macroblock kernels\r
+\r
+\r
+//  ----------- Various data buffers and pointers ------------\r
+//\r
+//     I_PCM data buffer\r
+//\r
+#define                I_PCM_BUF_Y                     4\r
+#define                I_PCM_BUF_UV            12\r
+\r
+#define                REG_I_PCM_BUF_Y         r4\r
+#define                REG_I_PCM_BUF_UV        r12\r
+\r
+.declare    I_PCM_Y  Base=REG_I_PCM_BUF_Y  ElementSize=1 SrcRegion=REGION(16,1) Type=ub        // 8-bit I_PCM Y data\r
+.declare    I_PCM_UV Base=REG_I_PCM_BUF_UV ElementSize=1 SrcRegion=REGION(16,1) Type=ub        // 8-bit I_PCM U/V data\r
+\r
+//     Intra macroblock error data blocks\r
+//\r
+#define            ERRBUF              4               // Starting GRF index for error data\r
+#define                REG_ERRBUF      r4\r
+.declare    MBBLOCKW Base=REG_ERRBUF ElementSize=2 SrcRegion=REGION(16,1) Type=w       // For 16-bit inter MB\r
+.declare    MBBLOCKD Base=REG_ERRBUF ElementSize=2 SrcRegion=REGION(16,1) Type=uw      // For use in "send" command\r
+\r
+#define            PERROR              a0.2    // Pointer to macroblock error data\r
+#define            PERROR1             a0.3    // Pointer to macroblock error data used by instruction compression\r
+#define            PERROR_UD   a0.1    // Pointer to macroblock error data in DWORD unit\r
+\r
+//     Intra macroblock reference data\r
+//\r
+#define                REG_INTRA_REF_TOP       r49             // Must be an odd numbered GRF register\r
+.declare    INTRA_REF_TOP0             Base=REG_INTRA_REF_TOP   ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+.declare    INTRA_REF_TOP              Base=REG_INTRA_REF_TOP.4 ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+                                                                               // Actual top row reference data start at offset 4 in BYTE\r
+.declare    INTRA_REF_TOP_W            Base=REG_INTRA_REF_TOP.2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw\r
+                                                                               // Actual top row reference data start at offset 2 in WORD\r
+.declare    INTRA_REF_TOP_D            Base=REG_INTRA_REF_TOP ElementSize=4 DstRegion=<1> Type=ud      // Only used in "send" instruction\r
+\r
+#define                INTRA_REF_LEFT_ID       50\r
+#define                REG_INTRA_REF_LEFT      r50\r
+.declare    INTRA_REF_LEFT0            Base=REG_INTRA_REF_LEFT ElementSize=1 SrcRegion=REGION(8,4) Type=ub\r
+.declare    INTRA_REF_LEFT             Base=REG_INTRA_REF_LEFT.3 ElementSize=1 SrcRegion=REGION(8,4) Type=ub\r
+                                                                               // Actual left column reference data are located at offset 3 in BYTE\r
+.declare    INTRA_REF_LEFT_UV  Base=REG_INTRA_REF_LEFT.2 ElementSize=1 SrcRegion=REGION(8,4) Type=ub\r
+                                                                               // Actual left column U/V reference data are located at offset 2 in BYTE\r
+.declare    INTRA_REF_LEFT_W   Base=REG_INTRA_REF_LEFT.1 ElementSize=2 SrcRegion=REGION(8,2) Type=uw\r
+                                                                               // Actual left column reference data are located at offset 1 in WORD\r
+.declare    INTRA_REF_LEFT_D   Base=REG_INTRA_REF_LEFT ElementSize=4 DstRegion=<1> Type=ud     // Only used in "send" instruction\r
+\r
+#define                PREF_LEFT               a0.4    // Pointer to left reference data\r
+#define                PREF_LEFT_UD    a0.2    // Pointer in DWORD to left reference data\r
+\r
+#define                INTRA_TEMP_0    52\r
+#define                INTRA_TEMP_1    53\r
+#define                INTRA_TEMP_2    54\r
+#define                INTRA_TEMP_3    55\r
+#define                INTRA_TEMP_4    56\r
+#define                INTRA_TEMP_5    57\r
+#define                INTRA_TEMP_6    58\r
+\r
+#define                REG_INTRA_TEMP_0        r52\r
+#define                REG_INTRA_TEMP_1        r53\r
+#define                REG_INTRA_TEMP_2        r54\r
+#define                REG_INTRA_TEMP_3        r55\r
+#define                REG_INTRA_TEMP_4        r56\r
+#define                REG_INTRA_TEMP_5        r57\r
+#define                REG_INTRA_TEMP_6        r58\r
+#define                REG_INTRA_TEMP_7        r59\r
+#define                REG_INTRA_TEMP_8        r60\r
+\r
+// Destination registers for write commit\r
+#define                REG_WRITE_COMMIT_Y      r60.0\r
+#define                REG_WRITE_COMMIT_UV     r61.0\r
+\r
+//  ----------- Various data buffers and pointers ------------\r
+//  R32 - R47 for predicted picture buffer (for both Y and U/V blocks)\r
+//\r
+#define            PREDBUF             32              // Starting GRF index for predicted buffer\r
+#define                REG_PREDBUF     r32\r
+\r
+.declare    PRED_Y             Base=REG_PREDBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub   // Predicted Y picture\r
+.declare    PRED_YW            Base=REG_PREDBUF ElementSize=2 SrcRegion=REGION(16,1) Type=uw   // Predicted Y picture stored in WORD\r
+.declare    PRED_Y_FM  Base=REG_PREDBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub   // Predicted Y picture frame\r
+.declare    PRED_Y_TF  Base=REG_PREDBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub   // Predicted Y picture Top field\r
+\r
+.declare    PRED_UV            Base=REG_PREDBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub   // Predicted U/V picture\r
+.declare    PRED_UVW   Base=REG_PREDBUF ElementSize=2 SrcRegion=REGION(16,1) Type=uw   // Predicted U/V picture stored in WORD\r
+.declare    PRED_UV_FM Base=REG_PREDBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub   // Predicted U/V picture frame\r
+.declare    PRED_UV_TF Base=REG_PREDBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub   // Predicted U/V picture top field\r
+.declare    PRED_UV_BF Base=REG_PREDBUF.16 ElementSize=1 SrcRegion=REGION(16,1) Type=ub        // Predicted U/V picture bottom field\r
+\r
+//  The same region will also be used as finally decoded Y blocks shared with U/V blocks\r
+//\r
+#define            DECBUF              32\r
+#define                REG_DECBUF      r32\r
+.declare    DEC_Y              Base=REG_DECBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // Decoded Y picture\r
+.declare    DEC_UV             Base=REG_DECBUF ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // Decoded U/V P-/B-picture\r
+.declare    DEC_UD             Base=REG_DECBUF ElementSize=4 SrcRegion=REGION(8,1) Type=ud             // Decoded buffer in UD type\r
+\r
+//     Reference buffer for intra_NxN prediction\r
+//\r
+#define                PRED_MODE       REG_INTRA_TEMP_0\r
+.declare    REF_TOP0   Base=REG_INTRA_TEMP_5   ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+.declare    REF_TOP            Base=REG_INTRA_TEMP_5.4 ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+                                                                       // Actual top reference data start from offset 3,i.e. p[-1,-1]\r
+.declare    REF_TOP_W  Base=REG_INTRA_TEMP_5 ElementSize=2 SrcRegion=REGION(16,1) Type=uw\r
+.declare    REF_TOP_D  Base=REG_INTRA_TEMP_5 ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+.declare    REF_LEFT   Base=REG_INTRA_TEMP_6 ElementSize=1 SrcRegion=REGION(16,1) Type=ub\r
+.declare    REF_LEFT_D Base=REG_INTRA_TEMP_6 ElementSize=4 SrcRegion=REGION(8,1) Type=ud\r
+\r
+// For intra prediction plane mode\r
+//\r
+.declare    H1 Base=REG_INTRA_TEMP_0 ElementSize=2 SrcRegion=REGION(8,1) Type=w        // Make sure it's an even GRF\r
+.declare    H2 Base=REG_INTRA_TEMP_0.8 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+.declare    V1 Base=REG_INTRA_TEMP_1 ElementSize=2 SrcRegion=REGION(8,1) Type=w        // Make sure it's the following odd GRF\r
+.declare    V2 Base=REG_INTRA_TEMP_1.8 ElementSize=2 SrcRegion=REGION(8,1) Type=w\r
+\r
+.declare       CP      Base=REG_INTRA_TEMP_2 ElementSize=2 SrcRegion=REGION(16,1) Type=w\r
+\r
+#define                PINTRAPRED_Y    a0.7    // Used as luma intra prediction mode pointer\r
+#define                PINTRAPRED_UV   a0.7    // Used as chroma intra prediction mode pointer\r
+#define                PINTRA4X4_Y             a0.4    // Used as luma intra_4x4 prediction mode pointer\r
+\r
+#define                PBWDCOPY_4              a0.4    // a0.4 - a0.7 used in intra_4x4 prediction for moving data backward\r
+#define                PBWDCOPY_8              a0.6    // a0.6 - a0.7 used in intra_8x8 prediction for moving data backward\r
+\r
+// For Intra_4x4 prediction mode\r
+//\r
+.declare    INTRA_4X4_MODE     Base=REG_INTRA_TEMP_1 ElementSize=4 SrcRegion=REGION(1,0) DstRegion=<1> Type=d  // Actually only need 1 DWORD\r
+\r
+//  ----------- Intra CURBE constants ------------\r
+//\r
+#define                REG_CURBE1      r1\r
+#define                REG_CURBE2      r2\r
+#define                INTRA_4X4_OFFSET        1*GRFWIB                // 9 Bytes\r
+#define                INTRA_8X8_OFFSET        1*GRFWIB+12             // 9 Bytes starting sub-register r1.3:ud\r
+#define                INTRA_16X16_OFFSET      1*GRFWIB+24             // 4 Bytes starting sub-register r1.6:ud\r
+#define                INTRA_CHROMA_OFFSET     1*GRFWIB+28             // 4 Bytes starting sub-register r1.7:ud\r
+\r
+#define                TOP_REF_OFFSET          REG_CURBE1.10   // r1.5:w\r
+\r
+//     Constants used in plane intra prediction mode\r
+#define                XY_3    REG_CURBE2.4    // Stored BYTE constants x-3 for x=0...7, i.e. -3,-2,...3,4 for U/V, need duplicate to every other byte\r
+#define                XY_3_1  REG_CURBE2.5    // Stored BYTE constants x-3 for x=0...7, i.e. -3,-2,...3,4 for 2nd instruction in {Comp}\r
+#define                XY_7    REG_CURBE2.0    // Stored BYTE constants x-7 for x=0...15, i.e. -7,-6,...7,8 for Y\r
+#define                XY_7_1  REG_CURBE2.1    // Stored BYTE constants x-7 for x=0...15, i.e. -7,-6,...7,8 for 2nd instruction in {Comp}\r
+\r
+#define                INV_SHIFT       REG_CURBE2.16\r
+\r
+#define                INV_TRANS4      REG_CURBE2.20   // For reverse data transfer for intra_4x4 (0x00020406)\r
+#define                INV_TRANS48     REG_CURBE2.22   // For reverse data transfer for intra_4x4 (0x0002)\r
+#define                INV_TRANS8      REG_CURBE1.22   // For reverse data transfer for intra_8x8 (0x0001)\r
+\r
+#define                INTRA_MODE      REG_CURBE2.24   // Offset to intra_Pred_4x4_Y from each sub-block\r
+\r
+//  ----------- In-line parameters ------------\r
+//\r
+#define REG_INLINE     r3\r
+\r
+#define INLINE_DW0     REG_INLINE.0<0;1,0>:ud\r
+#define INLINE_DW1     REG_INLINE.1<0;1,0>:ud\r
+#define INLINE_DW2     REG_INLINE.2<0;1,0>:ud\r
+#define INLINE_DW3     REG_INLINE.3<0;1,0>:ud\r
+#define INLINE_DW4     REG_INLINE.4<0;1,0>:ud\r
+#define INLINE_DW5     REG_INLINE.5<0;1,0>:ud\r
+#define INLINE_DW6     REG_INLINE.6<0;1,0>:ud\r
+#define INLINE_DW7     REG_INLINE.7<0;1,0>:ud\r
+\r
+//     Intra macroblock in-line data\r
+//\r
+//     In-line DWORD 0\r
+#define REG_MBAFF_FIELD                                REG_INLINE.1    // :uw, can be added directly to lower-word of MSGDSC\r
+#define MBAFF_FIELD                                    BIT26+BIT25             // Bits 26:25 - MBAFF field macroblock flag\r
+                                                                                                       //  00 = Current macroblock is not an MBAFF field macroblock\r
+                                                                                                       //  11 = Current macroblock is an MBAFF field macroblock\r
+\r
+#define REG_FIELD_PARITY                       INLINE_DW0\r
+#define FIELD_PARITY                           BIT24                   // Bit 24 - Macroblock field parity flag\r
+                                                                                                       //  0 = Current field is a top field\r
+                                                                                                       //  1 = Current field is a bottom field\r
+\r
+#define REG_FIELD_MACROBLOCK_FLAG      INLINE_DW0\r
+#define FIELD_MACROBLOCK_FLAG          BIT14                   // Bit 14 - Field macroblock flag\r
+                                                                                                       //  0 = Current macroblock is not a field macroblock\r
+                                                                                                       //  1 = Current macroblock is a field macroblock\r
+#define REG_MACROBLOCK_TYPE                    INLINE_DW0\r
+#define MACROBLOCK_TYPE                                BIT12+BIT11+BIT10+BIT9+BIT8             // Bit 12:8 - Intra macroblock flag\r
+\r
+#define REG_CHROMA_FORMAT_IDC          INLINE_DW0\r
+#define CHROMA_FORMAT_IDC                      BIT3+BIT2               // Bit 3:2 - Chroma format\r
+                                                                                                       // 00 = Luma only (Monochrome)\r
+                                                                                                       // 01 = YUV420\r
+                                                                                                       // 10 = YUV422\r
+                                                                                                       // 11 = YUV444\r
+#define        REG_MBAFF_PIC                           INLINE_DW0\r
+#define MBAFF_PIC                                      BIT1                    // Bit 1 - MBAFF Frame picture\r
+                                                                                                       // 0 = Not an MBAFF frame picture\r
+                                                                                                       // 1 = An MBAFF frame picture\r
+#define REG_INTRA_PRED_8X8_BLK2_AVAIL_FLAG     INLINE_DW0\r
+#define INTRA_PRED_8X8_BLK2_AVAIL_FLAG BIT4            // Bit 4: Pixel available for block 2 in an intra_8x8 MB.\r
+\r
+//     In-line DWORD 1\r
+#define ORIX                   REG_INLINE.4    // :ub, H. origin of the macroblock in macroblock unit\r
+#define ORIY                   REG_INLINE.5    // :ub, V. origin of the macroblock in macroblock unit\r
+\r
+//     In-line DWORD 2\r
+#define        REG_CBPCYB                                      REG_INLINE.9    // :ub, Coded block pattern\r
+#define        REG_CBPCY                                       INLINE_DW2              // Bits 13:8 - Coded block pattern\r
+                                                                                                       // reflect Y0, Y1, Y2, Y3, Cb4, Cr5\r
+                                                                                                       // Bit 13 - Y0\r
+                                                                                                       // Bit 12 - Y1\r
+                                                                                                       // Bit 11 - Y2\r
+                                                                                                       // Bit 10 - Y3\r
+                                                                                                       // Bit 9 - U4\r
+                                                                                                       // Bit 8 - V5\r
+#define CBP_MASK                                       0x3F00:ud               // Bit mask for all CBP bits\r
+#define CBP_Y_MASK                                     0x3C00:ud               // Bit mask for CBP Y bits\r
+#define CBP_UV_MASK                                    0x0300:ud               // Bit mask for CBP U/V bits\r
+\r
+#define CBP_Y0_MASK                                    BIT13:ud                // Bit mask for CBP Y0 bit\r
+#define CBP_Y1_MASK                                    BIT12:ud                // Bit mask for CBP Y1 bit\r
+#define CBP_Y2_MASK                                    BIT11:ud                // Bit mask for CBP Y2 bit\r
+#define CBP_Y3_MASK                                    BIT10:ud                // Bit mask for CBP Y3 bit\r
+#define CBP_U_MASK                                     BIT9:ud                 // Bit mask for CBP U bit\r
+#define CBP_V_MASK                                     BIT8:ud                 // Bit mask for CBP V bit\r
+\r
+//     In-line DWORD 3\r
+#define REG_INTRA_CHROMA_PRED_MODE     REG_INLINE.12   // :ub - Intra chroma prediction mode\r
+#define INTRA_CHROMA_PRED_MODE         BIT7+BIT6               // Bit 7:6 - Intra chroma prediction mode\r
+                                                                                                       // 00 = Intra DC prediction\r
+                                                                                                       // 01 = Intra horizontal prediction\r
+                                                                                                       // 10 = Intra vertical prediction\r
+                                                                                                       // 11 = Intra plane prediction\r
+#define INTRA_CHROMA_PRED_MODE_SHIFT   6                       // Intra chroma prediction mode shift\r
+\r
+#define REG_INTRA_PRED_AVAIL_FLAG      INLINE_DW3\r
+#define INTRA_PRED_AVAIL_FLAG          BIT4+BIT3+BIT2+BIT1+BIT0        // Bits 4:0 - Intra prediction available flag\r
+                                                                                                       // Bit 0: Macroblock A (the left neighbor) entire or top half\r
+                                                                                                       // Bit 1: Macroblock B (the upper neighbor)\r
+                                                                                                       // Bit 2: Macroblock C (the above-right neighbor)\r
+                                                                                                       // Bit 3: Macroblock D (the above-left neighbor)\r
+                                                                                                       // Bit 4: Macroblock A (the left neighbor) bottom half\r
+                                                                                                       // Each bit is defined below\r
+                                                                                                       // 0 = The macroblock is not available for intra prediction\r
+                                                                                                       // 1 = The macroblock is available for intra prediction\r
+#define INTRA_PRED_LEFT_TH_AVAIL_FLAG  BIT0            // Bit 0: Macroblock A (the left neighbor) entire or top half\r
+#define INTRA_PRED_UP_AVAIL_FLAG               BIT1            // Bit 1: Macroblock B (the upper neighbor)\r
+#define INTRA_PRED_UP_RIGHT_AVAIL_FLAG BIT2            // Bit 2: Macroblock C (the above-right neighbor)\r
+#define INTRA_PRED_UP_LEFT_AVAIL_FLAG  BIT3            // Bit 3: Macroblock D (the above-left neighbor)\r
+#define INTRA_PRED_LEFT_BH_AVAIL_FLAG  BIT4            // Bit 4: Macroblock A (the left neighbor) bottom half\r
+//#define INTRA_PRED_8X8_BLK2_AVAIL_FLAG       BIT5            // Bit 5: Pixel available for block 2 in an intra_8x8 MB.\r
+#define REG_INTRA_PRED_AVAIL_FLAG_BYTE REG_INLINE.12   // Byte location of Intra_Pred_Avail_Flag\r
+#define REG_INTRA_PRED_AVAIL_FLAG_WORD REG_INLINE.6    // Word location of Intra_Pred_Avail_Flag\r
+\r
+\r
+.declare    INTRA_PRED_MODE  Base=REG_INLINE.16 ElementSize=1 SrcRegion=REGION(16,1) Type=ub   // Intra prediction mode\r
+\r
+// End of intra_header.inc\r
+\r
+#endif // !defined(__INTRA_HEADER__)\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/intra_Pred_4x4_Y_4.asm b/i965_drv_video/shaders/h264/mc/intra_Pred_4x4_Y_4.asm
new file mode 100644 (file)
index 0000000..584d012
--- /dev/null
@@ -0,0 +1,240 @@
+/*\r
+ * Intra predict 4 Intra_4x4 luma blocks\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__INTRA_PRED_4X4_Y_4__)           // Make sure this is only included once\r
+#define __INTRA_PRED_4X4_Y_4__\r
+\r
+// Module name: intra_Pred_4x4_Y_4.asm\r
+//\r
+// Intra predict 4 Intra_4x4 luma blocks\r
+//\r
+//--------------------------------------------------------------------------\r
+//  Input data:\r
+//\r
+//  REF_TOP:   Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1)\r
+//  REF_LEFT:  Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,0)\r
+//     PRED_MODE:      Intra prediction mode stored in 4 words (4 LSB)\r
+//     REG_INTRA_PRED_AVAIL:   Top/Left available flag, (Bit0: Left, Bit1: Top)\r
+//\r
+//--------------------------------------------------------------------------\r
+\r
+#undef INTRA_PRED_AVAIL\r
+#undef INTRA_REF\r
+#undef REF_LEFT_BACK\r
+#undef REF_TMP\r
+#undef REF_TMP1\r
+\r
+#define        INTRA_PRED_AVAIL        REG_INTRA_TEMP_2.8\r
+#define INTRA_REF                      REG_INTRA_TEMP_2\r
+#define        REF_LEFT_BACK           REG_INTRA_TEMP_8\r
+#define        REF_TMP                         REG_INTRA_TEMP_3\r
+#define REF_TMP1                       REG_INTRA_TEMP_4\r
+\r
+intra_Pred_4x4_Y_4:\r
+\r
+       mov     (8)             REF_LEFT_BACK<1>:ub     REF_LEFT(0)REGION(8,1)  // Back up left reference data (REF_LEFT is overwritten per sub-block below)\r
+//     Set up pointers to each intra_4x4 prediction mode\r
+//     (4 LSBs of each PRED_MODE word index the table at INTRA_4X4_OFFSET)\r
+       and     (4)             PINTRA4X4_Y<1>:w        PRED_MODE<4;4,1>:w      0x0F:w\r
+       add (4)         INTRA_4X4_MODE(0)       r[PINTRA4X4_Y, INTRA_4X4_OFFSET]<1,0>:ub        INTRA_MODE<4;4,1>:ub\r
+\r
+//     Sub-block 0 *****************\r
+       mov (1)         INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL<0;1,0>:w           // Top/Left neighbor available flags\r
+       CALL_1(INTRA_4X4_MODE(0),1)\r
+\r
+//     Add error data to predicted intra data\r
+ADD_ERROR_SB0:\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK0]<2>:ub r[PERROR,ERRBLK0]<8;4,1>:w              REG_INTRA_4X4_PRED<8;8,1>:w             // Too bad indexed src can't\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK0+16]<2>:ub      r[PERROR,ERRBLK0+32]<8;4,1>:w   REG_INTRA_4X4_PRED.8<8;8,1>:w   // cross 2 GRFs\r
+\r
+//     Sub-block 1 *****************\r
+       mov     (16)    REF_TOP0(0)<1>  REF_TOP0(0,4)REGION(8,1)                // Top reference data\r
+       mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK0+6]<8;1,0>:ub   // New left reference data from sub-block 0\r
+       or (1)          INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL<0;1,0>:w   1:w             // Left neighbor is available\r
+       CALL_1(INTRA_4X4_MODE(0,1),1)\r
+\r
+//     Add error data to predicted intra data\r
+ADD_ERROR_SB1:\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK1]<2>:ub r[PERROR,ERRBLK1]<8;4,1>:w              REG_INTRA_4X4_PRED<8;8,1>:w             // Too bad indexed src can't\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK1+16]<2>:ub      r[PERROR,ERRBLK1+32]<8;4,1>:w   REG_INTRA_4X4_PRED.8<8;8,1>:w   // cross 2 GRFs\r
+\r
+//     Sub-block 2 *****************\r
+       mov     (1)             REF_TOP0(0,3)<1>        REF_LEFT_BACK.3<0;1,0>:ub               // Top-left reference data from stored left reference data\r
+       mov     (4)             REF_TOP0(0,4)<1>        r[PPREDBUF_Y,PREDSUBBLK0+24]REGION(4,2):ub      // Top reference data\r
+       mov     (4)             REF_TOP0(0,8)<1>        r[PPREDBUF_Y,PREDSUBBLK0+24+32]REGION(4,2):ub   // Too bad indexed src can't cross 2 GRFs\r
+       mov     (4)             REF_TOP0(0,12)<1>       r[PPREDBUF_Y,PREDSUBBLK0+30+32]REGION(1,0):ub   // Extended top-right reference data\r
+       mov     (4)             REF_LEFT(0)<1>          REF_LEFT_BACK.4<4;4,1>:ub       // From stored left reference data\r
+       or (1)          INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL<0;1,0>:w   2:w             // Top neighbor is available\r
+       CALL_1(INTRA_4X4_MODE(0,2),1)\r
+\r
+//     Add error data to predicted intra data\r
+ADD_ERROR_SB2:\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK2]<2>:ub r[PERROR,ERRBLK2]<8;4,1>:w              REG_INTRA_4X4_PRED<8;8,1>:w             // Too bad indexed src can't\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK2+16]<2>:ub      r[PERROR,ERRBLK2+32]<8;4,1>:w   REG_INTRA_4X4_PRED.8<8;8,1>:w   // cross 2 GRFs\r
+\r
+//     Sub-block 3 *****************\r
+       mov     (16)    REF_TOP0(0)<1>          REF_TOP0(0,4)REGION(8,1)        // Top reference data\r
+       mov     (8)             REF_TOP0(0,8)<1>        REF_TOP0(0,7)<0;1,0>    // Extended top-right reference data\r
+       mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK2+6]<8;1,0>:ub   // Left reference data from sub-block 2\r
+       or (1)          INTRA_PRED_AVAIL<1>:w   REG_INTRA_PRED_AVAIL<0;1,0>:w   3:w             // Top/Left neighbor are available\r
+       CALL_1(INTRA_4X4_MODE(0,3),1)\r
+\r
+//     Add error data to predicted intra data\r
+ADD_ERROR_SB3:\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK3]<2>:ub r[PERROR,ERRBLK3]<8;4,1>:w              REG_INTRA_4X4_PRED<8;8,1>:w             // Too bad indexed src can't\r
+       add.sat (8)     r[PPREDBUF_Y,PREDSUBBLK3+16]<2>:ub      r[PERROR,ERRBLK3+32]<8;4,1>:w   REG_INTRA_4X4_PRED.8<8;8,1>:w   // cross 2 GRFs\r
+\r
+       RETURN\r
+\r
+//--------------------------------------------------------------------------\r
+//  Actual module that performs Intra_4x4 prediction and construction\r
+//\r
+//  REF_TOP:           Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1)\r
+//  REF_LEFT:          Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,0)\r
+//     PINTRA4X4_Y:    Intra prediction mode\r
+//     INTRA_PRED_AVAIL:       Top/Left available flag, (Bit0: Left, Bit1: Top)\r
+//\r
+//     Output data:\r
+//\r
+//     REG_INTRA_4X4_PRED: Predicted 4x4 block data stored in 1 GRF register\r
+//--------------------------------------------------------------------------\r
+intra_Pred_4x4_Y:\r
+// Mode 0: vertical prediction - region <0;4,1> repeats the 4 top reference pixels for each of the 4 rows\r
+INTRA_4X4_VERTICAL:\r
+       mov (16)        REG_INTRA_4X4_PRED<1>:w REF_TOP(0)<0;4,1>\r
+       RETURN_1\r
+\r
+// Mode 1: horizontal prediction - region <1;4,0> replicates one left reference pixel across each row\r
+INTRA_4X4_HORIZONTAL:\r
+       mov (16)        REG_INTRA_4X4_PRED<1>:w REF_LEFT(0)<1;4,0>\r
+       RETURN_1\r
+\r
+// Mode 2: DC prediction\r
+INTRA_4X4_DC:\r
+// Rearrange reference samples for unified DC prediction code\r
+// (f0.0 = top available, f0.1 = left available; an unavailable side is replaced by the other side, or by 0x80 if both are missing)\r
+    and.nz.f0.0 (16)   NULLREG         INTRA_PRED_AVAIL<0;1,0>:w       2:w  {Compr}\r
+    and.nz.f0.1 (16)   NULLREG         INTRA_PRED_AVAIL<0;1,0>:w       1:w  {Compr}\r
+       (-f0.0.any16h) mov (16) REF_TOP_W(0)<1> 0x8080:uw                               // Top macroblock not available for intra prediction: preset top to 0x80\r
+       (-f0.1.any8h) mov (8)   REF_LEFT(0)<1>  REF_TOP(0)REGION(8,1)   // Left macroblock not available for intra prediction: use top instead\r
+       (-f0.0.any8h) mov (8)   REF_TOP(0)<1>   REF_LEFT(0)REGION(8,1)  // Top macroblock not available for intra prediction: use left instead\r
+// Perform DC prediction: (sum of 4 top + 4 left reference pixels + 4) >> 3, replicated to all 16 pixels\r
+//\r
+       add (4)         PRED_YW(15)<1>  REF_TOP(0)REGION(4,1)   REF_LEFT(0)REGION(4,1)\r
+       add (2)         PRED_YW(15)<1>  PRED_YW(15)REGION(2,1)  PRED_YW(15,2)REGION(2,1)\r
+       add (16)        acc0<1>:w               PRED_YW(15)REGION(1,0)  PRED_YW(15,1)REGION(1,0)\r
+       add     (16)    acc0<1>:w               acc0:w  4:w\r
+       shr (16)        REG_INTRA_4X4_PRED<1>:w acc0:w  3:w\r
+       RETURN_1\r
+\r
+// Mode 3: diagonal down-left prediction (3-tap [1 2 1] filter over the top reference row)\r
+INTRA_4X4_DIAG_DOWN_LEFT:\r
+       mov     (8)             INTRA_REF<1>:ub REF_TOP(0)REGION(8,1)           // Keep REF_TOP untouched for future use\r
+       mov     (4)             INTRA_REF.8<1>:ub       REF_TOP(0,7)REGION(4,1) // p[8,-1] = p[7,-1]\r
+       add (8)         acc0<1>:w               INTRA_REF.2<8;8,1>      2:w             // p[x+2]+2\r
+       mac (8)         acc0<1>:w               INTRA_REF.1<8;8,1>      2:w             // 2*p[x+1]+p[x+2]+2\r
+       mac (8)         PRED_YW(15)<1>  INTRA_REF.0<8;8,1>      1:w             // p[x]+2*p[x+1]+p[x+2]+2\r
+\r
+       shr (16)        REG_INTRA_4X4_PRED<1>:w PRED_YW(15)<1;4,1>      2:w             // (p[x]+2*p[x+1]+p[x+2]+2)>>2, each row starts one sample further right\r
+       RETURN_1\r
+\r
+// Mode 4: diagonal down-right prediction\r
+INTRA_4X4_DIAG_DOWN_RIGHT:\r
+\r
+//     Set inverse shift count: shift the left reference DWORD by the per-channel INV_SHIFT counts; byte 3 of each result is gathered below\r
+       shl     (4)             REF_TMP<1>:ud   REF_LEFT_D(0)REGION(1,0)        INV_SHIFT<4;4,1>:b\r
+       mov     (8)             INTRA_REF.4<1>:ub       REF_TOP(0,-1)REGION(8,1)        // INTRA_REF holds all reference data\r
+       mov     (4)             INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub\r
+\r
+       add (8)         acc0<1>:w               INTRA_REF.2<8;8,1>:ub   2:w             // p[x+2]+2\r
+       mac (8)         acc0<1>:w               INTRA_REF.1<8;8,1>:ub   2:w             // 2*p[x+1]+p[x+2]+2\r
+       mac (8)         INTRA_REF<1>:w  INTRA_REF<8;8,1>:ub             1:w             // p[x]+2*p[x+1]+p[x+2]+2\r
+\r
+//     Store data in reversed order (rows are fetched through the PBWDCOPY_4 pointers built from INV_TRANS4)\r
+       add (4)         PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b     INTRA_TEMP_2*GRFWIB:w   // Must match with INTRA_REF\r
+       shr (16)        REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,1>:w    2:w\r
+       RETURN_1\r
+\r
+// Mode 5: vertical-right prediction\r
+INTRA_4X4_VERT_RIGHT:\r
+\r
+//     Set inverse shift count: shift the left reference DWORD by the per-channel INV_SHIFT counts; byte 3 of each result is gathered below\r
+       shl     (4)             REF_TMP<1>:ud   REF_LEFT_D(0)REGION(1,0)        INV_SHIFT<4;4,1>:b\r
+       mov     (8)             INTRA_REF.4<1>:ub       REF_TOP(0,-1)REGION(8,1)        // INTRA_REF holds all reference data\r
+       mov     (4)             INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub\r
+\r
+       // Even rows\r
+       avg (8)         PRED_YW(14)<1>  INTRA_REF.4<8;8,1>      INTRA_REF.5<8;8,1>      // avg(p[x-1],p[x])\r
+       // Odd rows\r
+       add (8)         acc0<1>:w               INTRA_REF.3<8;8,1>:ub           2:w             // p[x]+2\r
+       mac (8)         acc0<1>:w               INTRA_REF.2<8;8,1>:ub           2:w             // 2*p[x-1]+p[x]+2\r
+       mac (8)         acc0<1>:w               INTRA_REF.1<8;8,1>:ub           1:w             // p[x-2]+2*p[x-1]+p[x]+2\r
+       shr (8)         INTRA_REF<1>:w  acc0:w  2:w             // (p[x-2]+2*p[x-1]+p[x]+2)>>2\r
+\r
+       mov     (4)             INTRA_REF.2<2>:w        INTRA_REF.2<4;4,1>:w    // Keep zVR = -2,-3 unchanged\r
+       mov     (4)             INTRA_REF.3<2>:w        PRED_YW(14)REGION(4,1)  // Combining even rows (interleaved with the odd-row results)\r
+\r
+       add (4)         PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b     INTRA_TEMP_2*GRFWIB:w   // Must match with INTRA_REF\r
+       mov (16)        REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,2>:w\r
+       RETURN_1\r
+\r
+// Mode 6: horizontal-down prediction\r
+INTRA_4X4_HOR_DOWN:\r
+//     Set inverse shift count: shift the left reference DWORD by the per-channel INV_SHIFT counts; byte 3 of each result is gathered below\r
+       shl     (4)             REF_TMP<1>:ud   REF_LEFT_D(0)REGION(1,0)        INV_SHIFT<4;4,1>:b\r
+       mov     (8)             INTRA_REF.4<1>:ub       REF_TOP(0,-1)REGION(8,1)        // INTRA_REF holds all reference data\r
+       mov     (4)             INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub\r
+\r
+       // Even pixels\r
+       avg (8)         PRED_YW(14)<1>  INTRA_REF<8;8,1>        INTRA_REF.1<8;8,1>      // avg(p[y-1],p[y])\r
+       // Odd pixels\r
+       add (8)         acc0<1>:w               INTRA_REF.2<8;8,1>:ub   2:w             // p[y]+2\r
+       mac (8)         acc0<1>:w               INTRA_REF.1<8;8,1>:ub   2:w             // 2*p[y-1]+p[y]+2\r
+       mac (8)         REF_TMP<1>:w    INTRA_REF.0<8;8,1>:ub   1:w             // p[y-2]+2*p[y-1]+p[y]+2\r
+       shr (4)         INTRA_REF.1<2>:w        REF_TMP<4;4,1>:w        2:w             // (p[y-2]+2*p[y-1]+p[y]+2)>>2\r
+\r
+       shr     (2)             INTRA_REF.8<1>:w        REF_TMP.4<2;2,1>:w      2:w             // Keep zVR = -2,-3 unchanged\r
+       mov     (4)             INTRA_REF.0<2>:w        PRED_YW(14)REGION(4,1)  // Combining even pixels (interleaved with the odd-pixel results)\r
+\r
+       shl (4)         PBWDCOPY_4<1>:w INV_TRANS4<4;4,1>:b     1:w             // Convert to WORD offset\r
+       add (4)         PBWDCOPY_4<1>:w PBWDCOPY_4<4;4,1>:w     INTRA_TEMP_2*GRFWIB:w   // Must match with INTRA_REF\r
+       mov (16)        REG_INTRA_4X4_PRED<1>:w r[PBWDCOPY_4]<4,1>:w\r
+       RETURN_1\r
+\r
+// Mode 7: vertical-left prediction\r
+INTRA_4X4_VERT_LEFT:\r
+       // Even rows (results go to the even words of PRED_YW(14))\r
+       avg (8)         PRED_YW(14)<2>  REF_TOP(0)REGION(8,1)   REF_TOP(0,1)REGION(8,1) // avg(p[x],p[x+1])\r
+       // Odd rows (results go to the odd words of PRED_YW(14))\r
+       add (8)         acc0<1>:w               REF_TOP(0,2)REGION(8,1) 2:w             // p[x+2]+2\r
+       mac (8)         acc0<1>:w               REF_TOP(0,1)REGION(8,1) 2:w             // 2*p[x+1]+p[x+2]+2\r
+       mac (8)         PRED_YW(15)<1>  REF_TOP(0)REGION(8,1)           1:w             // p[x]+2*p[x+1]+p[x+2]+2\r
+       shr (8)         PRED_YW(14,1)<2>        PRED_YW(15)REGION(8,1)  2:w\r
+\r
+       mov (16)        REG_INTRA_4X4_PRED<1>:w PRED_YW(14)<1;4,2>\r
+       RETURN_1\r
+\r
+// Mode 8: horizontal-up prediction\r
+INTRA_4X4_HOR_UP:\r
+//     Set extra left reference pixels for unified prediction\r
+       mov     (8)             REF_LEFT(0,4)<1>        REF_LEFT(0,3)REGION(1,0)        // Copy p[-1,3] to p[-1,y],y=4...11 (exec size 8 writes 8 bytes)\r
+       // Even pixels (results go to the even words of PRED_YW(14))\r
+       avg (8)         PRED_YW(14)<2>  REF_LEFT(0)REGION(8,1)  REF_LEFT(0,1)REGION(8,1)        // avg(p[y],p[y+1])\r
+       // Odd pixels (results go to the odd words of PRED_YW(14))\r
+       add (8)         acc0<1>:w               REF_LEFT(0,2)REGION(8,1)        2:w             // p[y+2]+2\r
+       mac (8)         acc0<1>:w               REF_LEFT(0,1)REGION(8,1)        2:w             // 2*p[y+1]+p[y+2]+2\r
+       mac (8)         PRED_YW(15)<1>  REF_LEFT(0)REGION(8,1)          1:w             // p[y]+2*p[y+1]+p[y+2]+2\r
+       shr (8)         PRED_YW(14,1)<2>        PRED_YW(15)REGION(8,1)  2:w             // (p[y]+2*p[y+1]+p[y+2]+2)>>2\r
+\r
+       mov (16)        REG_INTRA_4X4_PRED<1>:w PRED_YW(14)<2;4,1>\r
+       RETURN_1\r
+\r
+// End of intra_Pred_4x4_Y_4\r
+\r
+#endif // !defined(__INTRA_PRED_4X4_Y_4__)\r
diff --git a/i965_drv_video/shaders/h264/mc/intra_Pred_8x8_Y.asm b/i965_drv_video/shaders/h264/mc/intra_Pred_8x8_Y.asm
new file mode 100644 (file)
index 0000000..ce77771
--- /dev/null
@@ -0,0 +1,246 @@
+/*\r
+ * Intra predict 8X8 luma block\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__INTRA_PRED_8X8_Y__)             // Make sure this is only included once\r
+#define __INTRA_PRED_8X8_Y__\r
+\r
+// Module name: intra_Pred_8X8_Y.asm\r
+//\r
+// Intra predict 8X8 luma block\r
+//\r
+//--------------------------------------------------------------------------\r
+//  Input data:\r
+//\r
+//  REF_TOP:   Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1), p[-1,-1] and [15,-1] adjusted\r
+//  REF_LEFT:  Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,2), REF_LEFT(0,1) (p[-1,-1]) adjusted\r
+//     PRED_MODE:      Intra prediction mode stored in 4 LSBs\r
+//     INTRA_PRED_AVAIL:       Top/Left available flag, (Bit0: Left, Bit1: Top)\r
+//\r
+//     Output data:\r
+//\r
+//     REG_INTRA_8X8_PRED: Predicted 8X8 block data\r
+//--------------------------------------------------------------------------\r
+\r
+#define INTRA_REF      REG_INTRA_TEMP_1\r
+#define REF_TMP                REG_INTRA_TEMP_2\r
+\r
+intra_Pred_8x8_Y:\r
+\r
+//     Reference sample filtering\r
+//     (3-tap [1 2 1] filter over the top and left reference samples, results written back to REF_TOP/REF_LEFT)\r
+       // Set up boundary pixels for unified filtering\r
+       mov (1)         REF_TOP(0,16)<1>        REF_TOP(0,15)REGION(1,0)        // p[16,-1] = p[15,-1]\r
+       mov     (8)             REF_LEFT(0,2+8)<1>      REF_LEFT(0,2+7)REGION(1,0)      // p[-1,8] = p[-1,7]\r
+\r
+       // Top reference sample filtering (!!Consider instruction compression later)\r
+       add (16)        acc0<1>:w       REF_TOP(0,-1)REGION(16,1)       2:w             // p[x-1,-1]+2\r
+       mac (16)        acc0<1>:w       REF_TOP(0)REGION(16,1)          2:w             // p[x-1,-1]+2*p[x,-1]+2\r
+       mac (16)        acc0<1>:w       REF_TOP(0,1)REGION(16,1)        1:w             // p[x-1,-1]+2*p[x,-1]+p[x+1,-1]+2\r
+       shr     (16)    REF_TMP<1>:w    acc0:w  2:w             // (p[x-1,-1]+2*p[x,-1]+p[x+1,-1]+2)>>2\r
+\r
+       // Left reference sample filtering\r
+       add (16)        acc0<1>:w       REF_LEFT(0)REGION(16,1)         2:w             // p[-1,y-1]+2\r
+       mac (16)        acc0<1>:w       REF_LEFT(0,1)REGION(16,1)       2:w             // p[-1,y-1]+2*p[-1,y]+2\r
+       mac (16)        acc0<1>:w       REF_LEFT(0,2)REGION(16,1)       1:w             // p[-1,y-1]+2*p[-1,y]+p[-1,y+1]+2\r
+       shr     (16)    INTRA_REF<1>:w  acc0:w  2:w             // (p[-1,y-1]+2*p[-1,y]+p[-1,y+1]+2)>>2\r
+\r
+       // Re-assign filtered reference samples (low byte of each filtered word)\r
+       mov     (16)    REF_TOP(0)<1>   REF_TMP<32;16,2>:ub                     // p'[x,-1], x=0...15\r
+       mov     (8)             REF_LEFT(0)<1>  INTRA_REF.2<16;8,2>:ub          // p'[-1,y], y=0...7\r
+       mov     (1)             REF_TOP(0,-1)<1>        INTRA_REF<0;1,0>:ub             // p'[-1,-1]\r
+\r
+//     Select intra_8x8 prediction mode\r
+//     (4 LSBs of PRED_MODE index the byte table at INTRA_8X8_OFFSET)\r
+       and     (1)     PINTRAPRED_Y<1>:w       PRED_MODE<0;1,0>:w      0x0F:w\r
+       // WA for "jmpi" restriction: load the table entry into a GRF (REG_INTRA_TEMP_1) first, then jump with it\r
+       mov (1) REG_INTRA_TEMP_1<1>:ud  r[PINTRAPRED_Y, INTRA_8X8_OFFSET]:ub\r
+       jmpi (1) REG_INTRA_TEMP_1<0;1,0>:d\r
+\r
+// Mode 0: vertical prediction - region <0;8,1> repeats the 8 top reference pixels; each add.sat covers 16 pixels (2 rows) and adds the error data\r
+#define        PTMP    a0.6\r
+#define PTMP_D a0.3\r
+INTRA_8X8_VERTICAL:\r
+    $for(0,0; <4; 1,32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  r[PERROR,%2]<16;16,1>:w REF_TOP(0)<0;8,1>\r
+       }\r
+       RETURN\r
+\r
+// Mode 1: horizontal prediction - region <1;8,0> replicates one left reference pixel across each row; each add.sat covers 2 rows and adds the error data\r
+INTRA_8X8_HORIZONTAL:\r
+    $for(0,0; <8; 2,32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  r[PERROR,%2]<16;16,1>:w REF_LEFT(0,%1)<1;8,0>\r
+       }\r
+       RETURN\r
+\r
+// Mode 2: DC prediction\r
+INTRA_8X8_DC:\r
+// Rearrange reference samples for unified DC prediction code\r
+// (an unavailable side is replaced by the other side, or by 0x80 if both are missing)\r
+    and.nz.f0.0 (16)   NULLREG         INTRA_PRED_AVAIL<0;1,0>:w       2:w     // Top macroblock available for intra prediction?\r
+    and.nz.f0.1 (8)            NULLREG         INTRA_PRED_AVAIL<0;1,0>:w       1:w     // Left macroblock available for intra prediction?\r
+       (-f0.0.any16h) mov (16) REF_TOP_W(0)<1> 0x8080:uw\r
+       (-f0.1.any8h) mov (8)   REF_LEFT(0)<1>  REF_TOP(0)REGION(8,1)\r
+       (-f0.0.any8h) mov (8)   REF_TOP(0)<1>   REF_LEFT(0)REGION(8,1)\r
+\r
+// Perform DC prediction: (sum of 8 top + 8 left reference pixels + 8) >> 4, replicated to 16 words\r
+//\r
+       add (8)         PRED_YW(15)<1>  REF_TOP(0)REGION(8,1)   REF_LEFT(0)REGION(8,1)\r
+       add (4)         PRED_YW(15)<1>  PRED_YW(15)REGION(4,1)  PRED_YW(15,4)REGION(4,1)\r
+       add (2)         PRED_YW(15)<1>  PRED_YW(15)REGION(2,1)  PRED_YW(15,2)REGION(2,1)\r
+       add (16)        acc0<1>:w               PRED_YW(15)REGION(1,0)  PRED_YW(15,1)REGION(1,0)\r
+       add     (16)    acc0<1>:w               acc0:w  8:w\r
+       shr (16)        REG_INTRA_TEMP_0<1>:w   acc0:w  4:w\r
+\r
+       // Add error block (16 pixels = 2 rows per instruction)\r
+    $for(0,0; <4; 1,32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  r[PERROR,%2]<16;16,1>:w REG_INTRA_TEMP_0<16;16,1>:w\r
+       }\r
+       RETURN\r
+\r
+// Mode 3: diagonal down-left prediction\r
+INTRA_8X8_DIAG_DOWN_LEFT:\r
+       mov     (8)             REF_TOP(0,16)<1>        REF_TOP(0,15)REGION(8,1)        // p[16,-1] = p[15,-1]; NOTE(review): exec size 8 with REGION(8,1) also writes REF_TOP(0,17..23) - confirm only p[16,-1] is needed\r
+       add (16)        acc0<1>:w               REF_TOP(0,2)REGION(16,1)        2:w             // p[x+2]+2\r
+       mac (16)        acc0<1>:w               REF_TOP(0,1)REGION(16,1)        2:w             // 2*p[x+1]+p[x+2]+2\r
+       mac (16)        acc0<1>:w               REF_TOP(0)REGION(16,1)          1:w             // p[x]+2*p[x+1]+p[x+2]+2\r
+       shr (16)        REG_INTRA_TEMP_0<1>:w   acc0<16;16,1>:w         2:w             // (p[x]+2*p[x+1]+p[x+2]+2)>>2\r
+\r
+       // Add error block (region <1;8,1>: each row starts one filtered sample further right)\r
+    $for(0,0; <8; 2,32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  r[PERROR,%2]<16;16,1>:w REG_INTRA_TEMP_0.%1<1;8,1>:w\r
+       }\r
+       RETURN\r
+\r
+// Mode 4: diagonal down-right prediction\r
+INTRA_8X8_DIAG_DOWN_RIGHT:\r
+#define INTRA_REF      REG_INTRA_TEMP_1\r
+#define REF_TMP                REG_INTRA_TEMP_2\r
+\r
+//     Set inverse shift count (byte 3 of each shifted DWORD is gathered into INTRA_REF below)\r
+       shl     (4)             REF_TMP<1>:ud   REF_LEFT_D(0,1)REGION(1,0)      INV_SHIFT<4;4,1>:b      // Reverse order bottom 4 pixels of left ref.\r
+       shl     (4)             REF_TMP.4<1>:ud REF_LEFT_D(0)REGION(1,0)        INV_SHIFT<4;4,1>:b      // Reverse order top 4 pixels of left ref.\r
+       mov     (8)             INTRA_REF<1>:ub REF_TMP.3<32;8,4>:ub\r
+       mov     (16)    INTRA_REF.8<1>:ub       REF_TOP(0,-1)REGION(16,1)       // INTRA_REF holds all reference data\r
+\r
+       add (16)        acc0<1>:w               INTRA_REF.2<16;16,1>:ub         2:w             // p[x+2]+2\r
+       mac (16)        acc0<1>:w               INTRA_REF.1<16;16,1>:ub         2:w             // 2*p[x+1]+p[x+2]+2\r
+       mac (16)        acc0<1>:w               INTRA_REF<16;16,1>:ub           1:w             // p[x]+2*p[x+1]+p[x+2]+2\r
+       shr (16)        INTRA_REF<1>:w  acc0<16;16,1>:w                         2:w             // (p[x]+2*p[x+1]+p[x+2]+2)>>2\r
+\r
+//     Store data in reversed order (rows are fetched through the PBWDCOPY_8 pointers; the loop walks the output from offset 96 down to 0)\r
+       add (2)         PBWDCOPY_8<1>:w INV_TRANS48<2;2,1>:b    INTRA_TEMP_1*GRFWIB:w   // Must match with INTRA_REF\r
+\r
+       // Add error block\r
+    $for(0,96; <8; 2,-32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  r[PBWDCOPY_8,%1*2]<8,1>:w       r[PERROR,%2]<16;16,1>:w\r
+       }\r
+       RETURN\r
+\r
+// Mode 5: vertical-right prediction\r
+INTRA_8X8_VERT_RIGHT:\r
+#define INTRA_REF      REG_INTRA_TEMP_1\r
+#define REF_TMP                REG_INTRA_TEMP_2\r
+#define REF_TMP1       REG_INTRA_TEMP_3\r
+\r
+//     Set inverse shift count (byte 3 of each shifted DWORD is gathered into INTRA_REF below)\r
+       shl     (4)             REF_TMP<1>:ud   REF_LEFT_D(0,1)REGION(1,0)      INV_SHIFT<4;4,1>:b      // Reverse order bottom 4 pixels of left ref.\r
+       shl     (4)             REF_TMP.4<1>:ud REF_LEFT_D(0)REGION(1,0)        INV_SHIFT<4;4,1>:b      // Reverse order top 4 pixels of left ref.\r
+       mov     (8)             INTRA_REF<1>:ub REF_TMP.3<32;8,4>:ub\r
+       mov     (16)    INTRA_REF.8<1>:ub       REF_TOP(0,-1)REGION(16,1)       // INTRA_REF holds all reference data\r
+\r
+       // Even rows\r
+       avg (16)        PRED_YW(14)<1>  INTRA_REF.8<16;16,1>    INTRA_REF.9<16;16,1>    // avg(p[x-1],p[x])\r
+       // Odd rows\r
+       add (16)        acc0<1>:w               INTRA_REF.3<16;16,1>:ub         2:w             // p[x]+2\r
+       mac (16)        acc0<1>:w               INTRA_REF.2<16;16,1>:ub         2:w             // 2*p[x-1]+p[x]+2\r
+       mac (16)        acc0<1>:w               INTRA_REF.1<16;16,1>:ub         1:w             // p[x-2]+2*p[x-1]+p[x]+2\r
+       shr (16)        REF_TMP<1>:w    acc0:w  2:w             // (p[x-2]+2*p[x-1]+p[x]+2)>>2\r
+\r
+       mov     (8)             INTRA_REF<1>:ub         REF_TMP<16;8,2>:ub              // Keep zVR = -1,-2,-3,-4,-5,-6,-7 sequentially\r
+       mov     (8)             INTRA_REF.6<2>:ub       REF_TMP.12<16;8,2>:ub   // Keep zVR = -1,1,3,5,7,9,11,13 at even byte\r
+       mov     (8)             INTRA_REF.7<2>:ub       PRED_Y(14)REGION(8,2)   // Combining zVR = 0,2,4,6,8,10,12,14 at odd byte\r
+\r
+       add (2)         PBWDCOPY_8<1>:w INV_TRANS8<2;2,1>:b     INTRA_TEMP_1*GRFWIB:w   // Must match with INTRA_REF\r
+\r
+       // Add error block (the loop walks the output from offset 96 down to 0)\r
+    $for(0,96; <8; 2,-32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  r[PBWDCOPY_8,%1]<8,2>:ub        r[PERROR,%2]<16;16,1>:w\r
+       }\r
+       RETURN\r
+\r
+// Mode 6: horizontal-down prediction\r
+INTRA_8X8_HOR_DOWN:\r
+//     Set inverse shift count (byte 3 of each shifted DWORD is gathered into INTRA_REF below)\r
+       shl     (4)             REF_TMP<1>:ud   REF_LEFT_D(0,1)REGION(1,0)      INV_SHIFT<4;4,1>:b      // Reverse order bottom 4 pixels of left ref.\r
+       shl     (4)             REF_TMP.4<1>:ud REF_LEFT_D(0)REGION(1,0)        INV_SHIFT<4;4,1>:b      // Reverse order top 4 pixels of left ref.\r
+       mov     (8)             INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub\r
+       mov     (16)    INTRA_REF.8<1>:ub       REF_TOP(0,-1)REGION(16,1)       // INTRA_REF holds all reference data\r
+\r
+       // Odd pixels\r
+       add (16)        acc0<1>:w       INTRA_REF.2<16;16,1>:ub         2:w             // p[y]+2\r
+       mac (16)        acc0<1>:w       INTRA_REF.1<16;16,1>:ub         2:w             // 2*p[y-1]+p[y]+2\r
+       mac (16)        acc0<1>:w       INTRA_REF.0<16;16,1>:ub         1:w             // p[y-2]+2*p[y-1]+p[y]+2\r
+       shr (16)        PRED_YW(14)<1>  acc0:w  2:w             // (p[y-2]+2*p[y-1]+p[y]+2)>>2\r
+       // Even pixels\r
+       avg (16)        INTRA_REF<1>:w  INTRA_REF<16;16,1>:ub   INTRA_REF.1<16;16,1>:ub // avg(p[y-1],p[y])\r
+\r
+       mov     (8)             INTRA_REF.1<2>:ub       PRED_Y(14)REGION(8,2)           // Combining odd pixels to form byte type\r
+       mov     (8)             INTRA_REF.16<1>:ub      PRED_Y(14,16)REGION(8,2)        // Keep zVR = -2,-3,-4,-5,-6,-7 unchanged\r
+       // Now INTRA_REF.0 - INTRA_REF.21 contain predicted data\r
+\r
+       add (2)         PBWDCOPY_8<1>:w INV_TRANS48<2;2,1>:b    INTRA_TEMP_1*GRFWIB:w   // Must match with INTRA_REF\r
+\r
+       // Add error block (source byte offset %1 = 0,4,8,12; the output is walked from offset 96 down to 0)\r
+    $for(0,96; <13; 4,-32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  r[PBWDCOPY_8,%1]<8,1>:ub        r[PERROR,%2]<16;16,1>:w\r
+       }\r
+       RETURN\r
+\r
+// Mode 7: vertical-left prediction\r
+INTRA_8X8_VERT_LEFT:\r
+       // Even rows (stored in PRED_YW(14))\r
+       avg (16)                PRED_YW(14)<1>  REF_TOP(0)REGION(16,1)  REF_TOP(0,1)REGION(16,1)        // avg(p[x],p[x+1])\r
+       // Odd rows (stored in PRED_YW(15))\r
+       add (16)                acc0<1>:w               REF_TOP(0,2)REGION(16,1)        2:w             // p[x+2]+2\r
+       mac (16)                acc0<1>:w               REF_TOP(0,1)REGION(16,1)        2:w             // 2*p[x+1]+p[x+2]+2\r
+       mac (16)                acc0<1>:w               REF_TOP(0)REGION(16,1)          1:w             // p[x]+2*p[x+1]+p[x+2]+2\r
+       shr (16)                PRED_YW(15)<1>  acc0<1>:w       2:w             // (p[x]+2*p[x+1]+p[x+2]+2)>>2\r
+\r
+       // Add error block (region <16;8,1>: first 8 words from PRED_YW(14), next 8 from 16 words further on)\r
+    $for(0,0; <4; 1,32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  PRED_YW(14,%1)<16;8,1>  r[PERROR,%2]<16;16,1>:w\r
+       }\r
+       RETURN\r
+\r
+// Mode 8: horizontal-up prediction\r
+INTRA_8X8_HOR_UP:\r
+//     Set extra left reference pixels for unified prediction\r
+       mov     (8)             REF_LEFT(0,8)<1>        REF_LEFT(0,7)REGION(1,0)        // Copy p[-1,7] to p[-1,y],y=8...15\r
+\r
+       // Even pixels (stored in PRED_YW(14))\r
+       avg (16)        PRED_YW(14)<1>  REF_LEFT(0)REGION(16,1) REF_LEFT(0,1)REGION(16,1)       // avg(p[y],p[y+1])\r
+       // Odd pixels (stored in PRED_YW(15))\r
+       add (16)        acc0<1>:w               REF_LEFT(0,2)REGION(16,1)       2:w             // p[y+2]+2\r
+       mac (16)        acc0<1>:w               REF_LEFT(0,1)REGION(16,1)       2:w             // 2*p[y+1]+p[y+2]+2\r
+       mac (16)        acc0<1>:w               REF_LEFT(0)REGION(16,1)         1:w             // p[y]+2*p[y+1]+p[y+2]+2\r
+       shr (16)        PRED_YW(15)<1>  acc0<1>:w       2:w             // (p[y]+2*p[y+1]+p[y+2]+2)>>2\r
+\r
+       // Merge even/odd pixels: odd-pixel results go to the odd bytes of PRED_Y(14)\r
+       // The predicted data need to be stored in byte type (22 bytes are required)\r
+       mov (16)        PRED_Y(14,1)<2> PRED_Y(15)REGION(16,2)\r
+\r
+       // Add error block (region <2;8,1>: the second row starts 2 bytes after the first)\r
+    $for(0,0; <4; 1,32) {\r
+       add.sat (16)    r[PPREDBUF_Y,%2]<2>:ub  PRED_Y(14,%1*4)<2;8,1>  r[PERROR,%2]<16;16,1>:w\r
+       }\r
+       RETURN\r
+\r
+// End of intra_Pred_8X8_Y\r
+\r
+#endif // !defined(__INTRA_PRED_8X8_Y__)\r
diff --git a/i965_drv_video/shaders/h264/mc/intra_Pred_Chroma.asm b/i965_drv_video/shaders/h264/mc/intra_Pred_Chroma.asm
new file mode 100644 (file)
index 0000000..a1e1697
--- /dev/null
@@ -0,0 +1,155 @@
+/*\r
+ * Intra predict 8x8 chroma block\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__INTRA_PRED_CHROMA__)            // Make sure this is only included once\r
+#define __INTRA_PRED_CHROMA__\r
+\r
+// Module name: intra_Pred_Chroma.asm\r
+//\r
+// Intra predict 8x8 chroma block\r
+//\r
+\r
+       shr     (1)     PINTRAPRED_UV<1>:w      REG_INTRA_CHROMA_PRED_MODE<0;1,0>:ub    INTRA_CHROMA_PRED_MODE_SHIFT:w  // Bits 1:0 = intra chroma pred mode\r
+       // WA for "jmpi" restriction: the byte selected through PINTRAPRED_UV is first widened into a dword of REG_INTRA_TEMP_1, which jmpi then takes as its operand\r
+       mov (1) REG_INTRA_TEMP_1<1>:d   r[PINTRAPRED_UV, INTRA_CHROMA_OFFSET]:b\r
+       jmpi (1) REG_INTRA_TEMP_1<0;1,0>:d      // Dispatch on the prediction mode (presumably into the Mode 0-3 handlers that follow - confirm against the data at INTRA_CHROMA_OFFSET)\r
+\r
+// Mode 0: DC prediction. One DC value per sub-block (#0..#3) is derived from the top and left reference samples.\r
+INTRA_CHROMA_DC:\r
+    and.nz.f0.0 (8)            NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_AVAIL_FLAG:ud     // Top macroblock available for intra prediction?\r
+\r
+// Calculate DC values for sub-block 0 and 3\r
+//\r
+// Rearrange reference samples for unified DC prediction code\r
+//     Need to check INTRA_PRED_LEFT_TH_AVAIL_FLAG for blk0 and INTRA_PRED_LEFT_BH_AVAIL_FLAG for blk3\r
+// \r
+       (-f0.0.any8h)   mov (8)         INTRA_REF_TOP_W(0)<1>   0x8080:uw       // Up not available\r
+\r
+    and.nz.f0.1 (4)    NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG:ud\r
+       (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0)<2>  INTRA_REF_TOP_W(0)REGION(4,1)   // Left top half macroblock not available for intra prediction\r
+    and.nz.f0.1 (4)    NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud\r
+       (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0,8)<2>        INTRA_REF_TOP_W(0,4)REGION(4,1) // Left bottom half macroblock not available for intra prediction\r
+\r
+       (-f0.0.any8h)   mov (8)         INTRA_REF_TOP_W(0)<1>   INTRA_REF_LEFT_W(0)REGION(8,2)  // Up not available\r
+// Calculate DC prediction\r
+//\r
+       add (16)        PRED_UVW(0)<1>  INTRA_REF_TOP(0)REGION(16,1)    INTRA_REF_LEFT_UV(0)<4;2,1>     // Sum of top and left reference\r
+       add (8)         PRED_UVW(0)<1>  PRED_UVW(0)<4;2,1>      PRED_UVW(0,2)<4;2,1>    // Sum of first half (blk #0) and second half (blk #3)\r
+\r
+       add (8)         PRED_UVW(9)<1>  PRED_UVW(0)<0;2,1>      PRED_UVW(0,2)<0;2,1>    // Sum of blk #0\r
+       add (8)         PRED_UVW(11,8)<1>       PRED_UVW(0,4)<0;2,1>    PRED_UVW(0,6)<0;2,1>    // Sum of blk #3\r
+\r
+// Calculate DC values for sub-block 1 and 2\r
+//\r
+// Rearrange reference samples for unified DC prediction code\r
+//\r
+       // Blk #2 (f0.1 still holds the left bottom-half availability test from above)\r
+       (-f0.0.any4h)   mov (4)         INTRA_REF_TOP_W(0)<1>   0x8080:uw       // Default when up is not available\r
+       (f0.1.any4h)    mov (4)         INTRA_REF_TOP_W(0)<1>   INTRA_REF_LEFT_W(0,8)REGION(4,2)        // Always use available left reference\r
+       (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0,8)<2>        INTRA_REF_TOP_W(0)REGION(4,1)   // Left bottom half not available: use the top reference instead\r
+\r
+       // Blk #1 (f0.1 is re-evaluated for the left top half)\r
+    and.nz.f0.1 (4)    NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG:ud\r
+       (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0)<2>  0x8080:uw       // Default when left top half is not available\r
+       (f0.0.any4h)    mov (4)         INTRA_REF_LEFT_W(0)<2>  INTRA_REF_TOP_W(0,4)REGION(4,1) // Always use available top reference\r
+       (-f0.0.any4h)   mov (4)         INTRA_REF_TOP_W(0,4)<1> INTRA_REF_LEFT_W(0)REGION(4,2)  // Up not available: use the left reference instead\r
+\r
+// Calculate DC prediction\r
+//\r
+       add (8) PRED_UVW(0)<1>          INTRA_REF_TOP(0)REGION(8,1)     INTRA_REF_LEFT_UV(0,16)<4;2,1>  // Sum of top and left reference for blk #2\r
+       add (8) PRED_UVW(0,8)<1>        INTRA_REF_LEFT_UV(0)<4;2,1>     INTRA_REF_TOP(0,8)REGION(8,1)   // Sum of top and left reference for blk #1\r
+       add (8) PRED_UVW(0)<1>          PRED_UVW(0)<4;2,1>              PRED_UVW(0,2)<4;2,1>    // Sum of first half (blk #2) and second half (blk #1)\r
+\r
+       add (8) PRED_UVW(9,8)<1>        PRED_UVW(0,4)<0;2,1>    PRED_UVW(0,6)<0;2,1>    // Sum of blk #1\r
+       add (8) PRED_UVW(11)<1>         PRED_UVW(0)<0;2,1>              PRED_UVW(0,2)<0;2,1>    // Sum of blk #2\r
+\r
+// Now, PRED_UVW(9) holds sums for blks #0 and #1 and PRED_UVW(11) holds sums for blks #2 and #3\r
+//\r
+       add (32)        acc0<1>:w       PRED_UVW(9)REGION(16,1)         4:w {Compr}             // Add rounder\r
+    $for(0; <4; 2) {\r
+       shr (32)        PRED_UVW(%1)<1> acc0:w          3:w {Compr}     // (sum+4)>>3 for blks #0/#1, same values written on each iteration\r
+       }\r
+\r
+       add (32)        acc0<1>:w       PRED_UVW(11)REGION(16,1)        4:w {Compr}             // Add rounder\r
+    $for(4; <8; 2) {\r
+       shr (32)        PRED_UVW(%1)<1> acc0:w          3:w {Compr}     // (sum+4)>>3 for blks #2/#3, same values written on each iteration\r
+       }\r
+       jmpi (1)        End_of_intra_Pred_Chroma        // Done: skip the other chroma modes\r
+\r
+// Mode 1: horizontal prediction - prediction rows are filled by replicating byte pairs read from the left reference column\r
+INTRA_CHROMA_HORIZONTAL:\r
+       mov (1)         PREF_LEFT_UD<1>:ud      INTRA_REF_LEFT_ID*GRFWIB*0x00010001+0x00040000:ud       // Set address registers for instruction compression\r
+    $for(0,0; <8; 2,8) {\r
+       mov (32)        PRED_UVW(%1)<1> r[PREF_LEFT,%2+2]<0;2,1>:ub {Compr}     // Actual left column reference data start at offset 2\r
+       }\r
+       jmpi (1)        End_of_intra_Pred_Chroma        // Done: skip the other chroma modes\r
+\r
+// Mode 2: vertical prediction - every prediction register pair is loaded from the top reference INTRA_REF_TOP(0)\r
+INTRA_CHROMA_VERTICAL:\r
+    $for(0; <8; 2) {\r
+       mov (32)        PRED_UVW(%1)<1> INTRA_REF_TOP(0) {Compr}        // Same source on every iteration; only the destination advances\r
+       }\r
+       jmpi (1)        End_of_intra_Pred_Chroma        // Done: skip the remaining chroma mode\r
+\r
+// Mode 3: plane prediction - pred = sat((a + b*(x-3) + c*(y-3) + 16) >> 5), evaluated for the U and V samples together\r
+INTRA_Chroma_PLANE:\r
+// Refer to H.264/AVC spec Section 8.3.4.4\r
+\r
+#undef C       // C is (re)defined just below\r
+\r
+#define A              REG_INTRA_TEMP_2.0              // All are WORD type\r
+#define B              REG_INTRA_TEMP_3.0              // B[U] & B[V]\r
+#define C              REG_INTRA_TEMP_3.2              // C[U] & C[V]\r
+#define YP             REG_INTRA_TEMP_0                // Store intermediate results of c*(y-3). Make sure it's an even GRF\r
+#define YP1            REG_INTRA_TEMP_1                // Store intermediate results of c*(y-3). Make sure it's an odd GRF\r
+#define XP             REG_INTRA_TEMP_5                // Store intermediate results of a+b*(x-3)+16. Make sure it's an odd GRF\r
+\r
+// First Calculate constants H and V\r
+//     H1 = sum((x'+1)*p[4+x',-1]), x'=0,1,2,3\r
+//     H2 =  sum((-x'-1)*p[2-x',-1]), x'=3,2,1,0\r
+//     H = H1 + H2\r
+//     The same calculation holds for V\r
+//\r
+       mul (8) H1(0)<1>        INTRA_REF_TOP(0,8)REGION(8,1)   0x44332211:v    // Weights 1,1,2,2,3,3,4,4 (each weight used for a U/V pair)\r
+       mul (8) H2(0)<1>        INTRA_REF_TOP(0,-2)REGION(8,1)  0xFFEEDDCC:v    // Weights -4,-4,-3,-3,-2,-2,-1,-1\r
+\r
+       mul (8) V1(0)<1>        INTRA_REF_LEFT_UV(0,4*4)<4;2,1> 0x44332211:v\r
+       mul (8) V2(0)<1>        INTRA_REF_LEFT_UV(0)<4;2,1>             0x00FFEEDD:v    // Weights -3,-3,-2,-2,-1,-1,0,0; the zero-weighted pair is overwritten next\r
+       mul (2) V2(0,6)<1>      INTRA_REF_TOP(0,-2)REGION(2,1)  -4:w            // Replace 0*p[-1,3] with -4*p[-1,-1]\r
+       // Now, REG_INTRA_TEMP_0 holds [H2, H1] and REG_INTRA_TEMP_1 holds [V2, V1]\r
+\r
+       // Sum up [H2, H1] and [V2, V1] using instruction compression\r
+       // ExecSize = 16 is restricted by B-spec for instruction compression\r
+       // Actual intermediate results are in lower sub-registers after each summing step\r
+       add     (16)    H1(0)<1>        H1(0)   H2(0) {Compr}   // Results in lower 8 WORDs\r
+       add     (16)    H1(0)<1>        H1(0)   H1(0,4) {Compr} // Results in lower 4 WORDs\r
+       add     (16)    H1(0)<1>        H1(0)   H1(0,2) {Compr} // Results in lower 2 WORDs\r
+\r
+//     Calculate a, b, c and further derivations\r
+       mov     (16)    acc0<1>:w       32:w            // Rounding term of (34*H+32)>>6\r
+       mac     (4)             acc0<1>:w       H1(0)<16;2,1>   34:w    // acc0 = 34*{H,V} + 32\r
+       shr     (4)             B<1>:w          acc0:w  6:w             // Done b,c\r
+       mov     (16)    acc0<1>:w       16:w            // Rounding term of the final >>5\r
+       mac     (16)    acc0<1>:w       INTRA_REF_TOP(0,7*2)<0;2,1>             16:w    // acc0 += 16 * top reference pair at byte offset 7*2\r
+       mac     (16)    A<1>:w          INTRA_REF_LEFT_UV(0,7*4)<0;2,1> 16:w    // A = a+16\r
+       mac (16)        XP<1>:w         B<0;2,1>:w              XY_3<1;2,0>:b           // XP = A+b*(x-3)\r
+       mul     (8)             YP<1>:w         C<0;2,1>:w              XY_3<2;2,0>:b           // YP = c*(y-3), Even portion\r
+       mul     (8)             YP1<1>:w        C<0;2,1>:w              XY_3_1<2;2,0>:b // YP = c*(y-3), Odd portion\r
+\r
+//     Finally the intra_Chroma plane prediction: (XP + YP) >> 5 with saturation, stored with destination stride 2\r
+    $for(0; <8; 2) {\r
+       add (32)        acc0<1>:w               XP<16;16,1>:w   YP.%1<0;2,1>:w {Compr}\r
+       shr.sat (32)    PRED_UV(%1)<2>  acc0<16;16,1>:w 5:w {Compr}\r
+       }\r
+\r
+End_of_intra_Pred_Chroma:\r
+\r
+// End of intra_Pred_Chroma\r
+\r
+#endif // !defined(__INTRA_PRED_CHROMA__)\r
diff --git a/i965_drv_video/shaders/h264/mc/intra_pred_16x16_Y.asm b/i965_drv_video/shaders/h264/mc/intra_pred_16x16_Y.asm
new file mode 100644 (file)
index 0000000..94ccb07
--- /dev/null
@@ -0,0 +1,111 @@
+/*\r
+ * Intra predict 16x16 luma block\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: intra_Pred_16x16_Y.asm\r
+//\r
+// Intra predict 16x16 luma block\r
+//\r
+       and     (1)     PINTRAPRED_Y<1>:w       INTRA_PRED_MODE(0)REGION(1,0)   0x0F:w  // Keep the low 4 bits of the prediction mode field\r
+       // WA for "jmpi" restriction: the byte selected through PINTRAPRED_Y is first widened into a dword of REG_INTRA_TEMP_1, which jmpi then takes as its operand\r
+       mov (1) REG_INTRA_TEMP_1<1>:ud  r[PINTRAPRED_Y, INTRA_16X16_OFFSET]:ub\r
+       jmpi (1) REG_INTRA_TEMP_1<0;1,0>:d      // Dispatch on the prediction mode (presumably into the Mode 0-3 handlers that follow - confirm against the data at INTRA_16X16_OFFSET)\r
+\r
+// Mode 0: vertical prediction - every prediction register pair is loaded from the top reference INTRA_REF_TOP(0)\r
+INTRA_16x16_VERTICAL:\r
+    $for(0; <16; 2) {\r
+       mov (32)        PRED_YW(%1)<1>  INTRA_REF_TOP(0) {Compr}        // Same source on every iteration; only the destination advances\r
+       }\r
+       jmpi (1) End_intra_Pred_16x16_Y // Done: skip the other 16x16 modes\r
+\r
+// Mode 1: horizontal prediction - each prediction register is filled by replicating one byte read from the left reference column\r
+INTRA_16x16_HORIZONTAL:\r
+       mov (1)         PREF_LEFT_UD<1>:ud      INTRA_REF_LEFT_ID*GRFWIB*0x00010001+0x00040000:ud       // Set address registers for instruction compression\r
+    $for(0,0; <16; 2,8) {\r
+       mov (32)        PRED_YW(%1)<1>  r[PREF_LEFT,%2+3]<0;1,0>:ub {Compr}     // Actual left column reference data start at offset 3\r
+       }\r
+       jmpi (1) End_intra_Pred_16x16_Y // Done: skip the other 16x16 modes\r
+\r
+// Mode 2: DC prediction - every predicted sample is (sum of top and left reference samples + 16) >> 5\r
+INTRA_16x16_DC:\r
+    and.nz.f0.0 (8)    NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_AVAIL_FLAG:ud     // Top macroblock available for intra prediction?\r
+    and (8)                    acc0<1>:ud      REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG+INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud  // Isolate the left top-half and bottom-half availability bits\r
+    xor.z.f0.1 (8)     NULLREG         acc0:ud INTRA_PRED_LEFT_TH_AVAIL_FLAG+INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud  // Left macroblock available for intra prediction? f0.1 is set only when both half bits are set\r
+// Rearrange reference samples for unified DC prediction code\r
+//\r
+       (-f0.0.any8h)   mov (8) INTRA_REF_TOP_W(0)<1>   0x8080:uw       // Up not available: default the top reference to 0x80 per byte\r
+       (-f0.1.any8h)   mov (8) INTRA_REF_LEFT(0)<4>    INTRA_REF_TOP(0)REGION(8,1)     // Left not available: copy the top reference into the left one\r
+       (-f0.1.any8h)   mov (8) INTRA_REF_LEFT(1)<4>    INTRA_REF_TOP(0,8)REGION(8,1)\r
+       (-f0.0.any8h)   mov (8) INTRA_REF_TOP(0)<1>             INTRA_REF_LEFT(0)REGION(8,4)    // Up not available: copy the left reference into the top one\r
+       (-f0.0.any8h)   mov (8) INTRA_REF_TOP(0,8)<1>   INTRA_REF_LEFT(1)REGION(8,4)    // Split due to HW limitation\r
+// Perform DC prediction\r
+//\r
+       add (16)        PRED_YW(15)<1>  INTRA_REF_LEFT(0)REGION(8,4)    INTRA_REF_TOP(0)REGION(16,1)    // 16 partial sums of left + top reference samples\r
+       add (8)         PRED_YW(15)<1>  PRED_YW(15)REGION(8,1)  PRED_YW(15,8)REGION(8,1)        // Fold 16 -> 8\r
+       add (4)         PRED_YW(15)<1>  PRED_YW(15)REGION(4,1)  PRED_YW(15,4)REGION(4,1)        // Fold 8 -> 4\r
+       add (2)         PRED_YW(15)<1>  PRED_YW(15)REGION(2,1)  PRED_YW(15,2)REGION(2,1)        // Fold 4 -> 2\r
+       add (32)        acc0<1>:w               PRED_YW(15)REGION(1,0)  PRED_YW(15,1)REGION(1,0) {Compr}        // Set up both acc0 and acc1\r
+       add     (32)    acc0<1>:w               acc0:w  16:w {Compr}    // Add rounder\r
+\r
+    $for(0; <16; 2) {\r
+       shr (32)        PRED_YW(%1)<1>  acc0:w  5:w {Compr}     // (sum+16)>>5, same value written on each iteration\r
+       }\r
+       jmpi (1) End_intra_Pred_16x16_Y // Done: skip the remaining 16x16 mode\r
+\r
+// Mode 3: plane prediction - pred = sat((a + b*(x-7) + c*(y-7) + 16) >> 5)\r
+INTRA_16x16_PLANE:\r
+// Refer to H.264/AVC spec Section 8.3.3.4\r
+\r
+#define A              REG_INTRA_TEMP_2.0              // All are WORD type\r
+#define B              REG_INTRA_TEMP_3.0\r
+#define C              REG_INTRA_TEMP_3.1\r
+#define YP             REG_INTRA_TEMP_0                // Store intermediate results of c*(y-7). Make sure it's an even GRF\r
+#define YP1            REG_INTRA_TEMP_1                // Store intermediate results of c*(y-7). Make sure it's an odd GRF, used in {Compr}\r
+#define XP             REG_INTRA_TEMP_5                // Store intermediate results of a+b*(x-7)+16. Make sure it's an odd GRF\r
+\r
+// First Calculate constants H and V\r
+//     H1 = sum((-x'-1)*p[8+x',-1]), x'=0,1,...7\r
+//     H2 =  sum((-x'-1)*p[6-x',-1]), x'=7,6,...0\r
+//     H = -H1 + H2\r
+//     The same calculation holds for V\r
+//\r
+       mul (8) H1(0)<1>        INTRA_REF_TOP(0,8)REGION(8,1)           0x89ABCDEF:v    // Weights -1,-2,...,-8\r
+       mul (8) H2(0)<1>        INTRA_REF_TOP(0,-1)REGION(8,1)          0xFEDCBA98:v    // Weights -8,-7,...,-1\r
+\r
+       mul (8) V1(0)<1>        INTRA_REF_LEFT(0,8*4)REGION(8,4)        0x89ABCDEF:v\r
+       mul (8) V2(0)<1>        INTRA_REF_LEFT(0)REGION(8,4)            0x0FEDCBA9:v    // Weights -7,-6,...,-1,0; the zero-weighted term is overwritten next\r
+       mul (1) V2(0,7)<1>      INTRA_REF_TOP(0,-1)<0;1,0>      -8:w            // Replace 0*p[-1,7] with -8*p[-1,-1]\r
+       // Now, REG_INTRA_TEMP_0 holds [H2, -H1] and REG_INTRA_TEMP_1 holds [V2, -V1]\r
+\r
+       // Sum up [H2, -H1] and [V2, -V1] using instruction compression\r
+       // ExecSize = 16 is restricted by B-spec for instruction compression\r
+       // Actual intermediate results are in lower sub-registers after each summing step\r
+       add     (16)    H1(0)<1>        -H1(0)  H2(0)   {Compr} // Results in lower 8 WORDs\r
+       add     (16)    H1(0)<1>        H1(0)   H1(0,4) {Compr} // Results in lower 4 WORDs\r
+       add     (16)    H1(0)<1>        H1(0)   H1(0,2) {Compr} // Results in lower 2 WORDs\r
+       add     (16)    H1(0)<1>        H1(0)   H1(0,1) {Compr} // Results in lower 1 WORD\r
+\r
+//     Calculate a, b, c and further derivations\r
+       mov     (16)    acc0<1>:w       32:w            // Rounding term of (5*H+32)>>6\r
+       mac     (2)             acc0<1>:w       H1(0)<16;1,0>   5:w     // acc0 = 5*{H,V} + 32\r
+       shr     (2)             B<1>:w          acc0:w  6:w             // Done b,c\r
+       mov     (16)    acc0<1>:w       16:w            // Rounding term of the final >>5\r
+       mac     (16)    acc0<1>:w       INTRA_REF_TOP(0,15)<0;1,0>      16:w    // acc0 += 16 * top reference sample at offset 15\r
+       mac     (16)    A<1>:w          INTRA_REF_LEFT(0,15*4)<0;1,0>   16:w    // A = a+16\r
+       mac (16)        XP<1>:w         B<0;1,0>:w              XY_7<16;16,1>:b                 // XP = A+b*(x-7)\r
+       mul     (8)             YP<1>:w         C<0;1,0>:w              XY_7<16;8,2>:b                  // YP = c*(y-7), even portion\r
+       mul     (8)             YP1<1>:w        C<0;1,0>:w              XY_7_1<16;8,2>:b                // YP = c*(y-7), odd portion\r
+\r
+//     Finally the intra_16x16 plane prediction: (XP + YP) >> 5 with saturation, stored with destination stride 2\r
+    $for(0,0; <16; 2,1) {\r
+       add (32)        acc0<1>:w               XP<16;16,1>:w   YP.%2<16;16,0>:w {Compr}        // Set Width!= 1 to trick EU to use YP1.%2 for 2nd instruction\r
+       shr.sat (32)    PRED_Y(%1)<2>   acc0<16;16,1>:w 5:w {Compr}\r
+       }\r
+\r
+End_intra_Pred_16x16_Y:\r
+// End of intra_Pred_16x16_Y\r
diff --git a/i965_drv_video/shaders/h264/mc/loadRef_C_10x5.asm b/i965_drv_video/shaders/h264/mc/loadRef_C_10x5.asm
new file mode 100644 (file)
index 0000000..3c0e851
--- /dev/null
@@ -0,0 +1,57 @@
+/*\r
+ * Load reference 10x5 area for chroma NV12 4x4 MC\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: LoadRef_C_10x5.asm\r
+//\r
+// Load reference 10x5 area for chroma NV12 4x4 MC\r
+\r
+\r
+//#if !defined(__LOADREF_C_10x5__)             // Make sure this is only included once\r
+//#define __LOADREF_C_10x5__\r
+\r
+\r
+#if 1  // Active path: response length and block height depend on whether MVY has a fractional part\r
+\r
+       // Compute integer and fractional components of MV\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x03:w {NoDDClr}\r
+    and (2)            gMVX_FRACC<1>:w         r[pMV,0]<2;2,1>:w                               0x07:w {NoDDChk}\r
+    \r
+    // Check whether MVY is integer (f0.0 set when the MVY fraction is zero)\r
+       or.z.f0.0 (8) null:w                    gMVY_FRACC<0;1,0>:w                             0:w\r
+       \r
+       // Compute top-left corner position to be loaded (X is doubled by the shift below)\r
+    mov (2)            gMSGSRC.0<1>:d          gMVX_INT<2;2,1>:w\r
+       shl (1)         gMSGSRC.0:d                     gMSGSRC.0:d                                             1:w\r
+\r
+       (f0.0) add (1)  pMSGDSC:ud              gMSGDSC_R:ud                                    RESP_LEN(2)+nBI_LC_DIFF:ud\r
+       (-f0.0) add (1) pMSGDSC:ud              gMSGDSC_R:ud                                    RESP_LEN(3)+nBI_LC_DIFF:ud\r
+       \r
+    // Read 16x5 pixels - TODO: Reading 12x5 instead of 16x5 took more time on CL. Why?\r
+    // NOTE(review): the size values below look like (height-1)<<16 | (width-1), i.e. 10x4 when MVY is integer and 10x5 otherwise - confirm against the block read message definition\r
+    (f0.0) mov (1)     gMSGSRC.2:ud    0x00030009:ud                                   //{NoDDChk}\r
+    (-f0.0) mov (1)    gMSGSRC.2:ud    0x00040009:ud                                   //{NoDDChk}\r
+    send (8)   gudREFC(0)<1>       mMSGHDRC                                            gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+\r
+#else  // Disabled path: always requests RESP_LEN(3) and size 0x00040009, regardless of MVY\r
+\r
+       add (1)         pMSGDSC:ud                      gMSGDSC_R:ud                                    RESP_LEN(3)+nBI_LC_DIFF:ud\r
+\r
+       // Compute integer and fractional components of MV\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x03:w {NoDDClr}\r
+    and (2)            gMVX_FRACC<1>:w         r[pMV,0]<2;2,1>:w                               0x07:w {NoDDChk}\r
+       \r
+       // Compute top-left corner position to be loaded\r
+    mov (2)            gMSGSRC.0<1>:d          gMVX_INT<2;2,1>:w\r
+       shl (1)         gMSGSRC.0:d                     gMSGSRC.0:d                                             1:w\r
+\r
+    // Read 16x5 pixels\r
+    mov (1)            gMSGSRC.2:ud            0x00040009:ud                                   {NoDDChk}\r
+    send (8)   gudREFC(0)<1>       mMSGHDRC                                            gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+#endif\r
+        \r
+//#endif       // !defined(__LOADREF_C_10x5__)\r
diff --git a/i965_drv_video/shaders/h264/mc/loadRef_C_6x3.asm b/i965_drv_video/shaders/h264/mc/loadRef_C_6x3.asm
new file mode 100644 (file)
index 0000000..5ed7b69
--- /dev/null
@@ -0,0 +1,38 @@
+/*\r
+ * Load reference 6x3 area for chroma NV12 4x4 MC\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: LoadRef_C_6x3.asm\r
+//\r
+// Load reference 6x3 area for chroma NV12 4x4 MC\r
+\r
+\r
+//#if !defined(__LOADREF_C_6x3__)              // Make sure this is only included once\r
+//#define __LOADREF_C_6x3__\r
+\r
+\r
+#ifdef DEV_ILK // The constant added to the message descriptor differs between DEV_ILK and other builds\r
+       add (1)         pMSGDSC:ud                      gMSGDSC_R:ud                                    0x00100010:ud\r
+#else\r
+       add (1)         pMSGDSC:ud                      gMSGDSC_R:ud                                    0x00010010:ud\r
+#endif // DEV_ILK\r
+\r
+       // Compute integer and fractional components of MV\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x03:w {NoDDClr}\r
+    and (2)            gMVX_FRACC<1>:w         r[pMV,0]<2;2,1>:w                               0x07:w {NoDDChk}\r
+       \r
+       // Compute top-left corner position to be loaded (X is doubled by the shift below)\r
+    mov (2)            gMSGSRC.0<1>:d          gMVX_INT<2;2,1>:w\r
+       shl (1)         gMSGSRC.0:d                     gMSGSRC.0:d                                             1:w\r
+\r
+    // Read 8x3 pixels\r
+    // NOTE(review): 0x00020005 looks like (height-1)<<16 | (width-1), i.e. a 6x3 request - confirm against the block read message definition\r
+    mov (1)            gMSGSRC.2:ud            0x00020005:ud\r
+    send (8)   gudREFC(0)<1>       mMSGHDRC                                            gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+\r
+        \r
+//#endif       // !defined(__LOADREF_C_6x3__)\r
diff --git a/i965_drv_video/shaders/h264/mc/loadRef_Y_16x13.asm b/i965_drv_video/shaders/h264/mc/loadRef_Y_16x13.asm
new file mode 100644 (file)
index 0000000..b233ea1
--- /dev/null
@@ -0,0 +1,127 @@
+/*\r
+ * Load reference 16x13 area for luma 8x8 MC\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: LoadRef_Y_16x13.asm\r
+//\r
+// Load reference 16x13 area for luma 8x8 MC\r
+\r
+\r
+//#if !defined(__LOADREF_Y_16x13__)            // Make sure this is only included once\r
+//#define __LOADREF_Y_16x13__\r
+\r
+#if 1  // Active: fetch size depends on the MV fractions (the outer #else branch always requests size 0x000c000c)\r
+\r
+#if 1  // Active: adds the special case for an MVX with no fractional part\r
+\r
+       // Check whether MVX is integer MV (f0.0 set when the low 2 bits of MVX are zero)\r
+       and.z.f0.0 (1) null:w                   r[pMV,0]<0;1,0>:w                               0x3:w\r
+\r
+       // Compute integer and fractional components of MV\r
+    and (2)            gMVX_FRAC<1>:w          r[pMV,0]<2;2,1>:w                               0x03:w  //{NoDDClr}\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x02:w  //{NoDDChk}\r
+    \r
+    // Check whether MVY is integer (f0.1 set when the MVY fraction is zero)\r
+       or.z.f0.1 (8) null:w                    gMVY_FRAC<0;1,0>:w                              0:w\r
+       \r
+       // If MVX is a multiple of 4 (..., -4, 0, 4, ...) integer MV, do special handling\r
+       (f0.0)  jmpi (1)        INTERLABEL(LOADREF_MVXZERO)     \r
+       \r
+    // Set message descriptor (shorter response when MVY is integer)\r
+    (f0.1) add (1)     pMSGDSC:ud              gMSGDSC_R:ud                                    RESP_LEN(4):ud  \r
+       (-f0.1) add (1) pMSGDSC:ud              gMSGDSC_R:ud                                    RESP_LEN(7):ud\r
+       \r
+       // Compute top-left corner position to be loaded\r
+       // TODO: sel\r
+       // Fractional MVY: origin is (MVX_INT-2, MVY_INT-2); integer MVY: origin is (MVX_INT-2, MVY_INT)\r
+    (-f0.1) add (2)    gMSGSRC.0<1>:d  gMVX_INT<2;2,1>:w                               -0x02:d //{NoDDClr}\r
+    (-f0.1) mov (1)    gMSGSRC.2:ud    0x000c000c:ud                                                   //{NoDDChk}\r
+    (f0.1) add (1)     gMSGSRC.0<1>:d  gMVX_INT<0;1,0>:w                               -0x02:d //{NoDDClr}\r
+       (f0.1) mov (1)  gMSGSRC.1<1>:d  gMVY_INT<0;1,0>:w                                               //{NoDDChk,NoDDClr}\r
+    (f0.1) mov (1)     gMSGSRC.2:ud    0x0007000c:ud                                                   //{NoDDChk}\r
+\r
+    // Read 16x13 pixels\r
+    send (8)   gudREF(0)<1>            mMSGHDRY                                                gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+    \r
+    jmpi INTERLABEL(EXIT_LOADREF_Y_16x13)\r
+    \r
+INTERLABEL(LOADREF_MVXZERO):\r
+    \r
+    // Set message descriptor (constants differ between DEV_ILK and other builds)\r
+#ifdef DEV_ILK\r
+    (f0.1) add (1)     pMSGDSC:ud              gMSGDSC_R:ud                                    0x00200000:ud   \r
+       (-f0.1) add (1) pMSGDSC:ud              gMSGDSC_R:ud                                    0x00400000:ud\r
+#else\r
+    (f0.1) add (1)     pMSGDSC:ud              gMSGDSC_R:ud                                    0x00020000:ud   \r
+       (-f0.1) add (1) pMSGDSC:ud              gMSGDSC_R:ud                                    0x00040000:ud\r
+#endif\r
+       \r
+       // Compute top-left corner position to be loaded\r
+       // TODO: sel\r
+       // Origin is (MVX_INT, MVY_INT); Y is moved up by 2 only when MVY is fractional\r
+       mov (2) gMSGSRC.0<1>:d  gMVX_INT<2;2,1>:w\r
+    (-f0.1) add (1)    gMSGSRC.1<1>:d  gMVY_INT<0;1,0>:w                               -0x02:d\r
+    (-f0.1) mov (1)    gMSGSRC.2:ud    0x000c0007:ud                                                   //{NoDDChk}\r
+    (f0.1) mov (1)     gMSGSRC.2:ud    0x00070007:ud                                                   //{NoDDChk}\r
+\r
+    // Read 16x13 pixels - NOTE(review): the size values set just above (0x000c0007 / 0x00070007) differ from the 0x000c000c used on the other path; if the low half is width-1 this path reads 8 columns, not 16 - confirm\r
+    send (8)   gudREF(0)<1>            mMSGHDRY                                                gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+    \r
+\r
+#else  // Disabled: same MVY-dependent fetch without the integer-MVX special case\r
+       // Compute integer and fractional components of MV\r
+    and (2)            gMVX_FRAC<1>:w          r[pMV,0]<2;2,1>:w                               0x03:w  //{NoDDClr}\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x02:w  //{NoDDChk}\r
+    \r
+    // Check whether MVY is integer\r
+       or.z.f0.1 (8) null:w                    gMVY_FRAC<0;1,0>:w                              0:w\r
+       \r
+    // Set message descriptor\r
+#ifdef DEV_ILK\r
+    (f0.1) add (1)     pMSGDSC:ud              gMSGDSC_R:ud                                    0x00400000:ud   \r
+       (-f0.1) add (1) pMSGDSC:ud              gMSGDSC_R:ud                                    0x00700000:ud\r
+#else\r
+    (f0.1) add (1)     pMSGDSC:ud              gMSGDSC_R:ud                                    0x00040000:ud   \r
+       (-f0.1) add (1) pMSGDSC:ud              gMSGDSC_R:ud                                    0x00070000:ud\r
+#endif\r
+       \r
+       // Compute top-left corner position to be loaded\r
+       // TODO: sel\r
+    (-f0.1) add (2)    gMSGSRC.0<1>:d  gMVX_INT<2;2,1>:w                               -0x02:d //{NoDDClr}\r
+    (-f0.1) mov (1)    gMSGSRC.2:ud    0x000c000c:ud                                                   //{NoDDChk}\r
+    (f0.1) add (1)     gMSGSRC.0<1>:d  gMVX_INT<0;1,0>:w                               -0x02:d //{NoDDClr}\r
+       (f0.1) mov (1)  gMSGSRC.1<1>:d  gMVY_INT<0;1,0>:w                                               //{NoDDChk,NoDDClr}\r
+    (f0.1) mov (1)     gMSGSRC.2:ud    0x0007000c:ud                                                   //{NoDDChk}\r
+\r
+    // Read 16x13 pixels\r
+    send (8)   gudREF(0)<1>            mMSGHDRY                                                gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+#endif\r
+    \r
+#else   // Disabled: unconditional fetch from (MVX_INT-2, MVY_INT-2) with size 0x000c000c\r
+\r
+       // Compute integer and fractional components of MV\r
+    and (2)            gMVX_FRAC<1>:w          r[pMV,0]<2;2,1>:w                               0x03:w  {NoDDClr} //\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x02:w  {NoDDChk} //\r
+    \r
+    // Set message descriptor\r
+#ifdef DEV_ILK\r
+    add (1)            pMSGDSC:ud                      gMSGDSC_R:ud                                    0x00700000:ud\r
+#else\r
+    add (1)            pMSGDSC:ud                      gMSGDSC_R:ud                                    0x00070000:ud\r
+#endif // DEV_ILK\r
+    \r
+       // Compute top-left corner position to be loaded\r
+    add (2)            gMSGSRC.0<1>:d          gMVX_INT<2;2,1>:w                               -0x02:d {NoDDClr} //\r
+    mov (1)            gMSGSRC.2:ud            0x000c000c:ud                                                   {NoDDChk} //\r
+\r
+    // Read 16x13 pixels\r
+    send (8)   gudREF(0)<1>        mMSGHDRY                                            gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+    \r
+#endif\r
+\r
+INTERLABEL(EXIT_LOADREF_Y_16x13):\r
+\r
+//#endif       // !defined(__LOADREF_Y_16x13__)\r
diff --git a/i965_drv_video/shaders/h264/mc/loadRef_Y_16x9.asm b/i965_drv_video/shaders/h264/mc/loadRef_Y_16x9.asm
new file mode 100644 (file)
index 0000000..e48151e
--- /dev/null
@@ -0,0 +1,61 @@
+/*\r
+ * Load reference 16x9 area for luma 4x4 MC\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: LoadRef_Y_16x9.asm\r
+//\r
+// Load reference 16x9 area for luma 4x4 MC\r
+\r
+\r
+//#if !defined(__LOADREF_Y_16x9__)             // Make sure this is only included once\r
+//#define __LOADREF_Y_16x9__\r
+\r
+#if 1  // Active path: response length and block height depend on whether MVY has a fractional part\r
+\r
+       // Compute integer and fractional components of MV\r
+    and (2)            gMVX_FRAC<1>:w          r[pMV,0]<2;2,1>:w                               0x03:w  //{NoDDClr}\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x02:w  //{NoDDChk}\r
\r
+    // Check whether MVY is integer (f0.1 set when the MVY fraction is zero)\r
+       or.z.f0.1 (8) null:w                    gMVY_FRAC<0;1,0>:w                              0:w\r
+          \r
+       // Set message descriptor (shorter response when MVY is integer)\r
+       (f0.1) add (1)  pMSGDSC:ud              gMSGDSC_R:ud                                    RESP_LEN(2):ud\r
+       (-f0.1) add (1) pMSGDSC:ud              gMSGDSC_R:ud                                    RESP_LEN(5):ud\r
+\r
+       // Compute top-left corner position to be loaded\r
+       // TODO: sel\r
+       // Fractional MVY: origin is (MVX_INT-2, MVY_INT-2); integer MVY: origin is (MVX_INT-2, MVY_INT)\r
+    (-f0.1) add (2)    gMSGSRC.0<1>:d  gMVX_INT<2;2,1>:w                               -0x02:d //{NoDDClr}\r
+    (-f0.1) mov (1)    gMSGSRC.2:ud    0x00080008:ud                                                   //{NoDDChk}\r
+    (f0.1) add (1)     gMSGSRC.0<1>:d  gMVX_INT<0;1,0>:w                               -0x02:d //{NoDDClr}\r
+       (f0.1) mov (1)  gMSGSRC.1<1>:d  gMVY_INT<0;1,0>:w                                               //{NoDDChk,NoDDClr}\r
+    (f0.1) mov (1)     gMSGSRC.2:ud    0x00030008:ud                                                   //{NoDDChk}\r
+\r
+    // Read 16x9 pixels\r
+    send (8)   gudREF(0)<1>        mMSGHDRY                                            gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+\r
+#else  // Disabled path: unconditional fetch from (MVX_INT-2, MVY_INT-2) with RESP_LEN(5) and size 0x00080008\r
+\r
+       // Compute integer and fractional components of MV\r
+    and (2)            gMVX_FRAC<1>:w          r[pMV,0]<2;2,1>:w                               0x03:w {NoDDClr} //\r
+    asr (2)            gMVX_INT<1>:w           r[pMV,0]<2;2,1>:w                               0x02:w {NoDDChk} //\r
+\r
+       // Set message descriptor\r
+       add (1)         pMSGDSC:ud                      gMSGDSC_R:ud                                    RESP_LEN(5):ud\r
+    \r
+       // Compute top-left corner position to be loaded \r
+    add (2)            gMSGSRC.0<1>:d          gMVX_INT<2;2,1>:w                               -0x02:d {NoDDClr} //\r
+    mov (1)            gMSGSRC.2:ud            0x00080008:ud                                                   {NoDDChk} //\r
+\r
+    // Read 16x9 pixels\r
+    send (8)   gudREF(0)<1>        mMSGHDRY                                            gMSGSRC<8;8,1>:ud       DAPREAD pMSGDSC:ud\r
+\r
+#endif\r
+\r
+        \r
+//#endif       // !defined(__LOADREF_Y_16x9__)\r
diff --git a/i965_drv_video/shaders/h264/mc/load_Intra_Ref_UV.asm b/i965_drv_video/shaders/h264/mc/load_Intra_Ref_UV.asm
new file mode 100644 (file)
index 0000000..34adbe6
--- /dev/null
@@ -0,0 +1,44 @@
+/*\r
+ * Load all reference U/V samples from neighboring macroblocks\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__LOAD_INTRA_REF_UV__)            // Make sure this is only included once\r
+#define __LOAD_INTRA_REF_UV__\r
+\r
+// Module name: load_Intra_Ref_UV.asm\r
+//\r
+// Load all reference U/V samples from neighboring macroblocks\r
+//\r
+// Note: Since loading of U/V data always follows writing of Y, the message descriptor is manipulated\r
+// to avoid recalculating due to frame/field variations.\r
+\r
+// First load top 20x1 row U/V reference samples\r
+// 4 from macroblock D (actually use 2), 16 from macroblock B\r
+//\r
+    shr        (1)     I_ORIY<1>:w             I_ORIY<0;1,0>:w 1:w             // Adjust I_ORIY for NV12 format (Y origin halved in place)\r
+    add        (2)     MSGSRC.0<1>:d   I_ORIX<2;2,1>:w TOP_REF_OFFSET<2;2,1>:b {NoDDClr}       // Reference samples positioned at (-4, -1)\r
+    mov (1)    MSGSRC.2:ud             0x00000013:ud {NoDDChk}                 // Block width and height (20x1), encoded as (height-1)<<16 | (width-1)\r
+\r
+//  Update message descriptor based on previous Y block write:\r
+//  MSGDSC is adjusted in place (write -> read control, DESTY -> DESTUV, plus RESP_LEN(1)) instead of being rebuilt\r
+#ifdef DEV_ILK\r
+    add (1)    MSGDSC  MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC+DESTUV-DWBWMSGDSC_WC-0x10000000-DESTY:ud  // Set message descriptor (DEV_ILK build; only the subtracted constant differs)\r
+#else\r
+    add (1)    MSGDSC  MSGDSC  RESP_LEN(1)+DWBRMSGDSC_RC+DESTUV-DWBWMSGDSC_WC-0x00800000-DESTY:ud  // Set message descriptor (non-ILK build)\r
+#endif // DEV_ILK\r
+\r
+    send (8)   INTRA_REF_TOP_D(0)      MSGHDR  MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Top row is returned in INTRA_REF_TOP_D\r
+\r
+// Then load left 4x8 reference samples (actually use 1x8 column)\r
+// NOTE(review): this send uses MSGHDRUV while the top-row send uses MSGHDR - confirm intentional\r
+    add        (1)     MSGSRC.1<1>:d   MSGSRC.1<0;1,0>:d       1:w {NoDDClr}   // Reference samples positioned next row (Y origin + 1)\r
+    mov (1)    MSGSRC.2:ud             0x00070003:ud {NoDDChk}                 // Block width and height (4x8)\r
+    send (8)   INTRA_REF_LEFT_D(0)     MSGHDRUV        MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Left columns are returned in INTRA_REF_LEFT_D\r
+\r
+// End of load_Intra_Ref_UV\r
+#endif // !defined(__LOAD_INTRA_REF_UV__)\r
diff --git a/i965_drv_video/shaders/h264/mc/load_Intra_Ref_Y.asm b/i965_drv_video/shaders/h264/mc/load_Intra_Ref_Y.asm
new file mode 100644 (file)
index 0000000..de8ec49
--- /dev/null
@@ -0,0 +1,37 @@
+/*\r
+ * Load all reference Y samples from neighboring macroblocks\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__LOAD_INTRA_REF_Y__)             // Make sure this is only included once\r
+#define __LOAD_INTRA_REF_Y__\r
+\r
+// Module name: load_Intra_Ref_Y.asm\r
+//\r
+// Load all reference Y samples from neighboring macroblocks\r
+//\r
+load_Intra_Ref_Y:\r
+//     shl (2) I_ORIX<1>:uw    ORIX<2;2,1>:ub  4:w             // (disabled) Convert MB origin to pixel unit\r
+\r
+// First load top 28x1 row reference samples\r
+// 4 from macroblock D (actually use 1), 16 from macroblock B, and 8 from macroblock C\r
+// MSGSRC.2 encodes the block size as (height-1)<<16 | (width-1)\r
+    add        (2)     MSGSRC.0<1>:d   I_ORIX<2;2,1>:w TOP_REF_OFFSET<2;2,1>:b {NoDDClr}       // Reference samples positioned at (-4, -1)\r
+    mov (1)    MSGSRC.2:ud             0x0000001B:ud {NoDDChk}                                                         // Block width and height (28x1)\r
+    add (1)    MSGDSC  REG_MBAFF_FIELD<0;1,0>:uw       RESP_LEN(1)+DWBRMSGDSC_RC+DESTY:ud  // Set message descriptor (built from REG_MBAFF_FIELD, 1 response register)\r
+    send (8)   INTRA_REF_TOP_D(0)      MSGHDRY0        MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Top row is returned in INTRA_REF_TOP_D\r
+\r
+// Then load left 4x16 reference samples (actually use 1x16 column)\r
+//\r
+    add        (1)     MSGSRC.1<1>:d   MSGSRC.1<0;1,0>:d       1:w {NoDDClr}   // Reference samples positioned next row (Y origin + 1)\r
+    mov (1)    MSGSRC.2:ud             0x000F0003:ud   {NoDDChk}                       // Block width and height (4x16); literal written with all 8 hex digits\r
+    add (1)    MSGDSC                  MSGDSC  RESP_LEN(1):ud  // Need to read 1 more GRF register\r
+    send (8)   INTRA_REF_LEFT_D(0)     MSGHDRY1        MSGSRC<8;8,1>:ud        DAPREAD MSGDSC  // Left columns are returned in INTRA_REF_LEFT_D\r
+\r
+       RETURN\r
+// End of load_Intra_Ref_Y\r
+#endif // !defined(__LOAD_INTRA_REF_Y__)\r
diff --git a/i965_drv_video/shaders/h264/mc/null.g4a b/i965_drv_video/shaders/h264/mc/null.g4a
new file mode 100644 (file)
index 0000000..f138029
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author:
+ *    Zou Nan hai <nanhai.zou@intel.com>
+ *    Xiang Haihao <haihao.xiang@intel.com>
+ *
+ */
+define(`YUV_color',`0xFFFFFFFFUD')
+shl(2) g62.0<1>UD g3.4<2,2,1>UB 4UW {align1}; /* g62.0 and g62.4 = the two bytes at g3.4 times 16 (presumably MB X and Y turned into a pixel origin) */
+mov(1) g62.8<1>UD 0x000f000fUD {align1}; /* block size word for the first write: 16x16 */
+mov(16) m1<1>UD YUV_color {align1 compr}; /* fill m1..m8 with the constant fill value */
+mov(16) m3<1>UD YUV_color {align1 compr};
+mov(16) m5<1>UD YUV_color {align1 compr};
+mov(16) m7<1>UD YUV_color {align1 compr};
+send(16) 0 acc0<1>UW g62<8,8,1>UW write(0, 0, 2, 0) mlen 9 rlen 0 {align1}; /* write through binding table entry 0 */
+
+shr(1) g62.4<1>UD g62.4<1,1,1>UD 1UW {align1}; /* halve the Y origin for the second surface */
+mov(1) g62.8<1>UD 0x0007000fUD {align1}; /* block size word for the second write: 16x8 */
+mov(16) m1<1>UD YUV_color {align1 compr}; /* refill m1..m4 */
+mov(16) m3<1>UD YUV_color {align1 compr};
+send(16) 0 acc0<1>UW g62<8,8,1>UW write(1, 0, 2, 0) mlen 5 rlen 0 {align1}; /* write through binding table entry 1 */
+
+send(16) 0 acc0<1>UW g0<8,8,1>UW thread_spawner(0, 0, 0) mlen 1 rlen 0 {align1 EOT}; /* end of thread */
diff --git a/i965_drv_video/shaders/h264/mc/null.g4b b/i965_drv_video/shaders/h264/mc/null.g4b
new file mode 100644 (file)
index 0000000..fdd18e6
--- /dev/null
@@ -0,0 +1,13 @@
+   { 0x00200009, 0x27c02e21, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00802001, 0x20200062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20600062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20a00062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20e00062, 0x00000000, 0xffffffff },
+   { 0x00800031, 0x24001d28, 0x008d07c0, 0x05902000 },
+   { 0x00000008, 0x27c42c21, 0x002107c4, 0x00010001 },
+   { 0x00000001, 0x27c80061, 0x00000000, 0x0007000f },
+   { 0x00802001, 0x20200062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20600062, 0x00000000, 0xffffffff },
+   { 0x00800031, 0x24001d28, 0x008d07c0, 0x05502001 },
+   { 0x00800031, 0x24001d28, 0x008d0000, 0x87100000 },
diff --git a/i965_drv_video/shaders/h264/mc/null.g4b.gen5 b/i965_drv_video/shaders/h264/mc/null.g4b.gen5
new file mode 100644 (file)
index 0000000..7ecb90d
--- /dev/null
@@ -0,0 +1,13 @@
+   { 0x00200009, 0x27c02e21, 0x00450064, 0x00040004 },
+   { 0x00000001, 0x27c80061, 0x00000000, 0x000f000f },
+   { 0x00802001, 0x20200062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20600062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20a00062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20e00062, 0x00000000, 0xffffffff },
+   { 0x00800031, 0x24001d28, 0x508d07c0, 0x12082000 },
+   { 0x00000008, 0x27c42c21, 0x002107c4, 0x00010001 },
+   { 0x00000001, 0x27c80061, 0x00000000, 0x0007000f },
+   { 0x00802001, 0x20200062, 0x00000000, 0xffffffff },
+   { 0x00802001, 0x20600062, 0x00000000, 0xffffffff },
+   { 0x00800031, 0x24001d28, 0x508d07c0, 0x0a082001 },
+   { 0x00800031, 0x24001d28, 0x748d0000, 0x82000000 },
diff --git a/i965_drv_video/shaders/h264/mc/recon_C_4x4.asm b/i965_drv_video/shaders/h264/mc/recon_C_4x4.asm
new file mode 100644 (file)
index 0000000..3a2a921
--- /dev/null
@@ -0,0 +1,37 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Recon_C_4x4.asm\r
+//\r
+//  $Revision: 11 $\r
+//  $Date: 10/03/06 5:28p $\r
+//\r
+\r
+\r
+//#if !defined(__RECON_C_4x4__)                // Make sure this is only included once\r
+//#define __RECON_C_4x4__\r
+\r
+\r
+       // Each add.sat below: 4 word residuals + 4 predicted samples -> saturated bytes, stored back in place at stride 2. TODO: Use instruction compression\r
+       add.sat (4) r[pERRORC,0]<2>:ub                  r[pERRORC,0]<4;4,1>:w                   gubCPRED(0)<16;4,4>\r
+       add.sat (4) r[pERRORC,128]<2>:ub                r[pERRORC,128]<4;4,1>:w                 gubCPRED(0,2)<16;4,4>   // +128 byte offset: presumably the other chroma component - TODO confirm\r
+       add.sat (4) r[pERRORC,32]<2>:ub                 r[pERRORC,32]<4;4,1>:w                  gubCPRED(1)<16;4,4>\r
+       add.sat (4) r[pERRORC,128+32]<2>:ub             r[pERRORC,128+32]<4;4,1>:w              gubCPRED(1,2)<16;4,4>\r
+       \r
+       add.sat (4) r[pERRORC,16]<2>:ub                 r[pERRORC,16]<4;4,1>:w                  gubCPRED(0,16)<16;4,4>  // Same pattern at error offsets +16 / +48 and prediction sub-offsets 16 / 18\r
+       add.sat (4) r[pERRORC,128+16]<2>:ub             r[pERRORC,128+16]<4;4,1>:w              gubCPRED(0,18)<16;4,4>\r
+       add.sat (4) r[pERRORC,48]<2>:ub                 r[pERRORC,48]<4;4,1>:w                  gubCPRED(1,16)<16;4,4>\r
+       add.sat (4) r[pERRORC,128+48]<2>:ub             r[pERRORC,128+48]<4;4,1>:w              gubCPRED(1,18)<16;4,4>\r
+\r
+       // Increase chroma error block offset by 8 bytes (skipped when MONO is defined)\r
+#ifndef MONO\r
+       add (1)         pERRORC:w                       pERRORC:w                                               8:w\r
+#endif\r
+\r
+        \r
+//#endif       // !defined(__RECON_C_4x4__)\r
diff --git a/i965_drv_video/shaders/h264/mc/recon_Y_8x8.asm b/i965_drv_video/shaders/h264/mc/recon_Y_8x8.asm
new file mode 100644 (file)
index 0000000..6017790
--- /dev/null
@@ -0,0 +1,27 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: Recon_Y_8x8.asm\r
+//\r
+//  $Revision: 10 $\r
+//  $Date: 9/22/06 2:50p $\r
+//\r
+\r
+\r
+//#if !defined(__RECON_Y_8x8__)                // Make sure this is only included once\r
+//#define __RECON_Y_8x8__\r
+\r
+\r
+       add.sat (16)            r[pERRORY,0]<2>:ub                      r[pERRORY,0]<16;16,1>:w                         gubYPRED(0)     // 16 word residuals + prediction -> saturated bytes, stored in place at stride 2\r
+       add.sat (16)            r[pERRORY,nGRFWIB]<2>:ub        r[pERRORY,nGRFWIB]<16;16,1>:w           gubYPRED(1)     // Same for the next three registers of the error block\r
+       add.sat (16)            r[pERRORY,nGRFWIB*2]<2>:ub      r[pERRORY,nGRFWIB*2]<16;16,1>:w         gubYPRED(2)\r
+       add.sat (16)            r[pERRORY,nGRFWIB*3]<2>:ub      r[pERRORY,nGRFWIB*3]<16;16,1>:w         gubYPRED(3)\r
+       \r
+       add (1)                         pERRORY:w                                       pERRORY:w                                                       128:w   // Advance luma error pointer by 128 bytes (equals the 4 registers above if nGRFWIB is 32 - TODO confirm)\r
+\r
+//#endif       // !defined(__RECON_Y_8x8__)\r
diff --git a/i965_drv_video/shaders/h264/mc/roundShift_C_4x4.asm b/i965_drv_video/shaders/h264/mc/roundShift_C_4x4.asm
new file mode 100644 (file)
index 0000000..c609159
--- /dev/null
@@ -0,0 +1,26 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//     Kernel name: RoundShift_C_4x4.asm\r
+//\r
+//     Do (...+32)>>6 to 4x4 (NV12 8x4) interpolated chrominance data\r
+//\r
+\r
+\r
+//#if !defined(__RoundShift_C_4x4__)           // Make sure this is only included once\r
+//#define __RoundShift_C_4x4__\r
+\r
+\r
+       // Two-step (x+32)>>6: add the rounding term into acc0/acc1, then shift with saturation. TODO: Optimize using instruction compression\r
+       add (16)        acc0<1>:w                                       r[pRESULT,0]<16;16,1>:w                 32:w    // acc0 = first result register + 32\r
+       add (16)        acc1<1>:w                                       r[pRESULT,nGRFWIB]<16;16,1>:w   32:w    // acc1 = second result register + 32\r
+       asr.sat (16) r[pRESULT,0]<2>:ub                 acc0<16;16,1>:w                                 6:w     // >>6, saturate to byte, store in place at stride 2\r
+       asr.sat (16) r[pRESULT,nGRFWIB]<2>:ub   acc1<16;16,1>:w                                 6:w\r
+       \r
+\r
+//#endif       // !defined(__RoundShift_C_4x4__)\r
diff --git a/i965_drv_video/shaders/h264/mc/save_16x16_Y.asm b/i965_drv_video/shaders/h264/mc/save_16x16_Y.asm
new file mode 100644 (file)
index 0000000..713e12c
--- /dev/null
@@ -0,0 +1,42 @@
+/*\r
+ * Save decoded Y picture data to frame buffer\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SAVE_16X16_Y__)         // Make sure this is only included once\r
+#define __SAVE_16X16_Y__\r
+\r
+// Module name: save_16x16_Y.asm\r
+//\r
+// Save decoded Y picture data to frame buffer\r
+//\r
+\r
+save_16x16_Y:\r
+\r
+    mov (1) MSGSRC.2:ud                0x000F000F:ud {NoDDClr}         // Block width and height (16x16), encoded as (height-1)<<16 | (width-1)\r
+    mov (2) MSGSRC.0:ud                I_ORIX<2;2,1>:w {NoDDChk}       // X, Y offset\r
+#ifdef DEV_ILK\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(8)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00200000:ud  // Turn the previous read descriptor into an 8-register write (DEV_ILK constant)\r
+#else\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(8)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00020000:ud  // Turn the previous read descriptor into an 8-register write (non-ILK constant)\r
+#endif // DEV_ILK\r
+\r
+    mov (1) PDECBUF_UD<1>:ud   0x10001*DECBUF*GRFWIB+0x00400000:ud     // Pointers to row 0 and 2 of decoded data\r
+\r
+    $for(0,0; <8; 2,4) {\r
+       mov (32)        MSGPAYLOAD(%1)<1>       r[PDECBUF, %2*GRFWIB]REGION(16,2) {Compr}               // Block Y0/Y2 (%1 steps by 2, %2 by 4)\r
+       mov (32)        MSGPAYLOAD(%1,16)<1>    r[PDECBUF, (1+%2)*GRFWIB]REGION(16,2) {Compr}   // Block Y1/Y3\r
+    }\r
+\r
+//  Write the 16x16 Y block; the message descriptor was already derived\r
+//  from the previous read setup above\r
+    send (8)   REG_WRITE_COMMIT_Y<1>:ud        MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+    RETURN\r
+// End of save_16x16_Y\r
+\r
+#endif // !defined(__SAVE_16X16_Y__)\r
diff --git a/i965_drv_video/shaders/h264/mc/save_4x4_Y.asm b/i965_drv_video/shaders/h264/mc/save_4x4_Y.asm
new file mode 100644 (file)
index 0000000..415034a
--- /dev/null
@@ -0,0 +1,43 @@
+/*\r
+ * Save Intra_4x4 decoded Y picture data to frame buffer\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SAVE_4X4_Y__)           // Make sure this is only included once\r
+#define __SAVE_4X4_Y__\r
+\r
+// Module name: save_4x4_Y.asm\r
+//\r
+// Save Intra_4x4 decoded Y picture data to frame buffer\r
+// Note: Each 4x4 block is stored in 1 GRF register in the order of block raster scan order,\r
+// i.e. 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15\r
+\r
+save_4x4_Y:\r
+\r
+    mov (1) MSGSRC.2:ud                0x000F000F:ud {NoDDClr}         // Block width and height (16x16), encoded as (height-1)<<16 | (width-1)\r
+    mov (2) MSGSRC.0:ud                I_ORIX<2;2,1>:w {NoDDChk}       // X, Y offset\r
+#ifdef DEV_ILK\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(8)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00200000:ud  // Turn the previous read descriptor into an 8-register write (DEV_ILK constant)\r
+#else\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(8)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00020000:ud  // Turn the previous read descriptor into an 8-register write (non-ILK constant)\r
+#endif // DEV_ILK\r
+\r
+    $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOAD(%1)<1>               DEC_Y(%1)<16;4,1>       // <16;4,1> gathers 4 bytes from each 16-byte group into one 16-byte payload half-row\r
+       mov (16)        MSGPAYLOAD(%1,16)<1>    DEC_Y(%1,4)<16;4,1>     // Same gather starting at byte offsets 4, 8 and 12\r
+       mov (16)        MSGPAYLOAD(%1+1)<1>             DEC_Y(%1,8)<16;4,1>\r
+       mov (16)        MSGPAYLOAD(%1+1,16)<1>  DEC_Y(%1,12)<16;4,1>\r
+    }\r
+\r
+//  Write the 16x16 Y block; the message descriptor was already derived\r
+//  from the previous read setup above\r
+    send (8)   REG_WRITE_COMMIT_Y<1>:ud        MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC\r
+\r
+    RETURN\r
+// End of save_4x4_Y\r
+\r
+#endif // !defined(__SAVE_4X4_Y__)\r
diff --git a/i965_drv_video/shaders/h264/mc/save_8x8_UV.asm b/i965_drv_video/shaders/h264/mc/save_8x8_UV.asm
new file mode 100644 (file)
index 0000000..aa76af9
--- /dev/null
@@ -0,0 +1,51 @@
+/*\r
+ * Save decoded U/V picture data to frame buffer\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SAVE_8x8_UV__)          // Make sure this is only included once\r
+#define __SAVE_8x8_UV__\r
+\r
+// Module name: save_8x8_UV.asm\r
+//\r
+// Save decoded U/V picture data to frame buffer\r
+//\r
+\r
+    mov (1) MSGSRC.2:ud            0x0007000F:ud {NoDDClr}             // Block width and height (16x8), encoded as (height-1)<<16 | (width-1)\r
+    mov (2) MSGSRC.0<1>:ud  I_ORIX<2;2,1>:w    {NoDDChk}       // I_ORIX has already been adjusted for NV12\r
+\r
+//  Update message descriptor based on previous read setup\r
+//  (read control swapped for write control, message length 4; only the subtracted constant differs per platform)\r
+#ifdef DEV_ILK\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(4)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00100000:ud  // Set message descriptor\r
+#else\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(4)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00010000:ud  // Set message descriptor\r
+#endif // DEV_ILK\r
+\r
+// Write U/V picture data\r
+// NOTE(review): the movs below carry no explicit execution size; presumably a default set elsewhere applies - confirm\r
+#ifndef MONO\r
+    mov            MSGPAYLOAD(0,0)<1>  DEC_UV(0)REGION(16,2)   // U/V row 0\r
+    mov            MSGPAYLOAD(0,16)<1> DEC_UV(1)REGION(16,2)   // U/V row 1\r
+    mov            MSGPAYLOAD(1,0)<1>  DEC_UV(2)REGION(16,2)   // U/V row 2\r
+    mov            MSGPAYLOAD(1,16)<1> DEC_UV(3)REGION(16,2)   // U/V row 3\r
+    mov            MSGPAYLOAD(2,0)<1>  DEC_UV(4)REGION(16,2)   // U/V row 4\r
+    mov            MSGPAYLOAD(2,16)<1> DEC_UV(5)REGION(16,2)   // U/V row 5\r
+    mov            MSGPAYLOAD(3,0)<1>  DEC_UV(6)REGION(16,2)   // U/V row 6\r
+    mov            MSGPAYLOAD(3,16)<1> DEC_UV(7)REGION(16,2)   // U/V row 7\r
+#else  // defined(MONO)\r
+    $for(0; <4; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>              0x80808080:ud {Compr}   // Monochrome: fill the payload with 0x80 bytes instead of decoded chroma\r
+       }\r
+\r
+#endif // !defined(MONO)\r
+\r
+       send (8)        REG_WRITE_COMMIT_UV<1>:ud       MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC  // Write the 16x8 U/V block\r
+\r
+// End of save_8x8_UV\r
+\r
+#endif // !defined(__SAVE_8x8_UV__)\r
diff --git a/i965_drv_video/shaders/h264/mc/save_8x8_Y.asm b/i965_drv_video/shaders/h264/mc/save_8x8_Y.asm
new file mode 100644 (file)
index 0000000..3ffca79
--- /dev/null
@@ -0,0 +1,56 @@
+/*\r
+ * Save Intra_8x8 decoded Y picture data to frame buffer\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SAVE_8X8_Y__)           // Make sure this is only included once\r
+#define __SAVE_8X8_Y__\r
+\r
+// Module name: save_8x8_Y.asm\r
+//\r
+// Save Intra_8x8 decoded Y picture data to frame buffer\r
+// Note: Every 4 rows of Y data are interleaved with the horizontal neighboring block\r
+//\r
+save_8x8_Y:\r
+\r
+    mov (1) MSGSRC.2:ud                0x000F000F:ud {NoDDClr}         // Block width and height (16x16), encoded as (height-1)<<16 | (width-1)\r
+    mov (2) MSGSRC.0:ud                I_ORIX<2;2,1>:w {NoDDChk}       // X, Y offset\r
+\r
+//  Update message descriptor based on previous read setup\r
+//  (read control swapped for write control, message length 8; only the subtracted constant differs per platform)\r
+#ifdef DEV_ILK\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(8)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00200000:ud  // Set message descriptor\r
+#else\r
+    add (1)            MSGDSC  MSGDSC MSG_LEN(8)+DWBWMSGDSC_WC-DWBRMSGDSC_RC-0x00020000:ud  // Set message descriptor\r
+#endif // DEV_ILK\r
+\r
+       mov (16)        MSGPAYLOAD(0)<1>        DEC_Y(0)<32;8,1>        // <32;8,1> gathers 8 bytes from each 32-byte group, undoing the 4-row interleave\r
+       mov (16)        MSGPAYLOAD(0,16)<1>     DEC_Y(0,8)<32;8,1>      // Source byte offsets 0/8/16/24 fill successive 16-byte payload halves\r
+       mov (16)        MSGPAYLOAD(1,0)<1>      DEC_Y(0,16)<32;8,1>\r
+       mov (16)        MSGPAYLOAD(1,16)<1>     DEC_Y(0,24)<32;8,1>\r
+\r
+       mov (16)        MSGPAYLOAD(2)<1>        DEC_Y(2)<32;8,1>        // Same pattern from DEC_Y(2) into payload registers 2-3\r
+       mov (16)        MSGPAYLOAD(2,16)<1>     DEC_Y(2,8)<32;8,1>\r
+       mov (16)        MSGPAYLOAD(3,0)<1>      DEC_Y(2,16)<32;8,1>\r
+       mov (16)        MSGPAYLOAD(3,16)<1>     DEC_Y(2,24)<32;8,1>\r
+\r
+       mov (16)        MSGPAYLOAD(4)<1>        DEC_Y(4)<32;8,1>        // Same pattern from DEC_Y(4) into payload registers 4-5\r
+       mov (16)        MSGPAYLOAD(4,16)<1>     DEC_Y(4,8)<32;8,1>\r
+       mov (16)        MSGPAYLOAD(5,0)<1>      DEC_Y(4,16)<32;8,1>\r
+       mov (16)        MSGPAYLOAD(5,16)<1>     DEC_Y(4,24)<32;8,1>\r
+\r
+       mov (16)        MSGPAYLOAD(6)<1>        DEC_Y(6)<32;8,1>        // Same pattern from DEC_Y(6) into payload registers 6-7\r
+       mov (16)        MSGPAYLOAD(6,16)<1>     DEC_Y(6,8)<32;8,1>\r
+       mov (16)        MSGPAYLOAD(7,0)<1>      DEC_Y(6,16)<32;8,1>\r
+       mov (16)        MSGPAYLOAD(7,16)<1>     DEC_Y(6,24)<32;8,1>\r
+\r
+    send (8)   REG_WRITE_COMMIT_Y<1>:ud        MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC  // Write the 16x16 Y block\r
+\r
+    RETURN\r
+// End of save_8x8_Y\r
+\r
+#endif // !defined(__SAVE_8X8_Y__)\r
diff --git a/i965_drv_video/shaders/h264/mc/save_I_PCM.asm b/i965_drv_video/shaders/h264/mc/save_I_PCM.asm
new file mode 100644 (file)
index 0000000..77be35e
--- /dev/null
@@ -0,0 +1,56 @@
+/*\r
+ * Save I_PCM Y samples to Y picture buffer\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: save_I_PCM.asm\r
+//\r
+// First save I_PCM Y samples to Y picture buffer\r
+//\r
+    mov (1) MSGSRC.2:ud                0x000F000F:ud {NoDDClr}                 // Block width and height (16x16), encoded as (height-1)<<16 | (width-1)\r
+       shl (2) MSGSRC.0:ud             ORIX<2;2,1>:ub  4:w     {NoDDChk}       // Convert MB origin to pixel units (x16)\r
+\r
+    add (1)    MSGDSC  REG_MBAFF_FIELD<0;1,0>:uw       MSG_LEN(8)+DWBWMSGDSC_WC+DESTY:ud  // Set message descriptor (8-register write to DESTY)\r
+\r
+    $for(0; <8; 2) {\r
+       mov (32)        MSGPAYLOAD(%1)<1>               I_PCM_Y(%1)REGION(16,1) {Compr,NoDDClr}\r
+       mov (32)        MSGPAYLOAD(%1,16)<1>    I_PCM_Y(%1,16)REGION(16,1) {Compr,NoDDChk}\r
+    }\r
+\r
+    send (8)   REG_WRITE_COMMIT_Y<1>:ud        MSGHDR  MSGSRC<8;8,1>:ud        DAPWRITE        MSGDSC  // Write the 16x16 Y block\r
+\r
+// Then save I_PCM U/V samples to U/V picture buffer\r
+// (the header registers MSGHDR.1/.2 are patched directly here, not MSGSRC)\r
+    mov (1) MSGHDR.2:ud                0x0007000F:ud                   {NoDDClr}       // Block width and height (16x8)\r
+    asr (1) MSGHDR.1:ud                MSGSRC.1<0;1,0>:ud      1:w {NoDDChk}   // Y offset should be halved\r
+    add (1)    MSGDSC                  MSGDSC                  0x0-MSG_LEN(4)+0x1:d    // Set message descriptor for U/V: message length 8 -> 4, low field +1 (presumably DESTY -> DESTUV - TODO confirm)\r
+\r
+#if 0\r
+    and.z.f0.0 (1)  NULLREG REG_CHROMA_FORMAT_IDC  CHROMA_FORMAT_IDC:ud\r
+       (f0.0) jmpi (1) MONOCHROME_I_PCM\r
+#endif\r
+\r
+#ifndef MONO\r
+// Non-monochrome picture\r
+// (destination stride <2> with sub-offsets 0 and 1 interleaves U and V bytes in the payload)\r
+    $for(0,0; <4; 2,1) {\r
+       mov (16)        MSGPAYLOAD(%1)<2>               I_PCM_UV(%2)REGION(16,1)                // U data\r
+       mov (16)        MSGPAYLOAD(%1,1)<2>             I_PCM_UV(%2+2)REGION(16,1)              // V data\r
+       mov (16)        MSGPAYLOAD(%1+1)<2>             I_PCM_UV(%2,16)REGION(16,1)             // U data\r
+       mov (16)        MSGPAYLOAD(%1+1,1)<2>   I_PCM_UV(%2+2,16)REGION(16,1)   // V data\r
+       }\r
+#else  // defined(MONO)\r
+MONOCHROME_I_PCM:\r
+    $for(0; <4; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>              0x80808080:ud {Compr}   // Monochrome: fill the payload with 0x80 bytes\r
+       }\r
+\r
+#endif // !defined(MONO)\r
+\r
+    send (8)   REG_WRITE_COMMIT_UV<1>:ud       MSGHDR  null:ud DAPWRITE        MSGDSC  // Source operand is null: MSGHDR was updated in place above\r
+\r
+// End of save_I_PCM\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard.asm b/i965_drv_video/shaders/h264/mc/scoreboard.asm
new file mode 100644 (file)
index 0000000..6fb41cf
--- /dev/null
@@ -0,0 +1,282 @@
+/*\r
+ * Dependency control scoreboard kernel\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: scoreboard.asm\r
+//\r
+// Dependency control scoreboard kernel\r
+//\r
+//  $Revision: 16 $\r
+//  $Date: 10/18/06 4:10p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: scoreboard\r
+// ----------------------------------------------------\r
+// ----------------------------------------------------\r
+//  Scoreboard structure\r
+// ----------------------------------------------------\r
+//\r
+//     1 DWORD per thread\r
+//\r
+//     Bit 31: "Checking" thread, i.e. an intra MB that sends "check dependency" message\r
+//     Bit 30: "Completed" thread. This bit set by an "update" message from intra/inter MB.\r
+//     Bits 29:28:     Must set to 0\r
+//     Bits 27:24:     EUID\r
+//     Bits 23:18: Reserved\r
+//     Bits 17:16: TID\r
+//     Bits 15:8:      X offset of current MB\r
+//     Bits 7:5:       Reserved\r
+//     Bits 4:0: 5 bits of available neighbor MB flags\r
+\r
+.kernel scoreboard\r
+SCOREBOARD:\r
+\r
+#ifdef _DEBUG\r
+// WA for FULSIM so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0xf0aa55a5:ud\r
+#endif\r
+\r
+#include "header.inc"\r
+#include "scoreboard_header.inc"\r
+\r
+//\r
+//  Now, begin source code....\r
+//\r
+\r
+.code\r
+\r
+#ifdef AS_ENABLED\r
+       and.z.f0.1      (1)     NULLREG r0.2<0;1,0>:ud  TH_RES  // Is this a restarted thread previously interrupted?\r
+       (f0.1) jmpi     (1)     Scoreboard_Init\r
+\r
+       #include "scoreboard_restore_AS.asm"\r
+\r
+       jmpi (1)        Scoreboard_OpenGW\r
+Scoreboard_Init:\r
+#endif // End AS_ENABLED\r
+\r
+// Scoreboard must be initialized to 0xc000ffff, meaning all "completed"\r
+// And it also avoids message mis-handling for the first MB\r
+    $for(0; <32; 2) {\r
+       mov (16)        CMD_SB(%1)<1>   0xc000ffff:ud {Compr}\r
+       }\r
+#ifdef DOUBLE_SB                                       // Scoreboard size needs to be doubled\r
+    $for(32; <64; 2) {\r
+       mov (16)        CMD_SB(%1)<1>   0xc000ffff:ud {Compr}\r
+       }\r
+#endif // DOUBLE_SB\r
+\r
+//----------------------------------------------------------\r
+//     Open message gateway for the scoreboard thread\r
+//\r
+//     RegBase = r4 (0x04)\r
+//     Gateway Size = 64 GRF registers (0x6)\r
+//     Dispatch ID = r0.20:ub\r
+//     Scoreboard Thread Key = 0\r
+//----------------------------------------------------------\r
+Scoreboard_OpenGW:\r
+    mov (8)    MSGHDRY0<1>:ud  0x00000000:ud                   // Initialize message header payload with 0\r
+\r
+       // Send a message with register base RegBase=0x04(r4) and Gateway size = 0x6 = 64 GRF reg and Key = 0\r
+       // 000 00000100 00000 00000 110 00000000 ==> 0000 0000 1000 0000 0000 0110 0000 0000\r
+#ifdef AS_ENABLED\r
+       add (1) MSGHDRY0.5<1>:ud r0.20:ub       0x00800700:ud   // Allocate 128 GRFs for message gateway - for SIP to send notification MSG\r
+#else\r
+  #ifdef       DOUBLE_SB\r
+       add (1) MSGHDRY0.5<1>:ud r0.20:ub       0x00800600:ud   // 64 GRF's for CTG-B\r
+  #else\r
+       add (1) MSGHDRY0.5<1>:ud r0.20:ub       0x00800500:ud   // 32 GRF's for CTG-A\r
+  #endif       // DOUBLE_SB\r
+#endif\r
+       send (8)        NULLREG  MSGHDRY0       null:ud    MSG_GW       OGWMSGDSC\r
+\r
+//------------------------------------------------------------------------\r
+//     Send Thread Spawning Message to start dispatching macroblock threads\r
+//\r
+//------------------------------------------------------------------------\r
+#ifdef AS_ENABLED\r
+       mov (8) acc0<1>:ud      CMD_SB(31)<8;8,1>                       // Ensure scoreboard data have been completely restored\r
+#endif // End AS_ENABLED\r
+    mov (8)    MSGHDRY1<1>:ud          r0<8;8,1>:ud            // Initialize message header payload with R0\r
+    mov (1)    MSGHDRY1.4<1>:ud        0x00000400:ud           // Dispatch URB length = 1\r
+\r
+       send (8)        NULLREG  MSGHDRY1       null:ud    TS   TSMSGDSC\r
+\r
+    mov (8)    MSGHDRY0<1>:ud          0x00000000:ud           // Initialize message header payload with 0\r
+\r
+//------------------------------------------------------------------------\r
+//     Scoreboard control data initialization\r
+//------------------------------------------------------------------------\r
+#ifdef AS_ENABLED\r
+       or      (1)     cr0.1:ud        cr0.1:ud        AS_INT_EN               // Enable interrupt\r
+       (f0.1) jmpi     (1)     Scoreboard_State_Init   // Jump if not restarted thread\r
+\r
+       // Restore scoreboard kernel control data to r1 - r3\r
+    mov (1)    m4.1:ud 64:ud                           // Starting r1\r
+    mov (1)    m4.2:ud 0x0002001f:ud           // for 3 registers\r
+    send (8)   r1.0<1>:ud      m4      null:ud DWBRMSGDSC_SC+0x00030000+AS_SAVE        // Restore r1 - r3\r
+       mov     (8)     a0.0<1>:uw      AR_SAVE<8;8,1>:uw                               // Restore all address registers\r
+\r
+// Check whether all MBs have been decoded\r
+       cmp.e.f0.0 (1)  NULLREG TotalMB<0;1,0>:w        0:w     // Set "Last MB" flag\r
+       (-f0.0) jmpi (1)        Before_First_MB\r
+    END_THREAD\r
+\r
+// Check whether it is before the first MB\r
+Before_First_MB:\r
+       cmp.e.f0.0 (1)  NULLREG AVAILFLAGD<1>:ud        0x08020401:ud   // in ACBD order\r
+       (f0.0) jmpi (1) Wavefront_Walk\r
+\r
+Scoreboard_State_Init:\r
+#endif // End AS_ENABLED\r
+       mov (2) WFLen_B<2>:w            HEIGHTINMB_1<0;1,0>:w\r
+       mov (1) AVAILFLAGD<1>:ud        0x08020401:ud   // in ACBD order\r
+       mov     (1) CASE00PTR<1>:ud     Notify_MSG_IP-No_Message_IP:ud          // Inter kernel starts\r
+       mov     (1) CASE10PTR<1>:ud     Dependency_Check_IP-No_Message_IP:ud    // Intra kernel starts\r
+#ifdef AS_ENABLED\r
+       mov     (1) CASE11PTR<1>:ud     0:ud            // No message\r
+#else\r
+       mov     (1) CASE11PTR<1>:ud     MB_Loop_IP-No_Message_IP:ud             // No message\r
+#endif // End AS_ENABLED\r
+       mov     (1) StartXD<1>:ud       0:ud\r
+       mov     (1) NewWFOffsetD<1>:ud  0x01ffff00:ud\r
+\r
+       mov (4) WFStart(0)<1>   0xffff:w\r
+       mov (1) WFStart(0)<1>   0:w\r
+\r
+       mov     (8)     a0.0<1>:uw      0x0:uw                                          // Initialize all pointers to 0\r
+\r
+//------------------------------------------------------------------------\r
+//     Scoreboard message handling loop\r
+//------------------------------------------------------------------------\r
+//\r
+Scoreboard_Loop:\r
+       // Calculate current wavefront length\r
+       add.ge.f0.1 (16)        acc0<1>:w       StartX<0;1,0>:w 0:w             // Used for x>2*y check\r
+       mac.g.f0.0 (16) NULLREGW        WFLenY<0;1,0>:w -2:w            // X - 2*Y > 0 ??\r
+       (f0.0) mov (1)  WFLen<1>:w      WFLenY<0;1,0>:w                         // Use smaller vertical wavefront length\r
+       (-f0.0) asr.sat (1)     WFLen<1>:uw     StartX<0;1,0>:w 1:w             // Horizontal wavefront length is smaller\r
+\r
+       // Initialize 5-MB group\r
+#ifdef ONE_MB_WA\r
+       mov (2) MBINDEX(0)<1>           WFStart(0)<2;2,1>\r
+       (f0.1) add (4) MBINDEX(0,2)<1>          WFStart(0,1)<4;4,1>     -1:w\r
+       (-f0.1) add (4) MBINDEX(0,2)<1>         WFStart(0,0)<4;4,1>     -1:w\r
+       (-f0.1) mov (1) StartX<1>:w             0:w                                     // WA for 1-MB wide pictures\r
+#else\r
+       mov (2) MBINDEX(0)<1>           WFStart(0)<2;2,1>                       {NoDDClr}\r
+       add (4) MBINDEX(0,2)<1>         WFStart(0,1)<4;4,1>     -1:w    {NoDDChk}\r
+#endif\r
+\r
+       // Update WFStart\r
+       mov (8) acc0<1>:w       WFStart(0)<0;1,0>                                       // Move WFStart(0) to acc0 to remove dependency later\r
+       mov (4) WFStart(0,1)<1> WFStart(0)<4;4,1>       {NoDDClr}       // Shift WFStart(0:2) to WFStart(1:3)\r
+       add (1) WFStart(0)<1>   acc0.0<0;1,0>:w         WFLen<0;1,0>:w  {NoDDChk}       // WFStart(0) = WFStart(0) + WFLen\r
+\r
+       mul (8) MBINDEX(0)<1>   MBINDEX(0)<8;8,1>       4:w     // Adjust MB order # to be DWORD aligned\r
+       and (1) DEPPTR<1>:uw    acc0<0;1,0>:w   SB_MASK*4:uw {NoDDClr}  // Wrap around scoreboard entries for current MB\r
+       and (4) DEPPTRL<1>:uw   acc0.1<4;4,1>:w SB_MASK*4:uw {NoDDChk}  // Wrap around scoreboard entries for neighbor MBs\r
+\r
+Wavefront_Walk:\r
+       wait    n0:ud\r
+//     Check for combined "checking" or "completed" threads in forwarded message\r
+//     2 MSB of scoreboard message indicate:\r
+//     0b00 = "inter start" message\r
+//     0b10 = "intra start" message\r
+//     0b11 = "No Message" or "inter complete" message\r
+//     0b01 = Reserved (should never occur)\r
+//\r
+MB_Loop:\r
+       shr     (1)     PMSGSEL<1>:uw   r[CMDPTR,CMD_SB_REG_OFF*GRFWIB+2]<0;1,0>:uw     12:w                                    // DWORD aligned pointer to message handler\r
+       and.nz.f0.1 (4) NULLREG r[CMDPTR,CMD_SB_REG_OFF*GRFWIB]<0;1,0>:ub       AVAILFLAG<4;4,1>:ub             // f0.1 4 LSB will have the available flags in ACBD order\r
+       mov (1) MSGHDRY0.4<1>:ud        r[CMDPTR,CMD_SB_REG_OFF*GRFWIB]<0;1,0>:ud               // Copy MB thread info from scoreboard\r
+       jmpi (1)        r[PMSGSEL, INLINE_REG_OFF*GRFWIB+16]<0;1,0>:d\r
+\r
+//     Now determine whether this is "inter done" or "no message"\r
+//     through checking debug_counter\r
+//\r
+No_Message:\r
+#ifdef AS_ENABLED\r
+       cmp.z.f0.1 (1)  NULLREG n0:ud   0       // Are all messages handled?\r
+       and.z.f0.0 (1)  NULLREG cr0.1:ud        AS_INT  // Poll interrupt bit\r
+       (-f0.1) jmpi (1)        MB_Loop                 // Continue polling the remaining message from current thread\r
+\r
+// All messages have been handled\r
+       (f0.0) jmpi (1) Wavefront_Walk          // No interrupt occurs. Wait for next one\r
+\r
+// Interrupt has been detected\r
+// Save all contents and terminate the scoreboard\r
+//\r
+       #include "scoreboard_save_AS.asm"\r
+\r
+       // Save scoreboard control data as well\r
+       //\r
+       mov (8) AR_SAVE<1>:uw   a0.0<8;8,1>:uw          // All address registers needs to be saved\r
+    mov (1)    MSGHDR.1:ud             64:ud\r
+    mov (1)    MSGHDR.2:ud             0x0002001f:ud   // for 3 registers\r
+       $for(0; <3; 1) {\r
+       mov (8) MSGPAYLOADD(%1)<1>      CMD_SB(%1-3)REGION(8,1)\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00300000+AS_SAVE   // Save r1 - r3\r
+\r
+       send (8) NULLREG MSGHDR r0:ud EOTMSGDSC+TH_INT  // Terminate with "Thread Interrupted" bit set\r
+#endif // End AS_ENABLED\r
+\r
+Dependency_Check:\r
+//     Current thread is "checking" but not "completed" (0b10 case).\r
+//     Check for dependency clear using all availability bits\r
+//\r
+       (f0.1) and.z.f0.1 (4)   NULLREG r[DEPPTRL,CMD_SB_REG_OFF*GRFWIB+3]<1,0>:ub      DONEFLAG:uw     // f0.1 4 LSB contains dependency clear\r
+       (f0.1.any4h) jmpi (1)   Dependency_Check                // Dependency not clear, keep polling..\r
+\r
+//     "Checking" thread and dependency cleared, send a message to let the thread go\r
+//\r
+Notify_MSG:\r
+       send (8)        NULLREG  MSGHDRY0       null:ud    MSG_GW       FWDMSGDSC+NOTIFYMSG\r
+\r
+//     Current macroblock has been serviced. Update to next macroblock in special zig-zag order\r
+//\r
+Update_CurMB:\r
+#if 0\r
+       add.ge.f0.0 (1) WFLen<1>:w      WFLen<0;1,0>:w  -1:w                    // Set "End of wavefront" flag\r
+       add (1) TotalMB<1>:w    TotalMB<0;1,0>:w        -1:w                    // Decrement "TotalMB"\r
+#else\r
+       add.ge.f0.0 (2) TotalMB<2>:w    TotalMB<4;2,2>:w        -1:w    // Set "End of wavefront" flag and decrement "TotalMB"\r
+#endif\r
+       add (8) MBINDEX(0)<1>   MBINDEX(0)<8;8,1>       4:w                             // Increment MB indices\r
+       and (1) DEPPTR<1>:uw    acc0<0;1,0>:w   SB_MASK*4:uw {NoDDClr}  // Wrap around 256 scoreboard entries for current MB\r
+       and (4) DEPPTRL<1>:uw   acc0.1<4;4,1>:w SB_MASK*4:uw {NoDDChk}  // Wrap around 256 scoreboard entries for neighbor MBs\r
+       cmp.e.f0.1 (16) NULLREGW  StartX<0;1,0>:uw      WIDTHINMB_1<0;1,0>:uw   // Set "on picture right boundary" flag\r
+#if 0\r
+       (f0.0) jmpi (1) Wavefront_Walk                  // Continue wavefront walking\r
+#else\r
+       (f0.0.all2h) jmpi (1) Wavefront_Walk    // Continue wavefront walking\r
+#endif\r
+\r
+//     Start new wavefront\r
+//\r
+       cmp.e.f0.0 (1)  NULLREG TotalMB<0;1,0>:w                0:w     // Set "Last MB" flag\r
+       (f0.1) add (4)  WFLen<1>:w      WFLen<4;4,1>:w          NewWFOffset<4;4,1>:b\r
+       (f0.1) add (4)  WFStart(0)<1>   WFStart(0)<4;4,1>       1:w\r
+       (-f0.1) add (1) StartX<1>:w             StartX<0;1,0>:w 1:w             // Move to right MB\r
+       (-f0.1) add (1) WFStart(0)<1>   WFStart(0)<0;1,0>       1:w\r
+\r
+       (-f0.0) jmpi (1)        Scoreboard_Loop                         // Not last MB, start new wavefront walking\r
+\r
+// All MBs have been decoded. Terminate the thread now\r
+//\r
+    END_THREAD\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
+\r
+// End of scoreboard\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard_MBAFF.asm b/i965_drv_video/shaders/h264/mc/scoreboard_MBAFF.asm
new file mode 100644 (file)
index 0000000..02a49d8
--- /dev/null
@@ -0,0 +1,299 @@
+/*\r
+ * Dependency control scoreboard kernel for MBAFF frame\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: scoreboard_MBAFF.asm\r
+//\r
+// Dependency control scoreboard kernel for MBAFF frame\r
+//\r
+//  $Revision: 16 $\r
+//  $Date: 10/18/06 4:10p $\r
+//\r
+\r
+// ----------------------------------------------------\r
+//  Main: scoreboard_MBAFF\r
+// ----------------------------------------------------\r
+// ----------------------------------------------------\r
+//  Scoreboard structure\r
+// ----------------------------------------------------\r
+//\r
+//     1 DWORD per thread\r
+//\r
+//     Bit 31: "Checking" thread, i.e. an intra MB that sends "check dependency" message\r
+//     Bit 30: "Completed" thread. This bit set by an "update" message from intra/inter MB.\r
+//     Bits 29:28:     Must set to 0\r
+//     Bits 27:24:     EUID\r
+//     Bits 23:18: Reserved\r
+//     Bits 17:16: TID\r
+//     Bits 15:8:      X offset of current MB\r
+//     Bits 7:5:       Reserved\r
+//     Bits 4:0: 5 bits of available neighbor MB flags\r
+\r
+.kernel scoreboard_MBAFF\r
+SCOREBOARD_MBAFF:\r
+\r
+#ifdef _DEBUG\r
+// WA for FULSIM so we'll know which kernel is being debugged\r
+mov (1) acc0:ud 0xffaa55a5:ud\r
+#endif\r
+\r
+#include "header.inc"\r
+#include "scoreboard_header.inc"\r
+\r
+//\r
+//  Now, begin source code....\r
+//\r
+\r
+.code\r
+\r
+#ifdef AS_ENABLED\r
+       and.z.f0.1      (1)     NULLREG r0.2<0;1,0>:ud  TH_RES  // Is this a restarted thread previously interrupted?\r
+       (f0.1) jmpi     (1)     MBAFF_Scoreboard_Init   // TH_RES bit clear (f0.1 set): fresh start, initialize the scoreboard\r
+\r
+       #include "scoreboard_restore_AS.asm"\r
+\r
+       jmpi (1)        MBAFF_Scoreboard_OpenGW // Restarted thread: scoreboard was restored above, skip the initialization\r
+MBAFF_Scoreboard_Init:\r
+#endif // End AS_ENABLED\r
+\r
+// Scoreboard must be initialized to 0xc000ffff, meaning all "completed"\r
+// It also avoids message mis-handling for the first MB. Each compressed 16-DWORD mov fills two CMD_SB registers (loop step 2)\r
+    $for(0; <32; 2) {\r
+       mov (16)        CMD_SB(%1)<1>   0xc000ffff:ud {Compr}\r
+       }\r
+#ifdef DOUBLE_SB                                       // Scoreboard size needs to be doubled\r
+    $for(32; <64; 2) {\r
+       mov (16)        CMD_SB(%1)<1>   0xc000ffff:ud {Compr}\r
+       }\r
+#endif // DOUBLE_SB\r
+\r
+//----------------------------------------------------------\r
+//     Open message gateway for the scoreboard thread\r
+//\r
+//     RegBase = r4 (0x04)\r
+//     Gateway Size = 64 GRF registers (0x6); the code below selects 32 (0x5), 64 (0x6) or 128 (0x7)\r
+//     Dispatch ID = r0.20:ub\r
+//     Scoreboard Thread Key = 0\r
+//----------------------------------------------------------\r
+MBAFF_Scoreboard_OpenGW:\r
+    mov (8)    MSGHDRY0<1>:ud  0x00000000:ud                   // Initialize message header payload with 0\r
+\r
+       // Send a message with register base RegBase=0x04(r4) and Gateway size = 0x6 = 64 GRF reg and Key = 0\r
+       // 000 00000100 00000 00000 110 00000000 ==> 0000 0000 1000 0000 0000 0110 0000 0000 (0x00800600, the DOUBLE_SB variant)\r
+#ifdef AS_ENABLED\r
+       add (1) MSGHDRY0.5<1>:ud r0.20:ub       0x00800700:ud   // Allocate 128 GRFs for message gateway - for SIP to send notification MSG\r
+#else\r
+  #ifdef       DOUBLE_SB\r
+       add (1) MSGHDRY0.5<1>:ud r0.20:ub       0x00800600:ud   // 64 GRF's for CTG-B\r
+  #else\r
+       add (1) MSGHDRY0.5<1>:ud r0.20:ub       0x00800500:ud   // 32 GRF's for CTG-A\r
+  #endif       // DOUBLE_SB\r
+#endif\r
+\r
+       send (8)        NULLREG  MSGHDRY0       null:ud    MSG_GW       OGWMSGDSC\r
+\r
+//------------------------------------------------------------------------\r
+//     Send Thread Spawning Message to start dispatching macroblock threads\r
+//\r
+//------------------------------------------------------------------------\r
+#ifdef AS_ENABLED\r
+       mov (8) acc0<1>:ud      CMD_SB(31)<8;8,1>                       // Ensure scoreboard data have been completely restored (reads CMD_SB(31) only)\r
+#endif // End AS_ENABLED\r
+    mov (8)    MSGHDRY1<1>:ud          r0<8;8,1>:ud            // Initialize message header payload with R0\r
+    mov (1)    MSGHDRY1.4<1>:ud        0x00000400:ud           // Dispatch URB length = 1\r
+\r
+       send (8)        NULLREG  MSGHDRY1       null:ud    TS   TSMSGDSC\r
+\r
+    mov (8)    MSGHDRY0<1>:ud          0x00000000:ud           // Re-initialize message header payload with 0 (it held the gateway header above)\r
+\r
+//------------------------------------------------------------------------\r
+//     Scoreboard control data initialization\r
+//------------------------------------------------------------------------\r
+#ifdef AS_ENABLED\r
+       or      (1)     cr0.1:ud        cr0.1:ud        AS_INT_EN               // Enable interrupt\r
+       (f0.1) jmpi     (1)     MBAFF_Scoreboard_State_Init     // Jump if not restarted thread\r
+\r
+       // Restore scoreboard kernel control data to r1 - r3\r
+    mov (1)    m4.1:ud 64:ud                           // Row 64 of the save surface = r1 (rows 0 - 63 hold r4 - r67)\r
+    mov (1)    m4.2:ud 0x0002001f:ud           // for 3 registers\r
+    send (8)   r1.0<1>:ud      m4      null:ud DWBRMSGDSC_SC+0x00030000+AS_SAVE        // Restore r1 - r3\r
+       and (1) CMDPTR<1>:uw    MBINDEX(0)<0;1,0>       SB_MASK*4:uw    // Restore scoreboard entries for current MB\r
+\r
+// EOT if all MBs have been decoded\r
+       cmp.e.f0.0 (1)  NULLREG TotalMB<0;1,0>:w        0:w     // Set "Last MB" flag\r
+       (-f0.0) jmpi (1)        MBAFF_Before_First_MB\r
+    END_THREAD\r
+\r
+// Check whether it is before the first MB\r
+MBAFF_Before_First_MB:\r
+       cmp.e.f0.0 (1)  NULLREG AVAILFLAGD<1>:ud        0x08020401:ud   // in ACBD order\r
+       (f0.0) jmpi (1) MBAFF_Wavefront_Walk    // AVAILFLAGD holds its init value: resume walking; otherwise fall through and re-initialize\r
+\r
+MBAFF_Scoreboard_State_Init:\r
+#endif // End AS_ENABLED\r
+       mov (2) WFLen_B<2>:w            HEIGHTINMB_1<0;1,0>:w   // Write HEIGHTINMB_1 to two words, stride 2, starting at WFLen_B\r
+       mov (1) AVAILFLAGD<1>:ud        0x08020401:ud   // in ACBD order\r
+       mov (1) AVAILFLAG1D<1>:ud       0x08020410:ud   // in A_C_B_D_ order\r
+       mov     (1) CASE00PTR<1>:ud     MBAFF_Notify_MSG_IP-MBAFF_No_Message_IP:ud              // Inter kernel starts\r
+       mov     (1) CASE10PTR<1>:ud     MBAFF_Dependency_Check_IP-MBAFF_No_Message_IP:ud        // Intra kernel starts\r
+#ifdef AS_ENABLED\r
+       mov     (1) CASE11PTR<1>:ud     0:ud            // No message: offset 0, i.e. continue at MBAFF_No_Message\r
+#else\r
+       mov     (1) CASE11PTR<1>:ud     MBAFF_MB_Loop_IP-MBAFF_No_Message_IP:ud         // No message: go back to MBAFF_MB_Loop\r
+#endif // End AS_ENABLED\r
+       mov     (1) StartXD<1>:ud       0:ud\r
+       mov     (1) NewWFOffsetD<1>:ud  0x01ffff00:ud   // Bytes (0, -1, -1, 1), added to WFLen.. at the picture right boundary\r
+\r
+       mov (8) WFStart_T(0)<1> 0xffff:w        // Fill 8 words starting at WFStart_T(0) with 0xffff\r
+       mov (1) WFStart_T(0)<1> 0:w             // ... then set WFStart_T[0] = 0\r
+\r
+       mov     (8)     a0.0<1>:uw      0x0:uw                                          // Initialize all pointers to 0\r
+\r
+//------------------------------------------------------------------------\r
+//     Scoreboard message handling loop\r
+//------------------------------------------------------------------------\r
+//\r
+MBAFF_Scoreboard_Loop:\r
+// Calculate current wavefront length (same for top and bottom MB wavefronts)\r
+       add.ge.f0.1 (16)        acc0<1>:w       StartX<0;1,0>:w 0:w     // acc0 = StartX, used for x>2*y check; f0.1 = (StartX >= 0)\r
+       mac.g.f0.0 (16) NULLREGW        WFLenY<0;1,0>:w -2:w    // X - 2*Y > 0 ??\r
+       (f0.0) mov (2)  WFLen_B<1>:w    WFLenY<0;1,0>:w         // Use smaller vertical wavefront length\r
+       (f0.0) mov (1)  WFLen_Save<1>:w WFLenY<0;1,0>:w         // Save current wave front length\r
+       (-f0.0) asr.sat (2)     WFLen_B<1>:uw   StartX<0;1,0>:w 1:w     // Horizontal wavefront length is smaller\r
+       (-f0.0) asr.sat (1)     WFLen_Save<1>:uw        StartX<0;1,0>:w 1:w     // Save current wave front length\r
+\r
+// Initialize 9-MB group for top macroblock wavefront\r
+#ifdef ONE_MB_WA_MBAFF // NOTE(review): this branch never sets MBINDEX(0,6) onward (upper MB group) as the #else branch does - confirm intended\r
+       mov (2) MBINDEX(0)<1>           WFStart_T(0)<2;2,1>\r
+       (f0.1) add (4) MBINDEX(0,2)<1>          WFStart_B(0,1)<4;4,1>   -1:w\r
+       (-f0.1) add (4) MBINDEX(0,2)<1>         WFStart_B(0,0)<4;4,1>   -1:w\r
+       mov (1) MBINDEX(0,5)<1>         WFStart_B(0,1)<0;1,0>\r
+       (-f0.1) mov (1) StartX<1>:w             0:w                                     // WA for 1-MB wide pictures\r
+#else\r
+       mov (2) MBINDEX(0)<1>           WFStart_T(0)<2;2,1>                     {NoDDClr}\r
+       add (4) MBINDEX(0,2)<1>         WFStart_B(0,1)<4;4,1>   -1:w    {NoDDChk,NoDDClr}\r
+       mov (1) MBINDEX(0,5)<1>         WFStart_B(0,1)<0;1,0>           {NoDDChk,NoDDClr}\r
+       add (4) MBINDEX(0,6)<1>         WFStart_T(0,1)<4;4,1>   -1:w    {NoDDChk}       // Upper MB group (C_B_D_x)\r
+#endif\r
+\r
+// Update WFStart_B[0]\r
+       add (8) acc0<1>:w       WFLen<0;1,0>:w  1:w                             // WFLen + 1\r
+       add (1) WFStart_B(0,0)<1>       acc0<0;1,0>:w   WFStart_T(0,0)<0;1,0>   // WFStart_T[0] + WFLen + 1\r
+\r
+MBAFF_Start_Wavefront:\r
+       mul (16)        MBINDEX(0)<1>   MBINDEX(0)REGION(16,1)  4:w             // Adjust MB order # to be DWORD aligned\r
+       and (1) CMDPTR<1>:uw    acc0<0;1,0>:w   SB_MASK*4:uw    // Wrap around scoreboard entries for current MB (reads acc0, presumably the mul result above - confirm)\r
+\r
+MBAFF_Wavefront_Walk:\r
+       wait    n0:ud\r
+\r
+//     Check for combined "checking" or "completed" threads in forwarded message\r
+//     2 MSB of scoreboard message indicate:\r
+//     0b00 = "inter start" message\r
+//     0b10 = "intra start" message\r
+//     0b11 = "No Message" or "inter complete" message\r
+//     0b01 = Reserved (should never occur)\r
+//\r
+MBAFF_MB_Loop:\r
+       shr     (1)     PMSGSEL<1>:uw   r[CMDPTR,CMD_SB_REG_OFF*GRFWIB+2]<0;1,0>:uw     12:w                                    // PMSGSEL = upper word of the entry >> 12: DWORD aligned pointer to message handler\r
+       and.nz.f0.1 (8) NULLREG r[CMDPTR,CMD_SB_REG_OFF*GRFWIB]<0;1,0>:ub       AVAILFLAG<8;8,1>:ub             // f0.1 8 LSB will have the available flags in ACBDA_C_B_D_ order\r
+       mov (1) MSGHDRY0.4<1>:ud        r[CMDPTR,CMD_SB_REG_OFF*GRFWIB]<0;1,0>:ud               // Copy MB thread info from scoreboard\r
+       jmpi (1)        r[PMSGSEL, INLINE_REG_OFF*GRFWIB+16]<0;1,0>:d   // Jump by the offset selected by PMSGSEL (presumably the CASExxPTR table; offsets are relative to MBAFF_No_Message)\r
+\r
+//     Now determine whether this is "inter done" or "no message"\r
+//     through checking debug_counter\r
+//\r
+MBAFF_No_Message:\r
+#ifdef AS_ENABLED\r
+       cmp.z.f0.1 (1)  NULLREG n0:ud   0       // Are all messages handled? (f0.1 set when n0 == 0)\r
+       and.z.f0.0 (1)  NULLREG cr0.1:ud        AS_INT  // Poll interrupt bit (f0.0 set when AS_INT is clear)\r
+       (-f0.1) jmpi (1)        MBAFF_MB_Loop                   // Continue polling the remaining message from current thread\r
+\r
+// All messages have been handled\r
+       (f0.0) jmpi (1) MBAFF_Wavefront_Walk            // No interrupt occurs. Wait for next one\r
+\r
+// Interrupt has been detected\r
+// Save all contents and terminate the scoreboard\r
+//\r
+       #include "scoreboard_save_AS.asm"\r
+\r
+       // Save scoreboard control data as well: r1 - r3 = CMD_SB(-3) .. CMD_SB(-1)\r
+       //\r
+    mov (1)    MSGHDR.1:ud             64:ud   // Row 64, after the 64 scoreboard rows written by the include above\r
+    mov (1)    MSGHDR.2:ud             0x0002001f:ud   // for 3 registers\r
+       $for(0; <3; 1) {\r
+       mov (8) MSGPAYLOADD(%1)<1>      CMD_SB(%1-3)REGION(8,1)\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00300000+AS_SAVE   // Save r1 - r3\r
+\r
+       send (8) NULLREG MSGHDR r0:ud EOTMSGDSC+TH_INT  // Terminate with "Thread Interrupted" bit set\r
+#endif // End AS_ENABLED\r
+\r
+MBAFF_Dependency_Check:\r
+//     Current thread is "checking" but not "completed" (0b10 case).\r
+//     Check for dependency clear using all availability bits\r
+//\r
+       and (8) DEPPTR<1>:uw    MBINDEX(0,1)REGION(8,1) SB_MASK*4:uw    // Wrap around scoreboard entries for the 8 dependency MBs (MBINDEX elements 1 - 8)\r
+MBAFF_Dependency_Polling:\r
+       (f0.1) and.z.f0.1 (8)   NULLREG r[DEPPTR,CMD_SB_REG_OFF*GRFWIB+3]<1,0>:ub       DONEFLAG:uw     // f0.1 8 LSB contains dependency clear\r
+       (f0.1.any8h) jmpi (1)   MBAFF_Dependency_Polling                // Dependency not clear, keep polling..\r
+\r
+//     "Checking" thread and dependency cleared, send a message to let the thread go\r
+//\r
+MBAFF_Notify_MSG:\r
+       send (8)        NULLREG  MSGHDRY0       null:ud    MSG_GW       FWDMSGDSC+NOTIFYMSG     // MSGHDRY0.4 holds the scoreboard entry copied in MBAFF_MB_Loop\r
+\r
+//     Current macroblock has been serviced. Update to next macroblock in special zig-zag order\r
+//\r
+MBAFF_Update_CurMB:\r
+       add.ge.f0.0 (2) TotalMB<2>:w    TotalMB<4;2,2>:w        -1:w    // Set "End of wavefront" flag and decrement "TotalMB"\r
+       add (16)        MBINDEX(0)<1>   MBINDEX(0)REGION(16,1)  4:w             // Increment MB indices\r
+       and (1) CMDPTR<1>:uw    acc0<0;1,0>:w   SB_MASK*4:uw // Wrap around scoreboard entries for current MB\r
+       (f0.0.all2h) jmpi (1) MBAFF_Wavefront_Walk      // Continue wavefront walking\r
+\r
+// Top macroblock wavefront walk done, start bottom MB wavefront\r
+       add.ge.f0.0 (1) WFLen<1>:w      WFLen_B<0;1,0>:w        0:w     {NoDDClr}               // Set bottom MB wavefront length; f0.0 = (WFLen_B >= 0)\r
+       mov (1) WFLen_B<1>:w    -1:w    {NoDDChk}                       // Reset bottom MB wavefront length\r
+       \r
+// Initialize 9-MB group for bottom macroblock wavefront\r
+       mov (8) MBINDEX(0)<1>           WFStart_B(0)<1;4,0>                     {NoDDClr}       // Initialize with WFStart_B[0] and WFStart_B[1]\r
+       mov (4) MBINDEX(0,1)<1>         WFStart_T(0,1)<0;1,0>           {NoDDChk,NoDDClr}       // Initialize with WFStart_T[1]\r
+       mov (2) MBINDEX(0,2)<1>         WFStart_T(0)<0;1,0>                     {NoDDChk,NoDDClr}       // Initialize with WFStart_T[0]\r
+       add (4) MBINDEX(0,6)<1>         WFStart_B(0,1)<4;4,1>   -1:w    {NoDDChk}       // Upper MB group (C_B_D_x)\r
+\r
+       (f0.0) jmpi (1) MBAFF_Start_Wavefront                           // Start bottom MB wavefront walk (f0.0 from the add.ge on WFLen_B above)\r
+\r
+//     Start new wavefront\r
+//\r
+       cmp.e.f0.1 (16) NULLREGW  StartX<0;1,0>:uw      WIDTHINMB_1<0;1,0>:uw   // Set "on picture right boundary" flag\r
+\r
+       // Update WFStart_T and WFStart_B\r
+       add (8) acc0<1>:w       WFStart_T(0)REGION(1,0) 1:w                             // Move WFStart_T[0]+1 to acc0 to remove dependency later\r
+       mov (8) WFStart_T(0,1)<1>       WFStart_T(0)<8;8,1>     {NoDDClr}       // Shift WFStart_T(B)[0:2] to WFStart_T(B)[1:3]\r
+       mac (1) WFStart_T(0,0)<1>       WFLen_Save<0;1,0>:w     2:w {NoDDChk}   // WFStart_T[0] = WFStart_T[0] + 2*WFLen\r
+\r
+       cmp.e.f0.0 (1)  NULLREG TotalMB<0;1,0>:w        0:w     // Set "Last MB" flag\r
+\r
+       (f0.1) add (4)  WFLen<1>:w      WFLen<4;4,1>:w  NewWFOffset<4;4,1>:b    // + (0, -1, -1, 1)\r
+       (f0.1) add (8)  WFStart_T(0)<1> WFStart_T(0)REGION(4,1) 1:w\r
+       (-f0.1) add (1) StartX<1>:w             StartX<0;1,0>:w 1:w             // Move to right MB\r
+       (-f0.1) add (1) WFStart_T(0)<1> WFStart_T(0)REGION(1,0) 1:w\r
+\r
+       (-f0.0) jmpi (1)        MBAFF_Scoreboard_Loop                           // Not last MB, start new wavefront walking\r
+\r
+// All MBs have been decoded. Terminate the thread now\r
+//\r
+    END_THREAD\r
+\r
+#if !defined(COMBINED_KERNEL)          // For standalone kernel only\r
+.end_code\r
+\r
+.end_kernel\r
+#endif\r
+\r
+// End of scoreboard_MBAFF\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard_restore_AS.asm b/i965_drv_video/shaders/h264/mc/scoreboard_restore_AS.asm
new file mode 100644 (file)
index 0000000..7d95330
--- /dev/null
@@ -0,0 +1,54 @@
+/*\r
+ * Restore previously stored scoreboard data after content switching back\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: scoreboard_restore_AS.asm\r
+//\r
+// Restore previously stored scoreboard data after content switching back\r
+//\r
+//\r
+       // Restore scoreboard data to r4 - r67\r
+       // They are saved in a 2D surface with width of 32 and height of 80.\r
+       // Each row corresponds to one GRF register in the following order\r
+       // r4 - r67     : Scoreboard message (rows 0 - 63, read 8 rows per message)\r
+       //\r
+    mov (8)    MSGSRC<1>:ud    r0.0<8;8,1>:ud {NoDDClr}        // Initialize message header payload with r0\r
+\r
+    mov (2)    MSGSRC.0:ud             0:ud {NoDDClr, NoDDChk}         // Header DWORDs 0 and 1 = 0: starting at row 0 (r4)\r
+    mov (1)    MSGSRC.2:ud             0x0007001f:ud {NoDDChk}         // for 8 registers\r
+    send (8)   CMD_SB(0)<1>    m1      MSGSRC<8;8,1>:ud        DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r4 - r11\r
+\r
+    mov (8)    m2:ud           MSGSRC<8;8,1>:ud        // Reuse the header built in MSGSRC\r
+    mov (1)    m2.1:ud         8:ud                    // Starting row 8\r
+    send (8)   CMD_SB(8)<1>    m2      null:ud DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r12 - r19\r
+\r
+    mov (8)    m3:ud           MSGSRC<8;8,1>:ud\r
+    mov (1)    m3.1:ud         16:ud                   // Starting row 16\r
+    send (8)   CMD_SB(16)<1>   m3      null:ud DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r20 - r27\r
+\r
+    mov (8)    m4:ud           MSGSRC<8;8,1>:ud\r
+    mov (1)    m4.1:ud         24:ud                   // Starting row 24\r
+    send (8)   CMD_SB(24)<1>   m4      null:ud DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r28 - r35\r
+\r
+    mov (8)    m5:ud           MSGSRC<8;8,1>:ud\r
+    mov (1)    m5.1:ud         32:ud                   // Starting row 32\r
+    send (8)   CMD_SB(32)<1>   m5      null:ud DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r36 - r43\r
+\r
+    mov (8)    m6:ud           MSGSRC<8;8,1>:ud\r
+    mov (1)    m6.1:ud         40:ud                   // Starting row 40\r
+    send (8)   CMD_SB(40)<1>   m6      null:ud DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r44 - r51\r
+\r
+    mov (8)    m7:ud           MSGSRC<8;8,1>:ud\r
+    mov (1)    m7.1:ud         48:ud                   // Starting row 48\r
+    send (8)   CMD_SB(48)<1>   m7      null:ud DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r52 - r59\r
+\r
+    mov (8)    m8:ud           MSGSRC<8;8,1>:ud\r
+    mov (1)    m8.1:ud         56:ud                   // Starting row 56\r
+    send (8)   CMD_SB(56)<1>   m8      null:ud DWBRMSGDSC_SC+0x00080000+AS_SAVE        // Restore r60 - r67\r
+\r
+// End of scoreboard_restore_AS\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard_save_AS.asm b/i965_drv_video/shaders/h264/mc/scoreboard_save_AS.asm
new file mode 100644 (file)
index 0000000..13abe0e
--- /dev/null
@@ -0,0 +1,72 @@
+/*\r
+ * Save scoreboard data before content switching\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Module name: scoreboard_save_AS.asm\r
+//\r
+// Save scoreboard data before content switching\r
+//\r
+//\r
+       //      r1 - r67 need to be saved (r4 - r67 here; r1 - r3 by the including kernel)\r
+       // They are saved in a 2D surface with width of 32 and height of 80 (per scoreboard_restore_AS.asm; rows 0 - 66 are used - TODO confirm height).\r
+       // Each row corresponds to one GRF register in the following order\r
+       // r4 - r67     : Scoreboard message\r
+       // r1 - r3  : Scoreboard kernel control data\r
+\r
+    mov (8)    MSGHDR<1>:ud    r0.0<8;8,1>:ud  // Initialize message header payload with r0\r
+    mov (1)    MSGHDR.2:ud             0x0007001f:ud   // for 8 registers\r
+\r
+    mov (2)    MSGHDR.0:ud             0:ud    // Header DWORDs 0 and 1 = 0: starting at row 0\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r4 - r11\r
+\r
+    mov (1)    MSGHDR.1:ud             8:ud    // Starting row 8\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1+8)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r12 - r19\r
+\r
+    mov (1)    MSGHDR.1:ud             16:ud   // Starting row 16\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1+16)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r20 - r27\r
+\r
+    mov (1)    MSGHDR.1:ud             24:ud   // Starting row 24\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1+24)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r28 - r35\r
+\r
+    mov (1)    MSGHDR.1:ud             32:ud   // Starting row 32\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1+32)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r36 - r43\r
+\r
+    mov (1)    MSGHDR.1:ud             40:ud   // Starting row 40\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1+40)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r44 - r51\r
+\r
+    mov (1)    MSGHDR.1:ud             48:ud   // Starting row 48\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1+48)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r52 - r59\r
+\r
+    mov (1)    MSGHDR.1:ud             56:ud   // Starting row 56\r
+       $for(0; <8; 2) {\r
+       mov (16)        MSGPAYLOADD(%1)<1>      CMD_SB(%1+56)REGION(8,1) {Compr}\r
+       }\r
+    send (8)   NULLREG MSGHDR  null:ud DWBWMSGDSC+0x00800000+AS_SAVE   // Save r60 - r67\r
+\r
+// End of scoreboard_save_AS\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard_sip.asm b/i965_drv_video/shaders/h264/mc/scoreboard_sip.asm
new file mode 100644 (file)
index 0000000..6330ea1
--- /dev/null
@@ -0,0 +1,34 @@
+/*\r
+ * Scoreboard interrupt handler\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: scoreboard_sip.asm\r
+//\r
+// scoreboard interrupt handler\r
+//\r
+// Simply send a notification message to scoreboard thread\r
+\r
+    mov (8)            m0<1>:ud        0x00000000:ud                   // Initialize message header payload with 0\r
+#ifdef DOUBLE_SB\r
+       mov (1)         m0.5<1>:ud      0x08000200:ud                   // Message length = 1 DWORD, sent to GRF offset 64 registers\r
+#else\r
+       mov (1)         m0.5<1>:ud      0x04000200:ud                   // Message length = 1 DWORD, sent to GRF offset 32 registers\r
+#endif\r
+       send (8)        null<1>:ud  m0  null:ud    0x03108002   // Send notification message to scoreboard kernel\r
+\r
+       and (1)         cr0.1:ud        cr0.1:ud        0x00800000              // Keeps only bit 23 of cr0.1, clears all other bits. NOTE(review): original comment said "Clear preempt exception bit" - confirm the mask\r
+       and (1)         cr0.0:ud        cr0.0:ud        0x7fffffff:ud   // Clear bit 31 of cr0.0 to exit SIP routine\r
+       nop                                                                                                     // Required by B-spec\r
+\r
+.end_code\r
+\r
+\r
+\r
+\r
+\r
+\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard_start_inter.asm b/i965_drv_video/shaders/h264/mc/scoreboard_start_inter.asm
new file mode 100644 (file)
index 0000000..831b841
--- /dev/null
@@ -0,0 +1,47 @@
+/*\r
+ * Scoreboard function for starting inter prediction kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SCOREBOARD_START_INTER__)\r
+#define __SCOREBOARD_START_INTER__\r
+//\r
+// Module name: scoreboard_start_inter.asm\r
+//\r
+//     Scoreboard function for starting inter prediction kernels\r
+//     This function is only used by inter prediction kernels to send message to\r
+//     scoreboard in order to announce the inter kernel has started\r
+//\r
+//  $Revision: 5 $\r
+//  $Date: 10/18/06 4:11p $\r
+//\r
+scoreboard_start_inter:\r
+\r
+// First open message gateway since intra kernels need wake-up message to resume\r
+// NOTE(review): the line above is worded for intra kernels - confirm it applies to this inter-kernel routine\r
+    mov (8)    MSGHDRY0<1>:ud  0x00000000:ud                   // Initialize message header payload with 0\r
+\r
+    // Send a message with register base RegBase = r0 (0x0) and Size = 0x0\r
+    // 000 00000000 00000 00000 000 00000000 ==> 0000 0000 0000 0000 0000 0000 0000 0000\r
+    // ---------------------------------------------------------------------------------\r
+       send (8)        NULLREG  MSGHDRY0       null:ud    MSG_GW       OGWMSGDSC       // Header is all zero here\r
+\r
+//     Derive the scoreboard location where the inter thread writes to\r
+//\r
+    mov (8)            MSGHDRY1<1>:ud  0x00000000:ud                   // Initialize message header payload with 0\r
+\r
+       // Compose M0.5:ud\r
+       #include "set_SB_offset.asm"\r
+\r
+       // Compose M0.0:ud, i.e. message payload\r
+       or      (1)             MSGHDRY1.1<1>:uw        sr0.0<0;1,0>:uw         0x0000:uw       // Set EUID/TID bits + inter start bit (OR with 0x0000 adds no flag bits)\r
+\r
+       send (8)        NULLREG  MSGHDRY1       null:ud    MSG_GW       FWDMSGDSC+NOTIFYMSG     // Send "Inter start" message to scoreboard kernel\r
+\r
+    RETURN\r
+\r
+#endif // !defined(__SCOREBOARD_START_INTER__)\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard_start_intra.asm b/i965_drv_video/shaders/h264/mc/scoreboard_start_intra.asm
new file mode 100644 (file)
index 0000000..6d6d916
--- /dev/null
@@ -0,0 +1,52 @@
+/*\r
+ * Scoreboard function for starting intra prediction kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+#if !defined(__SCOREBOARD_START_INTRA__)\r
+#define __SCOREBOARD_START_INTRA__\r
+//\r
+// Module name: scoreboard_start_intra.asm\r
+//\r
+//     Scoreboard function for starting intra prediction kernels\r
+//     This function is only used by intra prediction kernels to send message to\r
+//     scoreboard in order to check dependency clearance\r
+//\r
+//  $Revision: 5 $\r
+//  $Date: 10/18/06 4:11p $\r
+//\r
+scoreboard_start_intra:\r
+// Entry point: opens the message gateway (with a key), then forwards an "Intra start" message to the scoreboard kernel.\r
+// First open message gateway since intra kernels need wake-up message to resume\r
+// \r
+    mov (8)    MSGHDRY0<1>:ud  0x00000000:ud                   // Initialize message header payload with 0\r
+\r
+    // Send a message with register base RegBase = r0 (0x0) and Size = 0x0\r
+    // 000 00000000 00000 00000 000 00000000 ==> 0000 0000 0000 0000 0000 0000 0000 0000\r
+    // NOTE: unlike the inter variant, the header is not left all-zero: word 8 receives the key written below.\r
+       and (1)         MSGHDRY0.8<1>:uw        REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        0x1f:uw         // Set lower word of key = (intra-pred availability flag byte & 0x1f)\r
+       send (8)        NULLREG  MSGHDRY0       null:ud    MSG_GW       OGWMSGDSC\r
+\r
+// Send "check dependency" message to scoreboard thread\r
+// --------------------------\r
+\r
+//     Derive the scoreboard location where the intra thread writes to\r
+//     (set_SB_offset.asm, included below, fills MSGHDRY1.10/.11 and records the unwrapped value in M05_STORE)\r
+    mov (8)            MSGHDRY1<1>:ud  0x00000000:ud                   // Initialize message header payload with 0\r
+\r
+       // Compose M0.5:ud\r
+       #include "set_SB_offset.asm"\r
+\r
+       // Compose M0.0:ud, i.e. message payload\r
+       and (1)         MSGHDRY1.0<1>:uw        REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        0x1f:uw         // Set lower word of message = (availability flag byte & 0x1f), same value as the gateway key above\r
+       or      (1)             MSGHDRY1.1<1>:uw        sr0.0<0;1,0>:uw         0x8000:uw       // Upper word = sr0.0 low word (EUID/TID bits) | 0x8000 (bit 15 = intra start bit)\r
+\r
+       send (8)        NULLREG  MSGHDRY1       null:ud    MSG_GW       FWDMSGDSC+NOTIFYMSG     // Send "Intra start" message to scoreboard kernel\r
+\r
+    RETURN\r
+\r
+#endif // !defined(__SCOREBOARD_START_INTRA__)\r
diff --git a/i965_drv_video/shaders/h264/mc/scoreboard_update.asm b/i965_drv_video/shaders/h264/mc/scoreboard_update.asm
new file mode 100644 (file)
index 0000000..f519e4a
--- /dev/null
@@ -0,0 +1,41 @@
+/*\r
+ * Scoreboard update function for decoding kernels\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//\r
+// Module name: scoreboard_update.asm\r
+//\r
+//     Scoreboard update function for decoding kernels\r
+//\r
+//     This module is used by decoding kernels to send message to scoreboard to update the\r
+//     "complete" status, thus the dependency of the MB can be cleared.\r
+//\r
+//  $Revision: 6 $\r
+//  $Date: 10/16/06 5:19p $\r
+//\r
+    mov (8)            MSGHDRY1<1>:ud  0x00000000:ud                           // Initialize message header payload with 0\r
+\r
+       // Compose M0.5:ud information\r
+       add (1) MSGHDRY1.10<1>:uw       r0.20:ub        0x0200:uw                               // Low word of M0.5 = r0.20:ub + 0x0200 (message length = 1 DWORD)\r
+       and (1) MSGHDRY1.11<1>:uw       M05_STORE<0;1,0>:uw     SB_MASK*4:uw    // High word of M0.5 = M05_STORE (already in DWORD units, see set_SB_offset.asm) wrapped with SB_MASK*4\r
+\r
+       or (1)  MSGHDRY1.0<1>:ud        M05_STORE<0;1,0>:uw     0xc0000000:ud   // M0.0 = M05_STORE | 0xc0000000; the two top bits are the "Completed" bits\r
+\r
+#ifndef BSDRESET_ENABLE\r
+#ifdef INTER_KERNEL\r
+       mov     (1)     gREG_WRITE_COMMIT_Y<1>:ud       gREG_WRITE_COMMIT_Y<0;1,0>:ud           // Make sure Y write is committed (self-move; presumably stalls until the write-commit return of the Y data-port send has landed -- confirm)\r
+       mov     (1)     gREG_WRITE_COMMIT_UV<1>:ud      gREG_WRITE_COMMIT_UV<0;1,0>:ud          // Make sure U/V write is committed\r
+#else\r
+       mov     (1)     REG_WRITE_COMMIT_Y<1>:ud        REG_WRITE_COMMIT_Y<0;1,0>:ud            // Make sure Y write is committed (same self-move idiom, non-inter register name)\r
+       mov     (1)     REG_WRITE_COMMIT_UV<1>:ud       REG_WRITE_COMMIT_UV<0;1,0>:ud           // Make sure U/V write is committed\r
+#endif // INTER_KERNEL\r
+#endif // BSDRESET_ENABLE\r
+       // Forward the "completed" update to the scoreboard through the message gateway (no NOTIFYMSG on this send)\r
+       send (8)        NULLREG  MSGHDRY1       null:ud    MSG_GW       FWDMSGDSC\r
+\r
+// End of scoreboard_update\r
diff --git a/i965_drv_video/shaders/h264/mc/set_SB_offset.asm b/i965_drv_video/shaders/h264/mc/set_SB_offset.asm
new file mode 100644 (file)
index 0000000..0b166e4
--- /dev/null
@@ -0,0 +1,26 @@
+/*\r
+ * Common module to set offset into the scoreboard\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+//\r
+// Module name: set_SB_offset.asm\r
+//\r
+// Common module to set offset into the scoreboard\r
+//     Note: This is to encapsulate the way M0.5:ud in ForwardMsg is filled.\r
+//\r
+//  $Revision: 2 $\r
+//  $Date: 10/16/06 5:19p $\r
+//\r
+       add (1)         MSGHDRY1.10<1>:uw r0.20:ub      0x0200:uw                       // Low word of M0.5 = r0.20:ub + 0x0200 (message length = 1 DWORD)\r
+       // The two (16)-wide ops below work in acc0; only element 0 is consumed by the scalar shl instructions.\r
+       add     (16)    acc0<1>:w       r0.12<0;1,0>:uw -LEADING_THREAD:w       // acc0 = r0.12:uw - LEADING_THREAD: 0-based thread count (r0.12:uw is the low word of r0.6:ud)\r
+       shl (1)         M05_STORE<1>:uw         acc0<0;1,0>:uw  0x2:uw          // M05_STORE = unwrapped count << 2; stored for future "update" use, in DWORD unit\r
+       and     (16)    acc0<1>:w       acc0<16;16,1>:uw        SB_MASK:uw              // Wrap around scoreboard (count & SB_MASK)\r
+       shl (1)         MSGHDRY1.11<1>:uw       acc0<0;1,0>:uw  0x2:uw          // High word of M0.5 = wrapped count << 2, i.e. DWORD offset\r
+\r
+// End of set_SB_offset
\ No newline at end of file
diff --git a/i965_drv_video/shaders/h264/mc/weightedPred.asm b/i965_drv_video/shaders/h264/mc/weightedPred.asm
new file mode 100644 (file)
index 0000000..76525f9
--- /dev/null
@@ -0,0 +1,140 @@
+/*\r
+ * Weighted prediction of luminance and chrominance\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: WeightedPred.asm\r
+//\r
+// Weighted prediction of luminance and chrominance\r
+//\r
+\r
+\r
+//#if !defined(__WeightedPred__)               // Make sure this is only included once\r
+//#define __WeightedPred__\r
+\r
+\r
+       and.z.f0.0 (1) gWEIGHTFLAG:w    gWPREDFLAG:ub                                   nWBIDIR_MASK:w          // gWEIGHTFLAG = gWPREDFLAG & nWBIDIR_MASK; f0.0 set when the result is zero\r
+       cmp.e.f0.1 (1) null:w                   gPREDFLAG:w                                             2:w             // f0.1 = (gPREDFLAG == 2)\r
+       (-f0.0) jmpi INTERLABEL(WeightedPred)           // Masked weight flag non-zero -> weighted path\r
+       (f0.1) jmpi INTERLABEL(DefaultWeightedPred_BiPred)              // gPREDFLAG == 2 -> default bi-pred (average of both interpolations)\r
+       \r
+INTERLABEL(DefaultWeightedPred_UniPred):\r
+\r
+       cmp.e.f0.0 (1) null:w                   gPREDFLAG:w                                             0:w             // gPREDFLAG == 0 -> jump straight to the return label\r
+       (f0.0) jmpi INTERLABEL(Return_WeightedPred)\r
+\r
+       // luma: copy the INTPY1 interpolation result into YPRED (destination stride 2)\r
+       mov (32)        gubYPRED(0)<2>          gubINTPY1(0)    {Compr}\r
+       mov (32)        gubYPRED(2)<2>          gubINTPY1(2)    {Compr}\r
+\r
+#ifndef MONO\r
+       // chroma: copy the INTPC1 interpolation result into CPRED\r
+       mov (32)        gubCPRED(0)<2>          gubINTPC1(0)    {Compr}\r
+#endif\r
+\r
+       jmpi INTERLABEL(Return_WeightedPred)\r
+       \r
+INTERLABEL(DefaultWeightedPred_BiPred):\r
+\r
+       // luma: saturated average of the two interpolation results\r
+       avg.sat (32) gubYPRED(0)<2>             gubINTPY0(0)                                    gubINTPY1(0)    {Compr}\r
+       avg.sat (32) gubYPRED(2)<2>             gubINTPY0(2)                                    gubINTPY1(2)    {Compr}\r
+       \r
+#ifndef MONO\r
+       // chroma: saturated average of the two interpolation results\r
+       avg.sat (32) gubCPRED(0)<2>             gubINTPC0(0)                                    gubINTPC1(0)    {Compr}\r
+#endif\r
+       \r
+       jmpi INTERLABEL(Return_WeightedPred)\r
+       \r
+INTERLABEL(WeightedPred):\r
+       cmp.e.f0.1 (1) null:w                   gWEIGHTFLAG:w                                   0x80:w          // gWEIGHTFLAG != 0x80 -> explicit weights\r
+       (-f0.1) jmpi INTERLABEL(WeightedPred_Explicit)\r
+       \r
+       cmp.e.f0.0 (1) null:w                   gPREDFLAG:w                                             2:w             // Not bi-pred (gPREDFLAG != 2) -> fall back to the default uni-pred copy\r
+       (-f0.0) jmpi INTERLABEL(DefaultWeightedPred_UniPred)\r
+       // gWEIGHTFLAG == 0x80 and gPREDFLAG == 2: fixed rounding 32 / shift 6 / zero offset, weights read from r[pWGT,0] (presumably H.264 implicit bi-prediction -- confirm)\r
+       mov (2)         gYADD<1>:w                      32:w                                                            {NoDDClr}       \r
+       mov (2)         gYSHIFT<1>:w            6:w                                                                     {NoDDChk}\r
+       mov (4)         gOFFSET<1>:w            0:w\r
+       mov (8)         gWT0<2>:w                       r[pWGT,0]<0;2,1>:w\r
+       \r
+       jmpi INTERLABEL(WeightedPred_LOOP)\r
+       \r
+       // Explicit Prediction\r
+INTERLABEL(WeightedPred_Explicit):\r
+       \r
+       // Workaround (WA) for weighted prediction - 2007/09/06: f0.1 marks components whose weight is 128\r
+#ifdef SW_W_128                // CTG software workaround\r
+       cmp.e.f0.1 (8) null:ud                  r[pWGT,0]<8;8,1>:uw                             gudW128(0)<0;1,0>\r
+#else                                  // ILK HW solution\r
+       and.ne.f0.1 (8) null:uw                 r[pWGT,12]<0;1,0>:ub                    0x88848421:v    // Expand W=128 flag to all components. 2 MSB are don't care\r
+#endif \r
+       asr.nz.f0.0 (2) gBIPRED<1>:w    gPREDFLAG<0;1,0>:w                              1:w             // gBIPRED = gPREDFLAG >> 1; f0.0 set when non-zero\r
+       asr (1)         gWEIGHTFLAG:w           gWEIGHTFLAG:w                                   6:w     \r
+       (-f0.0) mov (2) gPREDFLAG1<1>:w gPREDFLAG<0;1,0>:w                                                              \r
+       (f0.0) mov (2)  gPREDFLAG0<1>:ud 0x00010001:ud\r
+       (-f0.0) add (2) gPREDFLAG0<1>:w -gPREDFLAG1<2;2,1>:w                    1:w\r
+       \r
+       // Workaround (WA) for weighted prediction - 2007/09/06: flagged components get weight 0x80 instead of the table value\r
+       (f0.1) mov (8)  gWT0<1>:ud              0x00000080:ud\r
+       (-f0.1) mov (8) gWT0<2>:w               r[pWGT,0]<16;8,2>:b\r
+       (-f0.1) mov (8) gO0<2>:w                r[pWGT,1]<16;8,2>:b\r
+       mul (16)                gWT0<1>:w               gWT0<16;16,1>:w                                 gPREDFLAG0<0;4,1>:w\r
+\r
+       // Compute addition\r
+       cmp.e.f0.1 (2) null<1>:w                gYWDENOM<2;2,1>:ub                              0:w\r
+       (-f0.1) shl (2) gW0<1>:w                gWEIGHTFLAG<0;1,0>:w                    gYWDENOM<2;2,1>:ub\r
+       (f0.1) mov (2) gW0<1>:w                 0:w\r
+       (-f0.1) asr (2) gW0<1>:w                gW0<2;2,1>:w                                    1:w\r
+       shl (2)         gYADD<1>:w                      gW0<2;2,1>:w                                    gBIPRED<0;1,0>:w\r
+       (f0.1) add (2)  gYADD<1>:w              gYADD<2;2,1>:w                                  gBIPRED<0;1,0>:w\r
+       \r
+       // Compute shift\r
+       add (2)         gYSHIFT<1>:w            gYWDENOM<2;2,1>:ub                              gBIPRED<0;1,0>:w\r
+       \r
+       // Compute offset: (gO0 + gO1 + gBIPRED) >> gBIPRED\r
+       add (4)         acc0<1>:w                       gO0<16;4,4>:w                                   gO1<16;4,4>:w\r
+       add (4)         acc0<1>:w                       acc0<4;4,1>:w                                   gBIPRED<0;1,0>:w\r
+       asr (4)         gOFFSET<1>:w            acc0<4;4,1>:w                                   gBIPRED<0;1,0>:w\r
+\r
+INTERLABEL(WeightedPred_LOOP): \r
+       // luma: YPRED = sat(((INTPY0 * WT0 + INTPY1 * WT1 + YADD) >> YSHIFT) + OFFSET), two rows per iteration\r
+       $for(0;<4;2) {  \r
+       mul (16)        acc0<1>:w                       gubINTPY0(%1)                                   gWT0<0;1,0>:w\r
+       mul (16)        acc1<1>:w                       gubINTPY0(%1+1)                                 gWT0<0;1,0>:w\r
+       mac (16)        acc0<1>:w                       gubINTPY1(%1)                                   gWT1<0;1,0>:w\r
+       mac (16)        acc1<1>:w                       gubINTPY1(%1+1)                                 gWT1<0;1,0>:w\r
+       add (16)        acc0<1>:w                       acc0<16;16,1>:w                                 gYADD:w\r
+       add (16)        acc1<1>:w                       acc1<16;16,1>:w                                 gYADD:w\r
+       // Accumulator cannot be used as destination for ASR, so the shift result goes to gwINTERIM_BUF3\r
+       asr (16)        gwINTERIM_BUF3(0)<1> acc0<16;16,1>:w                            gYSHIFT:w\r
+       asr (16)        gwINTERIM_BUF3(1)<1> acc1<16;16,1>:w                            gYSHIFT:w\r
+       add.sat (16) gubYPRED(%1)<2>    gwINTERIM_BUF3(0)                               gOFFSET:w\r
+       add.sat (16) gubYPRED(%1+1)<2>  gwINTERIM_BUF3(1)                               gOFFSET:w\r
+       }       \r
+\r
+#ifndef MONO\r
+       // chroma: same formula as luma, using the gUW0/gUW1 weights, gCADD, gCSHIFT and gUOFFSET\r
+       mul (16)        acc0<1>:w                       gubINTPC0(0)                                    gUW0<0;2,4>:w\r
+       mul (16)        acc1<1>:w                       gubINTPC0(1)                                    gUW0<0;2,4>:w\r
+       mac (16)        acc0<1>:w                       gubINTPC1(0)                                    gUW1<0;2,4>:w\r
+       mac (16)        acc1<1>:w                       gubINTPC1(1)                                    gUW1<0;2,4>:w\r
+       add (16)        acc0<1>:w                       acc0<16;16,1>:w                                 gCADD:w\r
+       add (16)        acc1<1>:w                       acc1<16;16,1>:w                                 gCADD:w\r
+       // Accumulator cannot be used as destination for ASR, so the shift result goes to gwINTERIM_BUF3\r
+       asr (16)        gwINTERIM_BUF3(0)<1> acc0<16;16,1>:w                            gCSHIFT:w\r
+       asr (16)        gwINTERIM_BUF3(1)<1> acc1<16;16,1>:w                            gCSHIFT:w\r
+       add.sat (16) gubCPRED(0)<2>             gwINTERIM_BUF3(0)                               gUOFFSET<0;2,1>:w\r
+       add.sat (16) gubCPRED(1)<2>             gwINTERIM_BUF3(1)                               gUOFFSET<0;2,1>:w\r
+#endif\r
+\r
+\r
+INTERLABEL(Return_WeightedPred):\r
+\r
+        \r
+//#endif       // !defined(__WeightedPred__)\r
diff --git a/i965_drv_video/shaders/h264/mc/writeRecon_C_8x4.asm b/i965_drv_video/shaders/h264/mc/writeRecon_C_8x4.asm
new file mode 100644 (file)
index 0000000..be7585e
--- /dev/null
@@ -0,0 +1,46 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: WriteRecon_C_8x4.asm\r
+//\r
+//  $Revision: 10 $\r
+//  $Date: 10/03/06 5:28p $\r
+//\r
+\r
+\r
+//#if !defined(__WRITERECON_C_8x4__)           // Make sure this is only included once\r
+//#define __WRITERECON_C_8x4__\r
+\r
+\r
+       // TODO: Why did I use p0?\r
+#ifndef MONO\r
+    add (1)            p0:w                                    pERRORC:w                               -16:w\r
+       mov (16)        mbMSGPAYLOADC(0,0)<2>   r[p0,0]<32;16,2>:ub                             {NoDDClr}       // Even destination bytes come from r[p0,0..]\r
+       mov (16)        mbMSGPAYLOADC(0,1)<2>   r[p0,128]<32;16,2>:ub                   {NoDDChk}       // Odd destination bytes come from r[p0,128..]: the two sources are interleaved\r
+       mov (16)        mbMSGPAYLOADC(1,0)<2>   r[p0,32]<32;16,2>:ub                    {NoDDClr}\r
+       mov (16)        mbMSGPAYLOADC(1,1)<2>   r[p0,128+32]<32;16,2>:ub                {NoDDChk}\r
+#else  // defined(MONO)\r
+       mov (16)        mbMSGPAYLOADC(0)<1>             0x80808080:ud {Compr}           // MONO: fill the chroma payload with the constant byte 0x80\r
+#endif // !defined(MONO)\r
+       // For MBAFF/FIELD builds the write descriptor is built at run time: gFIELDFLAGS + static descriptor bits\r
+ #if defined(MBAFF)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(2)+nDWBWMSGDSC+nBDIX_DESTC+ENWRCOM:ud\r
+ #elif defined(FIELD)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(2)+nDWBWMSGDSC_TF+nBDIX_DESTC+ENWRCOM:ud\r
+ #endif\r
+\r
+    asr (1)            gMSGSRC.1:d                             gMSGSRC.1:d                                     1:w     {NoDDClr}       // gMSGSRC.1 >>= 1 (presumably luma Y origin -> chroma row -- confirm)\r
+    mov (1)            gMSGSRC.2:ud                    0x0003000f:ud                                   {NoDDChk} // NV12 (16x4)\r
+\r
+#if defined(FRAME)\r
+    send (8)   gREG_WRITE_COMMIT_UV<1>:ud              mMSGHDRCW                               gMSGSRC<8;8,1>:ud               DAPWRITE        MSG_LEN(2)+nDWBWMSGDSC+nBDIX_DESTC+ENWRCOM\r
+#else\r
+    send (8)   gREG_WRITE_COMMIT_UV<1>:ud              mMSGHDRCW                               gMSGSRC<8;8,1>:ud               DAPWRITE        pMSGDSC:ud\r
+#endif // defined(FRAME)\r
+\r
+//#endif       // !defined(__WRITERECON_C_8x4__)\r
diff --git a/i965_drv_video/shaders/h264/mc/writeRecon_YC.asm b/i965_drv_video/shaders/h264/mc/writeRecon_YC.asm
new file mode 100644 (file)
index 0000000..ff84aff
--- /dev/null
@@ -0,0 +1,79 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: WriteRecon_YC.asm\r
+//\r
+//  $Revision: 10 $\r
+//  $Date: 10/03/06 5:28p $\r
+//\r
+\r
+\r
+//#if !defined(__WRITERECON_YC__)              // Make sure this is only included once\r
+//#define __WRITERECON_YC__\r
+\r
+    // TODO: Merge two inst to one.\r
+    mov (1)            p0:w                                    nOFFSET_ERRORY:w\r
+    mov (1)            p1:w                                    nOFFSET_ERRORY+128:w            // NOTE(review): p1 is not referenced again in this file\r
+    // Pack the Y payload: each mov reads every other byte (source stride 2) starting at r[p0,...]\r
+       $for(0; <4; 1) {\r
+    mov (16)   mbMSGPAYLOADY(%1,0)<1>  r[p0,%1*32+0]<8,2>:ub           {NoDDClr}\r
+    mov (16)   mbMSGPAYLOADY(%1,16)<1> r[p0,%1*32+16]<8,2>:ub          {NoDDChk}\r
+    }    \r
+       $for(0; <4; 1) {\r
+    mov (16)   mbMSGPAYLOADY(%1+4,0)<1>        r[p0,%1*32+256]<8,2>:ub                 {NoDDClr}\r
+    mov (16)   mbMSGPAYLOADY(%1+4,16)<1>       r[p0,%1*32+16+256]<8,2>:ub              {NoDDChk}\r
+    }    \r
+    \r
\r
+ #if defined(MBAFF)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(8)+nDWBWMSGDSC+nBDIX_DESTY+ENWRCOM:ud\r
+ #elif defined(FIELD)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(8)+nDWBWMSGDSC_TF+nBDIX_DESTY+ENWRCOM:ud\r
+ #endif\r
+\r
+    mov        (2)             gMSGSRC.0<1>:d                  gX<2;2,1>:w             {NoDDClr}       // Two words starting at gX -> gMSGSRC.0 and gMSGSRC.1\r
+    mov (1)            gMSGSRC.2:ud                    0x000f000f:ud   {NoDDChk}       // Y block 16x16 (fields hold size-1, cf. the NV12 comments below)\r
+    \r
+#if defined(FRAME)\r
+    send (8)   gREG_WRITE_COMMIT_Y<1>:ud               mMSGHDRYW                               gMSGSRC<8;8,1>:ud               DAPWRITE        MSG_LEN(8)+nDWBWMSGDSC+nBDIX_DESTY+ENWRCOM\r
+#else\r
+    send (8)   gREG_WRITE_COMMIT_Y<1>:ud               mMSGHDRYW                               gMSGSRC<8;8,1>:ud               DAPWRITE        pMSGDSC:ud\r
+#endif\r
+\r
+#ifndef MONO\r
+       // TODO: Why did I use p0?\r
+    mov (1)            p0:w                                    nOFFSET_ERRORC:w\r
+       mov (16)        mbMSGPAYLOADC(0,0)<2>   r[p0,0]<32;16,2>:ub                             {NoDDClr}       // Even destination bytes come from r[p0,0..]\r
+       mov (16)        mbMSGPAYLOADC(0,1)<2>   r[p0,128]<32;16,2>:ub                   {NoDDChk}       // Odd destination bytes come from r[p0,128..]: the two sources are interleaved\r
+       mov (16)        mbMSGPAYLOADC(1,0)<2>   r[p0,32]<32;16,2>:ub                    {NoDDClr}\r
+       mov (16)        mbMSGPAYLOADC(1,1)<2>   r[p0,128+32]<32;16,2>:ub                {NoDDChk}\r
+       mov (16)        mbMSGPAYLOADC(2,0)<2>   r[p0,64]<32;16,2>:ub                    {NoDDClr}\r
+       mov (16)        mbMSGPAYLOADC(2,1)<2>   r[p0,128+64]<32;16,2>:ub                {NoDDChk}\r
+       mov (16)        mbMSGPAYLOADC(3,0)<2>   r[p0,96]<32;16,2>:ub                    {NoDDClr}\r
+       mov (16)        mbMSGPAYLOADC(3,1)<2>   r[p0,128+96]<32;16,2>:ub                {NoDDChk}\r
+\r
+\r
+ #if defined(MBAFF)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(4)+nDWBWMSGDSC+nBDIX_DESTC+ENWRCOM:ud\r
+ #elif defined(FIELD)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(4)+nDWBWMSGDSC_TF+nBDIX_DESTC+ENWRCOM:ud\r
+ #endif\r
+\r
+    asr (1)            gMSGSRC.1:d                             gMSGSRC.1:d                                     1:w     {NoDDClr}       // gMSGSRC.1 >>= 1 (presumably luma Y origin -> chroma row -- confirm)\r
+    mov (1)            gMSGSRC.2:ud                    0x0007000f:ud                                   {NoDDChk} // NV12 (16x8): height field 7 = 8 rows, matching MSG_LEN(4)\r
+\r
+#if defined(FRAME)\r
+    send (8)   gREG_WRITE_COMMIT_UV<1>:ud              mMSGHDRCW                               gMSGSRC<8;8,1>:ud               DAPWRITE        MSG_LEN(4)+nDWBWMSGDSC+nBDIX_DESTC+ENWRCOM\r
+#else\r
+    send (8)   gREG_WRITE_COMMIT_UV<1>:ud              mMSGHDRCW                               gMSGSRC<8;8,1>:ud               DAPWRITE        pMSGDSC:ud\r
+#endif // defined(FRAME)\r
+\r
+#endif // !defined(MONO)\r
+\r
+\r
+//#endif       // !defined(__WRITERECON_YC__)\r
diff --git a/i965_drv_video/shaders/h264/mc/writeRecon_Y_16x8.asm b/i965_drv_video/shaders/h264/mc/writeRecon_Y_16x8.asm
new file mode 100644 (file)
index 0000000..509a2ec
--- /dev/null
@@ -0,0 +1,43 @@
+/*\r
+ * Copyright © <2010>, Intel Corporation.\r
+ *\r
+ * This program is licensed under the terms and conditions of the\r
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
+ * http://www.opensource.org/licenses/eclipse-1.0.php.\r
+ *\r
+ */\r
+// Kernel name: WriteRecon_Y_16x8.asm\r
+//\r
+//  $Revision: 10 $\r
+//  $Date: 10/03/06 5:28p $\r
+//\r
+\r
+\r
+//#if !defined(__WRITERECON_Y_16x8__)          // Make sure this is only included once\r
+//#define __WRITERECON_Y_16x8__\r
+\r
+\r
+    add (1)            p0:w                                    pERRORY:w                               -256:w\r
+    add (1)            p1:w                                    pERRORY:w                               -128:w          // NOTE(review): p1 is not referenced again in this file\r
+    // Pack the Y payload: each mov reads every other byte (source stride 2) starting at r[p0,...]\r
+       $for(0; <4; 1) {\r
+    mov (16)   mbMSGPAYLOADY(%1,0)<1>  r[p0,%1*32+0]<8,2>:ub           {NoDDClr}\r
+    mov (16)   mbMSGPAYLOADY(%1,16)<1> r[p0,%1*32+16]<8,2>:ub          {NoDDChk}\r
+    }    \r
\r
+ #if defined(MBAFF)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(4)+nDWBWMSGDSC+nBDIX_DESTY+ENWRCOM:ud\r
+ #elif defined(FIELD)\r
+       add (1)         pMSGDSC:ud                              gFIELDFLAGS:uw                  MSG_LEN(4)+nDWBWMSGDSC_TF+nBDIX_DESTY+ENWRCOM:ud\r
+ #endif\r
+\r
+    mov        (2)             gMSGSRC.0<1>:d                  gX<2;2,1>:w             {NoDDClr}       // Two words starting at gX -> gMSGSRC.0 and gMSGSRC.1\r
+    mov (1)            gMSGSRC.2:ud                    0x0007000f:ud   {NoDDChk}       // Y block 16x8 (fields hold size-1), matching MSG_LEN(4)\r
+    \r
+#if defined(FRAME)\r
+    send (8)   gREG_WRITE_COMMIT_Y<1>:ud               mMSGHDRYW                               gMSGSRC<8;8,1>:ud               DAPWRITE        MSG_LEN(4)+nDWBWMSGDSC+nBDIX_DESTY+ENWRCOM\r
+#else\r
+    send (8)   gREG_WRITE_COMMIT_Y<1>:ud               mMSGHDRYW                               gMSGSRC<8;8,1>:ud               DAPWRITE        pMSGDSC:ud\r
+#endif\r
+\r
+//#endif       // !defined(__WRITERECON_Y_16x8__)\r
index 5d3d45b..8163de5 100644 (file)
@@ -35,6 +35,9 @@ define(`dst_y_uw',  `g1.10<2,4,0>UW')
 define(`screen_x0', `g1.0<0,1,0>F')
 define(`screen_y0', `g1.4<0,1,0>F')
 
+/* UV flag */
+define(`interleaved_uv', `g2.0<0,1,0>UW')
+
 /* Source transformation parameters */
 define(`src_du_dx', `g3.0<0,1,0>F')
 define(`src_du_dy', `g3.4<0,1,0>F')
index ca77b48..8cbb289 100644 (file)
@@ -34,23 +34,18 @@ include(`exa_wm.g4i')
 /* use simd16 sampler, param 0 is u, param 1 is v. */
 /* 'payload' loading, assuming tex coord start from g4 */
 
+and.nz (1) null interleaved_uv 0x01UW {align1};
+(f0) jmpi INTERLEAVED_UV;
+
 /* load r */
 mov (1) g0.8<1>UD      0x0000e000UD { align1 mask_disable };
 
 /* src_msg will be copied with g0, as it contains send desc */
 /* emit sampler 'send' cmd */
 
-/* sample Y */
-send (16) src_msg_ind          /* msg reg index */
-       src_sample_g<1>UW       /* readback */
-       g0<8,8,1>UW             /* copy to msg start reg*/
-       sampler (1,0,F)         /* sampler message description, (binding_table,sampler_index,datatype)
-                               /* here(src->dst) we should use src_sampler and src_surface */
-       mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 8 */
-       
 /* sample U (Cr) */
 send (16) src_msg_ind          /* msg reg index */
-       src_sample_r<1>UW       /* readback */
+       src_sample_g<1>UW       /* readback */
        g0<8,8,1>UW             /* copy to msg start reg*/
        sampler (3,2,F)         /* sampler message description, (binding_table,sampler_index,datatype)
                                /* here(src->dst) we should use src_sampler and src_surface */
@@ -63,3 +58,30 @@ send (16) src_msg_ind                /* msg reg index */
        sampler (5,4,F)         /* sampler message description, (binding_table,sampler_index,datatype)
                                /* here(src->dst) we should use src_sampler and src_surface */
        mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 8 */
+
+jmpi SAMPLE_Y;
+
+INTERLEAVED_UV:
+/* load r and g: mask 0x0000c000 instead of 0x0000e000, and the send below reads back 4 regs -- NOTE(review): confirm mask bit meaning */
+mov (1) g0.8<1>UD      0x0000c000UD { align1 mask_disable };
+
+/* sample UV (CrCb) */
+send (16) src_msg_ind          /* msg reg index */
+       src_sample_g<1>UW       /* readback */
+       g0<8,8,1>UW             /* copy to msg start reg*/
+       sampler (3,2,F)         /* sampler message description, (binding_table,sampler_index,datatype)
+                               /* here(src->dst) we should use src_sampler and src_surface */
+       mlen 5 rlen 4 { align1 };   /* required message len 5, readback len 4 */
+
+/* load r: restore the single-channel mask before sampling Y */
+mov (1) g0.8<1>UD      0x0000e000UD { align1 mask_disable };
+
+SAMPLE_Y:
+/* sample Y (reached from both the planar and the interleaved-UV paths) */
+send (16) src_msg_ind          /* msg reg index */
+       src_sample_r<1>UW       /* readback */
+       g0<8,8,1>UW             /* copy to msg start reg*/
+       sampler (1,0,F)         /* sampler message description, (binding_table,sampler_index,datatype)
+                               /* here(src->dst) we should use src_sampler and src_surface */
+       mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 2 */
+       
index 77a5c23..94f2f3b 100644 (file)
@@ -1,4 +1,10 @@
+   { 0x02000005, 0x20002d3c, 0x00000040, 0x00010001 },
+   { 0x00010020, 0x34001c00, 0x00001400, 0x00000004 },
    { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
-   { 0x01800031, 0x22001d29, 0x008d0000, 0x02520001 },
-   { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520203 },
+   { 0x01800031, 0x22001d29, 0x008d0000, 0x02520203 },
    { 0x01800031, 0x22401d29, 0x008d0000, 0x02520405 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000003 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 },
+   { 0x01800031, 0x22001d29, 0x008d0000, 0x02540203 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520001 },
index a381e68..c645723 100644 (file)
@@ -1,4 +1,10 @@
+   { 0x02000005, 0x20002d3c, 0x00000040, 0x00010001 },
+   { 0x00010020, 0x34001c00, 0x00001400, 0x00000008 },
    { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
-   { 0x01800031, 0x22001d29, 0x208d0000, 0x0a2a0001 },
-   { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a2a0203 },
+   { 0x01800031, 0x22001d29, 0x208d0000, 0x0a2a0203 },
    { 0x01800031, 0x22401d29, 0x208d0000, 0x0a2a0405 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000006 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 },
+   { 0x01800031, 0x22001d29, 0x208d0000, 0x0a4a0203 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a2a0001 },
index c16037e..5b9e625 100644 (file)
@@ -30,25 +30,25 @@ include(`exa_wm.g4i')
 
 define(`YCbCr_base',   `src_sample_base')
 
-define(`Cr',           `src_sample_r')
-define(`Cr_01',                `src_sample_r_01')
-define(`Cr_23',                `src_sample_r_23')
+define(`Cr',           `src_sample_b')
+define(`Cr_01',                `src_sample_b_01')
+define(`Cr_23',                `src_sample_b_23')
 
-define(`Y',            `src_sample_g')
-define(`Y_01',         `src_sample_g_01')
-define(`Y_23',         `src_sample_g_23')
+define(`Y',            `src_sample_r')
+define(`Y_01',         `src_sample_r_01')
+define(`Y_23',         `src_sample_r_23')
 
-define(`Cb',           `src_sample_b')
-define(`Cb_01',                `src_sample_b_01')
-define(`Cb_23',                `src_sample_b_23')
+define(`Cb',           `src_sample_g')
+define(`Cb_01',                `src_sample_g_01')
+define(`Cb_23',                `src_sample_g_23')
 
-define(`Crn',          `mask_sample_r')
-define(`Crn_01',       `mask_sample_r_01')
-define(`Crn_23',       `mask_sample_r_23')
+define(`Crn',          `mask_sample_g')
+define(`Crn_01',       `mask_sample_g_01')
+define(`Crn_23',       `mask_sample_g_23')
 
-define(`Yn',           `mask_sample_g')
-define(`Yn_01',                `mask_sample_g_01')
-define(`Yn_23',                `mask_sample_g_23')
+define(`Yn',           `mask_sample_r')
+define(`Yn_01',                `mask_sample_r_01')
+define(`Yn_23',                `mask_sample_r_23')
 
 define(`Cbn',          `mask_sample_b')
 define(`Cbn_01',       `mask_sample_b_01')
index ce19e1a..6a76da4 100644 (file)
@@ -1,11 +1,11 @@
-   { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
-   { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
-   { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
-   { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
-   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
-   { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
-   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
-   { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
+   { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
+   { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
+   { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 },
+   { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 },
+   { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
+   { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
+   { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
+   { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
    { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
-   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
    { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
index ce19e1a..6a76da4 100644 (file)
@@ -1,11 +1,11 @@
-   { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
-   { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
-   { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
-   { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
-   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
-   { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
-   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
-   { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
+   { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
+   { 0x00802041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
+   { 0x00802040, 0x23007fbd, 0x008d0240, 0xbf008084 },
+   { 0x00802040, 0x23407fbd, 0x008d0200, 0xbf008084 },
+   { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
+   { 0x80802048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
+   { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
+   { 0x00802048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
    { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
-   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x00802001, 0x240003bc, 0x008d02c0, 0x00000000 },
    { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },