+#include "util.h"
+
+/*
+ * GOB (Group Of Bytes) is the basic unit of the blocklinear layout.
+ * GOBs are arranged into blocks, where the height of the block (measured
+ * in GOBs) is configurable.
+ */
+#define NV_BLOCKLINEAR_GOB_HEIGHT 8 /* GOB height in rows; surface height is padded with it */
+#define NV_BLOCKLINEAR_GOB_WIDTH 64 /* GOB width in bytes; the row pitch is aligned to it */
+#define NV_DEFAULT_BLOCK_HEIGHT_LOG2 4 /* starting block height: 1 << 4 = 16 GOBs */
+#define NV_PREFERRED_PAGE_SIZE (128 * 1024) /* blocklinear sizes are padded to this */
+
+// clang-format off
+enum nv_mem_kind
+{
+ NV_MEM_KIND_PITCH = 0, /* presumably pitch-linear (untiled) memory -- TODO confirm */
+ NV_MEM_KIND_C32_2CRA = 0xdb, /* kind reported by compute_layout_blocklinear() */
+ NV_MEM_KIND_GENERIC_16Bx2 = 0xfe, /* NOTE(review): generic 16Bx2 kind per the name -- confirm */
+};
+
+enum tegra_map_type {
+ TEGRA_READ_TILED_BUFFER = 0, /* presumably tiled -> untiled copy; see transfer_tile() */
+ TEGRA_WRITE_TILED_BUFFER = 1, /* presumably untiled -> tiled copy; see transfer_tile() */
+};
+// clang-format on
+
+struct tegra_private_map_data {
+ void *tiled; /* presumably the mapping of the tiled buffer -- TODO confirm */
+ void *untiled; /* presumably the linear (untiled) copy of it -- TODO confirm */
+};
+
+/* DRM formats treated as render targets (per the name); the consumer is outside this chunk. */
+static const uint32_t render_target_formats[] = { DRM_FORMAT_ARGB8888, DRM_FORMAT_XRGB8888 };
+
+/*
+ * Choose the block height (as log2 of a GOB count) for a surface with the
+ * given height in rows.
+ *
+ * Starts from NV_DEFAULT_BLOCK_HEIGHT_LOG2 and steps down for as long as a
+ * block half as tall would still span at least `height` rows, stopping at 0
+ * (a block of a single GOB).
+ *
+ * Returns the selected log2 block height.
+ */
+static int compute_block_height_log2(int height)
+{
+	int log2_gobs = NV_DEFAULT_BLOCK_HEIGHT_LOG2;
+
+	/*
+	 * NV_BLOCKLINEAR_GOB_HEIGHT << (log2_gobs - 1) is the row count of the
+	 * next smaller block; keep shrinking while that still covers the
+	 * whole surface.
+	 */
+	while (log2_gobs > 0 && (NV_BLOCKLINEAR_GOB_HEIGHT << (log2_gobs - 1)) >= height) {
+		log2_gobs--;
+	}
+
+	return log2_gobs;
+}
+
+/*
+ * Work out the blocklinear layout of a width x height surface.
+ *
+ * Outputs:
+ *   kind              - always NV_MEM_KIND_C32_2CRA
+ *   block_height_log2 - result of compute_block_height_log2(height)
+ *   stride            - row pitch, aligned to NV_BLOCKLINEAR_GOB_WIDTH
+ *   size              - pitch * padded height, rounded up to
+ *                       NV_PREFERRED_PAGE_SIZE
+ */
+static void compute_layout_blocklinear(int width, int height, int format, enum nv_mem_kind *kind,
+				       uint32_t *block_height_log2, uint32_t *stride,
+				       uint32_t *size)
+{
+	/* Row pitch from the format helper, rounded up to whole GOB columns. */
+	int row_bytes = drv_stride_from_format(format, width, 0);
+	row_bytes = ALIGN(row_bytes, NV_BLOCKLINEAR_GOB_WIDTH);
+
+	/* Round the surface height up to a whole number of blocks. */
+	*block_height_log2 = compute_block_height_log2(height);
+	int gobs_per_block = 1 << *block_height_log2;
+	int rows = ALIGN(height, NV_BLOCKLINEAR_GOB_HEIGHT * gobs_per_block);
+
+	/*
+	 * Round the allocation up to the preferred page size. This keeps the
+	 * page table smaller (see discussion in NV bug 1321091) and doubles
+	 * as a workaround for NV bug 1325421.
+	 */
+	int total = row_bytes * rows;
+	total = ALIGN(total, NV_PREFERRED_PAGE_SIZE);
+
+	*kind = NV_MEM_KIND_C32_2CRA;
+	*stride = row_bytes;
+	*size = total;
+}
+
+/*
+ * Work out the linear layout of a width x height surface.
+ *
+ * Outputs:
+ *   stride - value from drv_stride_from_format(), aligned up to 64
+ *   size   - stride * height
+ */
+static void compute_layout_linear(int width, int height, int format, uint32_t *stride,
+				  uint32_t *size)
+{
+	uint32_t pitch = ALIGN(drv_stride_from_format(format, width, 0), 64);
+
+	*stride = pitch;
+	*size = pitch * height;
+}
+
+static void transfer_tile(struct bo *bo, uint8_t *tiled, uint8_t *untiled, enum tegra_map_type type,
+ uint32_t bytes_per_pixel, uint32_t gob_top, uint32_t gob_left,
+ uint32_t gob_size_pixels, uint8_t *tiled_last)
+{
+ uint8_t *tmp;
+ uint32_t x, y, k;
+ for (k = 0; k < gob_size_pixels; k++) {
+ /*
+ * Given the kth pixel starting from the tile specified by
+ * gob_top and gob_left, unswizzle to get the standard (x, y)
+ * representation.
+ */
+ x = gob_left + (((k >> 3) & 8) | ((k >> 1) & 4) | (k & 3));
+ y = gob_top + ((k >> 7 << 3) | ((k >> 3) & 6) | ((k >> 2) & 1));
+
+ if (tiled >= tiled_last)
+ return;
+
+ if (x >= bo->width || y >= bo->height) {
+ tiled += bytes_per_pixel;
+ continue;
+ }
+
+ tmp = untiled + y * bo->strides[0] + x * bytes_per_pixel;