lib/pixops.hpp

   1 /* This file is part of MyPaint.
   2  * Copyright (C) 2008-2009 by Martin Renold <martinxyz@gmx.ch>
   3  *
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  */
   9
  10 // downscale a tile to half its size using bilinear interpolation
  11 // used mainly for generating background mipmaps
  12 void tile_downscale_rgb16(PyObject *src, PyObject *dst, int dst_x, int dst_y) {
  13 #ifdef HEAVY_DEBUG
  14   assert(PyArray_DIM(src, 0) == TILE_SIZE);
  15   assert(PyArray_DIM(src, 1) == TILE_SIZE);
  16   assert(PyArray_TYPE(src) == NPY_UINT16);
  17   assert(PyArray_ISCARRAY(src));
  18
  19   assert(PyArray_TYPE(dst) == NPY_UINT16);
  20   assert(PyArray_ISCARRAY(dst));
  21 #endif
  22
  23   PyArrayObject* src_arr = ((PyArrayObject*)src);
  24   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
  25
  26   for (int y=0; y<TILE_SIZE/2; y++) {
  27     uint16_t * src_p = (uint16_t*)(src_arr->data + (2*y)*src_arr->strides[0]);
  28     uint16_t * dst_p = (uint16_t*)(dst_arr->data + (y+dst_y)*dst_arr->strides[0]);
  29     dst_p += 3*dst_x;
  30     for(int x=0; x<TILE_SIZE/2; x++) {
  31       dst_p[0] = src_p[0]/4 + (src_p+3)[0]/4 + (src_p+3*TILE_SIZE)[0]/4 + (src_p+3*TILE_SIZE+3)[0]/4;
  32       dst_p[1] = src_p[1]/4 + (src_p+3)[1]/4 + (src_p+3*TILE_SIZE)[1]/4 + (src_p+3*TILE_SIZE+3)[1]/4;
  33       dst_p[2] = src_p[2]/4 + (src_p+3)[2]/4 + (src_p+3*TILE_SIZE)[2]/4 + (src_p+3*TILE_SIZE+3)[2]/4;
  34       src_p += 6;
  35       dst_p += 3;
  36     }
  37   }
  38 }
  39 // downscale a tile to half its size using bilinear interpolation
  40 // used mainly for generating tiledsurface mipmaps
  41 void tile_downscale_rgba16(PyObject *src, PyObject *dst, int dst_x, int dst_y) {
  42 #ifdef HEAVY_DEBUG
  43   assert(PyArray_DIM(src, 0) == TILE_SIZE);
  44   assert(PyArray_DIM(src, 1) == TILE_SIZE);
  45   assert(PyArray_TYPE(src) == NPY_UINT16);
  46   assert(PyArray_ISCARRAY(src));
  47
  48   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
  49   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
  50   assert(PyArray_TYPE(dst) == NPY_UINT16);
  51   assert(PyArray_ISCARRAY(dst));
  52 #endif
  53
  54   PyArrayObject* src_arr = ((PyArrayObject*)src);
  55   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
  56
  57   for (int y=0; y<TILE_SIZE/2; y++) {
  58     uint16_t * src_p = (uint16_t*)(src_arr->data + (2*y)*src_arr->strides[0]);
  59     uint16_t * dst_p = (uint16_t*)(dst_arr->data + (y+dst_y)*dst_arr->strides[0]);
  60     dst_p += 4*dst_x;
  61     for(int x=0; x<TILE_SIZE/2; x++) {
  62       dst_p[0] = src_p[0]/4 + (src_p+4)[0]/4 + (src_p+4*TILE_SIZE)[0]/4 + (src_p+4*TILE_SIZE+4)[0]/4;
  63       dst_p[1] = src_p[1]/4 + (src_p+4)[1]/4 + (src_p+4*TILE_SIZE)[1]/4 + (src_p+4*TILE_SIZE+4)[1]/4;
  64       dst_p[2] = src_p[2]/4 + (src_p+4)[2]/4 + (src_p+4*TILE_SIZE)[2]/4 + (src_p+4*TILE_SIZE+4)[2]/4;
  65       dst_p[3] = src_p[3]/4 + (src_p+4)[3]/4 + (src_p+4*TILE_SIZE)[3]/4 + (src_p+4*TILE_SIZE+4)[3]/4;
  66       src_p += 8;
  67       dst_p += 4;
  68     }
  69   }
  70 }
  71
  72 void tile_composite_rgba16_over_rgb16(PyObject * src, PyObject * dst, float alpha) {
  73 #ifdef HEAVY_DEBUG
  74   assert(PyArray_DIM(src, 0) == TILE_SIZE);
  75   assert(PyArray_DIM(src, 1) == TILE_SIZE);
  76   assert(PyArray_DIM(src, 2) == 4);
  77   assert(PyArray_TYPE(src) == NPY_UINT16);
  78   assert(PyArray_ISCARRAY(src));
  79
  80   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
  81   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
  82   assert(PyArray_DIM(dst, 2) == 3);
  83   assert(PyArray_TYPE(dst) == NPY_UINT16);
  84   assert(PyArray_ISBEHAVED(dst));
  85 #endif
  86
  87   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
  88 #ifdef HEAVY_DEBUG
  89   assert(dst_arr->strides[1] == 3*sizeof(uint16_t));
  90   assert(dst_arr->strides[2] ==   sizeof(uint16_t));
  91 #endif
  92
  93   uint32_t opac  = alpha * (1<<15) + 0.5;
  94   opac = CLAMP(opac, 0, 1<<15);
  95   if (opac == 0) return;
  96
  97   uint16_t * src_p  = (uint16_t*)((PyArrayObject*)src)->data;
  98   char * p = dst_arr->data;
  99   for (int y=0; y<TILE_SIZE; y++) {
 100     uint16_t  * dst_p  = (uint16_t*) (p);
 101     for (int x=0; x<TILE_SIZE; x++) {
 102       // resultAlpha = 1.0 (thus it does not matter if resultColor is premultiplied alpha or not)
 103       // resultColor = topColor + (1.0 - topAlpha) * bottomColor
 104       const uint16_t srcAlpha = CLAMP((uint32_t)(opac * src_p[3]) / (1<<15), 0, 1<<15);
 105       const uint32_t one_minus_topAlpha = (1<<15) - srcAlpha;
 106       dst_p[0] = ((uint32_t)src_p[0]*opac + one_minus_topAlpha*dst_p[0]) / (1<<15);
 107       dst_p[1] = ((uint32_t)src_p[1]*opac + one_minus_topAlpha*dst_p[1]) / (1<<15);
 108       dst_p[2] = ((uint32_t)src_p[2]*opac + one_minus_topAlpha*dst_p[2]) / (1<<15);
 109       src_p += 4;
 110       dst_p += 3;
 111     }
 112     p += dst_arr->strides[0];
 113   }
 114 }
 115
 116 void tile_composite_rgba16_multiply_rgb16(PyObject * src, PyObject * dst, float alpha) {
 117 #ifdef HEAVY_DEBUG
 118   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 119   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 120   assert(PyArray_DIM(src, 2) == 4);
 121   assert(PyArray_TYPE(src) == NPY_UINT16);
 122   assert(PyArray_ISCARRAY(src));
 123
 124   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 125   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 126   assert(PyArray_DIM(dst, 2) == 3);
 127   assert(PyArray_TYPE(dst) == NPY_UINT16);
 128   assert(PyArray_ISBEHAVED(dst));
 129 #endif
 130
 131   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 132 #ifdef HEAVY_DEBUG
 133   assert(dst_arr->strides[1] == 3*sizeof(uint16_t));
 134   assert(dst_arr->strides[2] ==   sizeof(uint16_t));
 135 #endif
 136
 137   uint32_t opac  = alpha * (1<<15) + 0.5;
 138   opac = CLAMP(opac, 0, 1<<15);
 139   if (opac == 0) return;
 140
 141   uint16_t * src_p  = (uint16_t*)((PyArrayObject*)src)->data;
 142   char * p = dst_arr->data;
 143   for (int y=0; y<TILE_SIZE; y++) {
 144     uint16_t  * dst_p  = (uint16_t*) (p);
 145     for (int x=0; x<TILE_SIZE; x++) {
 146       // resultAlpha = 1.0 (thus it does not matter if resultColor is premultiplied alpha or not)
 147       // resultColor = topColor + (1.0 - topAlpha) * bottomColor
 148       const uint32_t one_minus_topAlpha = (1<<15) - src_p[3]*opac/(1<<15);
 149       const uint32_t src_col0 = ((uint32_t) src_p[0] * opac) >> 15;
 150       const uint32_t src_col1 = ((uint32_t) src_p[1] * opac) >> 15;
 151       const uint32_t src_col2 = ((uint32_t) src_p[2] * opac) >> 15;
 152       dst_p[0] = ((uint32_t)src_col0*dst_p[0] + one_minus_topAlpha*dst_p[0]) / (1<<15);
 153       dst_p[1] = ((uint32_t)src_col1*dst_p[1] + one_minus_topAlpha*dst_p[1]) / (1<<15);
 154       dst_p[2] = ((uint32_t)src_col2*dst_p[2] + one_minus_topAlpha*dst_p[2]) / (1<<15);
 155       src_p += 4;
 156       dst_p += 3;
 157     }
 158     p += dst_arr->strides[0];
 159   }
 160 }
 161
 162 void tile_composite_rgba16_screen_rgb16(PyObject * src, PyObject * dst, float alpha) {
 163 #ifdef HEAVY_DEBUG
 164   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 165   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 166   assert(PyArray_DIM(src, 2) == 4);
 167   assert(PyArray_TYPE(src) == NPY_UINT16);
 168   assert(PyArray_ISCARRAY(src));
 169
 170   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 171   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 172   assert(PyArray_DIM(dst, 2) == 3);
 173   assert(PyArray_TYPE(dst) == NPY_UINT16);
 174   assert(PyArray_ISBEHAVED(dst));
 175 #endif
 176
 177   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 178 #ifdef HEAVY_DEBUG
 179   assert(dst_arr->strides[1] == 3*sizeof(uint16_t));
 180   assert(dst_arr->strides[2] ==   sizeof(uint16_t));
 181 #endif
 182
 183   uint32_t opac  = alpha * (1<<15) + 0.5;
 184   opac = CLAMP(opac, 0, 1<<15);
 185   if (opac == 0) return;
 186
 187   uint16_t * src_p  = (uint16_t*)((PyArrayObject*)src)->data;
 188   char * p = dst_arr->data;
 189   for (int y=0; y<TILE_SIZE; y++) {
 190     uint16_t  * dst_p  = (uint16_t*) (p);
 191     for (int x=0; x<TILE_SIZE; x++) {
 192       // resultAlpha = 1.0 (thus it does not matter if resultColor is premultiplied alpha or not)
 193       // resultColor = topColor + (1.0 - topAlpha) * bottomColor
 194       const uint32_t col0   = ((uint32_t)src_p[0]*opac) + (((uint32_t)dst_p[0]) << 15);
 195       const uint32_t col1   = ((uint32_t)src_p[1]*opac) + (((uint32_t)dst_p[1]) << 15);
 196       const uint32_t col2   = ((uint32_t)src_p[2]*opac) + (((uint32_t)dst_p[2]) << 15);
 197       const uint32_t src_col0 = ((uint32_t)src_p[0] * opac) >> 15;
 198       const uint32_t src_col1 = ((uint32_t)src_p[1] * opac) >> 15;
 199       const uint32_t src_col2 = ((uint32_t)src_p[2] * opac) >> 15;
 200       dst_p[0] = (col0 - ((uint32_t)src_col0*dst_p[0])) / (1<<15);
 201       dst_p[1] = (col1 - ((uint32_t)src_col1*dst_p[1])) / (1<<15);
 202       dst_p[2] = (col2 - ((uint32_t)src_col2*dst_p[2])) / (1<<15);
 203       src_p += 4;
 204       dst_p += 3;
 205     }
 206     p += dst_arr->strides[0];
 207   }
 208 }
 209
 210 void tile_composite_rgba16_dodge_rgb16(PyObject * src, PyObject * dst, float alpha) {
 211 #ifdef HEAVY_DEBUG
 212   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 213   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 214   assert(PyArray_DIM(src, 2) == 4);
 215   assert(PyArray_TYPE(src) == NPY_UINT16);
 216   assert(PyArray_ISCARRAY(src));
 217
 218   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 219   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 220   assert(PyArray_DIM(dst, 2) == 3);
 221   assert(PyArray_TYPE(dst) == NPY_UINT16);
 222   assert(PyArray_ISBEHAVED(dst));
 223 #endif
 224
 225   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 226 #ifdef HEAVY_DEBUG
 227   assert(dst_arr->strides[1] == 3*sizeof(uint16_t));
 228   assert(dst_arr->strides[2] ==   sizeof(uint16_t));
 229 #endif
 230
 231   uint32_t opac  = alpha * (1<<15) + 0.5;
 232   opac = CLAMP(opac, 0, 1<<15);
 233   if (opac == 0) return;
 234
 235   uint16_t * src_p  = (uint16_t*)((PyArrayObject*)src)->data;
 236   char * p = dst_arr->data;
 237   for (int y=0; y<TILE_SIZE; y++) {
 238     uint16_t  * dst_p  = (uint16_t*) (p);
 239     for (int x=0; x<TILE_SIZE; x++) {
 240       // resultAlpha = 1.0 (thus it does not matter if resultColor is premultiplied alpha or not)
 241       // resultColor = topColor + (1.0 - topAlpha) * bottomColor
 242       const uint32_t topAlpha32 = CLAMP((uint32_t)src_p[3]*opac,0,1<<30);
 243       const uint32_t topAlpha   = CLAMP((topAlpha32 >> 15), 0, 1<<15);
 244       const uint32_t one_minus_topAlpha = (1<<15) - topAlpha;
 245       for (int c=0; c < 3; c++) {
 246         const uint32_t topAlpha_minus_src = topAlpha32 - (uint32_t)src_p[c]*opac;
 247         if ((topAlpha_minus_src >> 15) == 0 && dst_p[c] == 0) {
 248           dst_p[c] = 0;
 249         } else if ((topAlpha_minus_src >> 15) == 0) {
 250           dst_p[c] = CLAMP((topAlpha32 + (uint32_t)dst_p[c] * one_minus_topAlpha)>>15, 0, (1<<15));
 251         } else {
 252           const uint32_t dst_times_topAlpha = (uint32_t)dst_p[c]*topAlpha;
 253           if (dst_times_topAlpha > topAlpha_minus_src)
 254             dst_p[c] = CLAMP((topAlpha32 + (uint32_t)dst_p[c]*one_minus_topAlpha) >> 15, 0, 1<<15);
 255           else
 256             dst_p[c] = CLAMP((uint32_t)topAlpha * (dst_times_topAlpha >> 15)/ (topAlpha_minus_src >> 15) + ((uint32_t)dst_p[c]*one_minus_topAlpha >> 15), 0, 1<<15);
 257         }
 258       }
 259       src_p += 4;
 260       dst_p += 3;
 261     }
 262     p += dst_arr->strides[0];
 263   }
 264 }
 265
 266 void tile_composite_rgba16_burn_rgb16(PyObject * src, PyObject * dst, float alpha) {
 267 #ifdef HEAVY_DEBUG
 268   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 269   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 270   assert(PyArray_DIM(src, 2) == 4);
 271   assert(PyArray_TYPE(src) == NPY_UINT16);
 272   assert(PyArray_ISCARRAY(src));
 273
 274   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 275   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 276   assert(PyArray_DIM(dst, 2) == 3);
 277   assert(PyArray_TYPE(dst) == NPY_UINT16);
 278   assert(PyArray_ISBEHAVED(dst));
 279 #endif
 280
 281   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 282 #ifdef HEAVY_DEBUG
 283   assert(dst_arr->strides[1] == 3*sizeof(uint16_t));
 284   assert(dst_arr->strides[2] ==   sizeof(uint16_t));
 285 #endif
 286
 287   uint32_t opac  = alpha * (1<<15) + 0.5;
 288   opac = CLAMP(opac, 0, 1<<15);
 289   if (opac == 0) return;
 290
 291   uint16_t * src_p  = (uint16_t*)((PyArrayObject*)src)->data;
 292   char * p = dst_arr->data;
 293   for (int y=0; y<TILE_SIZE; y++) {
 294     uint16_t  * dst_p  = (uint16_t*) (p);
 295     for (int x=0; x<TILE_SIZE; x++) {
 296       // resultAlpha = 1.0 (thus it does not matter if resultColor is premultiplied alpha or not)
 297       // resultColor = topColor + (1.0 - topAlpha) * bottomColor
 298       const uint32_t topAlpha32 = (uint32_t)src_p[3]*opac;
 299       const uint32_t topAlpha   = topAlpha32 >> 15;
 300       const uint32_t one_minus_topAlpha = (1<<15) - topAlpha;
 301       for (int c=0; c<3; c++) {
 302         const uint32_t src_col32 = (uint32_t)src_p[c] * opac;
 303         const uint32_t src_col   = src_col32 >> 15;
 304         if (src_col == 0 && dst_p[c] >= (1 << 15) - 1) {
 305           dst_p[c] = 1<<15;
 306         } else if (src_col == 0) {
 307           dst_p[c] = (dst_p[c] * one_minus_topAlpha)/(1<<15);
 308         } else {
 309           const uint32_t one_minus_dstcol = (1<<15) - dst_p[c];
 310           if (one_minus_dstcol * topAlpha > src_col32)
 311             dst_p[c] = (one_minus_topAlpha * dst_p[c]) / (1<<15);
 312           else {
 313             const uint32_t min_value = one_minus_dstcol * topAlpha / src_col;
 314             dst_p[c] = CLAMP((topAlpha32 - topAlpha * min_value + one_minus_topAlpha * dst_p[c]) / (1<<15), 0, 1<<15);
 315           }
 316         }
 317       }
 318       src_p += 4;
 319       dst_p += 3;
 320     }
 321     p += dst_arr->strides[0];
 322   }
 323 }
 324
 325 // used to copy the background before starting to composite over it
 326 //
 327 // simply array copying (numpy assignment operator is about 13 times slower, sadly)
 328 // The above comment is true when the array is sliced; it's only about two
 329 // times faster now, in the current usecae.
 330 void tile_blit_rgb16_into_rgb16(PyObject * src, PyObject * dst) {
 331   PyArrayObject* src_arr = ((PyArrayObject*)src);
 332   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 333
 334 #ifdef HEAVY_DEBUG
 335   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 336   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 337   assert(PyArray_DIM(dst, 2) == 3);
 338   assert(PyArray_TYPE(dst) == NPY_UINT16);
 339   assert(PyArray_ISCARRAY(dst));
 340   assert(dst_arr->strides[1] == 3*sizeof(uint16_t));
 341   assert(dst_arr->strides[2] ==   sizeof(uint16_t));
 342
 343   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 344   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 345   assert(PyArray_DIM(src, 2) == 3);
 346   assert(PyArray_TYPE(src) == NPY_UINT16);
 347   assert(PyArray_ISCARRAY(dst));
 348   assert(src_arr->strides[1] == 3*sizeof(uint16_t));
 349   assert(src_arr->strides[2] ==   sizeof(uint16_t));
 350 #endif
 351
 352   memcpy(dst_arr->data, src_arr->data, TILE_SIZE*TILE_SIZE*3*sizeof(uint16_t));
 353   /* the code below can be used if it is not ISCARRAY, but only ISBEHAVED:
 354   char * src_p = src_arr->data;
 355   char * dst_p = dst_arr->data;
 356   for (int y=0; y<TILE_SIZE; y++) {
 357     memcpy(dst_p, src_p, TILE_SIZE*3);
 358     src_p += src_arr->strides[0];
 359     dst_p += dst_arr->strides[0];
 360   }
 361   */
 362 }
 363
 364 void tile_clear(PyObject * dst) {
 365   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 366
 367 #ifdef HEAVY_DEBUG
 368   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 369   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 370   assert(PyArray_TYPE(dst) == NPY_UINT8);
 371   assert(PyArray_ISBEHAVED(dst));
 372   assert(dst_arr->strides[1] <= 8);
 373 #endif
 374
 375   for (int y=0; y<TILE_SIZE; y++) {
 376     uint8_t  * dst_p = (uint8_t*)(dst_arr->data + y*dst_arr->strides[0]);
 377     memset(dst_p, 0, TILE_SIZE*dst_arr->strides[1]);
 378     dst_p += dst_arr->strides[0];
 379   }
 380 }
 381
 // Noise table used for dithering (the same noise is reused for each tile).
 static const int dithering_noise_size = 64*64*2;
 static uint16_t dithering_noise[dithering_noise_size];

 // Fills dithering_noise on the first call; later calls return immediately.
 static void precalculate_dithering_noise_if_required()
 {
   static bool have_noise = false;
   if (have_noise) {
     return;
   }

   // Each entry is a random number in the range [0.03 .. 0.97] * (1<<15).
   //
   // The full range could be used, but with the reduced range it is much
   // easier to guarantee that 8bpc load-save roundtrips don't alter the
   // image. With the full range a lot of attention would have to be paid
   // to rounding when converting 8bpc to the internal format.
   const int one = 1<<15;
   const int floor_value = one * 8/256;
   uint16_t * const end = dithering_noise + dithering_noise_size;
   for (uint16_t * slot = dithering_noise; slot != end; ++slot) {
     *slot = (rand() % one) * 240/256 + floor_value;
   }
   have_noise = true;
 }
 402
 403 // used mainly for saving layers (transparent PNG)
 404 void tile_convert_rgba16_to_rgba8(PyObject * src, PyObject * dst) {
 405   PyArrayObject* src_arr = ((PyArrayObject*)src);
 406   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 407
 408 #ifdef HEAVY_DEBUG
 409   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 410   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 411   assert(PyArray_DIM(dst, 2) == 4);
 412   assert(PyArray_TYPE(dst) == NPY_UINT8);
 413   assert(PyArray_ISBEHAVED(dst));
 414   assert(dst_arr->strides[1] == 4*sizeof(uint8_t));
 415   assert(dst_arr->strides[2] ==   sizeof(uint8_t));
 416
 417   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 418   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 419   assert(PyArray_DIM(src, 2) == 4);
 420   assert(PyArray_TYPE(src) == NPY_UINT16);
 421   assert(PyArray_ISBEHAVED(src));
 422   assert(src_arr->strides[1] == 4*sizeof(uint16_t));
 423   assert(src_arr->strides[2] ==   sizeof(uint16_t));
 424 #endif
 425
 426   precalculate_dithering_noise_if_required();
 427   int noise_idx = 0;
 428
 429   for (int y=0; y<TILE_SIZE; y++) {
 430     uint16_t * src_p = (uint16_t*)(src_arr->data + y*src_arr->strides[0]);
 431     uint8_t  * dst_p = (uint8_t*)(dst_arr->data + y*dst_arr->strides[0]);
 432     for (int x=0; x<TILE_SIZE; x++) {
 433       uint32_t r, g, b, a;
 434       r = *src_p++;
 435       g = *src_p++;
 436       b = *src_p++;
 437       a = *src_p++;
 438 #ifdef HEAVY_DEBUG
 439       assert(a<=(1<<15));
 440       assert(r<=(1<<15));
 441       assert(g<=(1<<15));
 442       assert(b<=(1<<15));
 443       assert(r<=a);
 444       assert(g<=a);
 445       assert(b<=a);
 446 #endif
 447       // un-premultiply alpha (with rounding)
 448       if (a != 0) {
 449         r = ((r << 15) + a/2) / a;
 450         g = ((g << 15) + a/2) / a;
 451         b = ((b << 15) + a/2) / a;
 452       } else {
 453         r = g = b = 0;
 454       }
 455 #ifdef HEAVY_DEBUG
 456       assert(a<=(1<<15));
 457       assert(r<=(1<<15));
 458       assert(g<=(1<<15));
 459       assert(b<=(1<<15));
 460 #endif
 461
 462       /*
 463       // Variant A) rounding
 464       const uint32_t add_r = (1<<15)/2;
 465       const uint32_t add_g = (1<<15)/2;
 466       const uint32_t add_b = (1<<15)/2;
 467       const uint32_t add_a = (1<<15)/2;
 468       */
 469
 470       /*
 471       // Variant B) naive dithering
 472       // This can alter the alpha channel during a load->save cycle.
 473       const uint32_t add_r = rand() % (1<<15);
 474       const uint32_t add_g = rand() % (1<<15);
 475       const uint32_t add_b = rand() % (1<<15);
 476       const uint32_t add_a = rand() % (1<<15);
 477       */
 478
 479       /*
 480       // Variant C) slightly better dithering
 481       // make sure we don't dither rounding errors (those did occur when converting 8bit-->16bit)
 482       // this preserves the alpha channel, but we still add noise to the highly transparent colors
 483       const uint32_t add_r = (rand() % (1<<15)) * 240/256 + (1<<15) * 8/256;
 484       const uint32_t add_g = add_r; // hm... do not produce too much color noise
 485       const uint32_t add_b = add_r;
 486       const uint32_t add_a = (rand() % (1<<15)) * 240/256 + (1<<15) * 8/256;
 487       // TODO: error diffusion might work better than random dithering...
 488       */
 489
 490       // Variant C) but with precalculated noise (much faster)
 491       //
 492       const uint32_t add_r = dithering_noise[noise_idx++];
 493       const uint32_t add_g = add_r; // hm... do not produce too much color noise
 494       const uint32_t add_b = add_r;
 495       const uint32_t add_a = dithering_noise[noise_idx++];
 496
 497 #ifdef HEAVY_DEBUG
 498       assert(add_a < (1<<15));
 499       assert(add_a >= 0);
 500       assert(noise_idx <= dithering_noise_size);
 501 #endif
 502
 503       *dst_p++ = (r * 255 + add_r) / (1<<15);
 504       *dst_p++ = (g * 255 + add_g) / (1<<15);
 505       *dst_p++ = (b * 255 + add_b) / (1<<15);
 506       *dst_p++ = (a * 255 + add_a) / (1<<15);
 507     }
 508     src_p += src_arr->strides[0];
 509     dst_p += dst_arr->strides[0];
 510   }
 511 }
 512
 513 // used after compositing (when displaying, or when saving solid PNG or JPG)
 514 void tile_convert_rgb16_to_rgb8(PyObject * src, PyObject * dst) {
 515   PyArrayObject* src_arr = ((PyArrayObject*)src);
 516   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 517
 518 #ifdef HEAVY_DEBUG
 519   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 520   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 521   assert(PyArray_TYPE(dst) == NPY_UINT8);
 522   assert(PyArray_ISBEHAVED(dst));
 523   assert(PyArray_STRIDE(dst, 1) == PyArray_DIM(dst, 2)*sizeof(uint8_t));
 524   assert(PyArray_STRIDE(dst, 2) == sizeof(uint8_t));
 525
 526   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 527   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 528   assert(PyArray_DIM(src, 2) == 3);
 529   assert(PyArray_TYPE(src) == NPY_UINT16);
 530   assert(PyArray_ISBEHAVED(src));
 531   assert(PyArray_STRIDE(src, 1) == 3*sizeof(uint16_t));
 532   assert(PyArray_STRIDE(src, 2) ==   sizeof(uint16_t));
 533 #endif
 534
 535   precalculate_dithering_noise_if_required();
 536   int noise_idx = 0;
 537
 538   bool dst_has_alpha = PyArray_DIM(dst, 2) == 4;
 539
 540   for (int y=0; y<TILE_SIZE; y++) {
 541     uint16_t * src_p = (uint16_t*)(src_arr->data + y*src_arr->strides[0]);
 542     uint8_t  * dst_p = (uint8_t*)(dst_arr->data + y*dst_arr->strides[0]);
 543     if (dst_has_alpha) {
 544       for (int x=0; x<TILE_SIZE; x++) {
 545         uint32_t r, g, b;
 546         r = *src_p++;
 547         g = *src_p++;
 548         b = *src_p++;
 549 #ifdef HEAVY_DEBUG
 550         assert(r<=(1<<15));
 551         assert(g<=(1<<15));
 552         assert(b<=(1<<15));
 553 #endif
 554
 555         /*
 556         // rounding
 557         const uint32_t add = (1<<15)/2;
 558         */
 559         // dithering
 560         const uint32_t add = dithering_noise[noise_idx++];
 561
 562         *dst_p++ = (r * 255 + add) / (1<<15);
 563         *dst_p++ = (g * 255 + add) / (1<<15);
 564         *dst_p++ = (b * 255 + add) / (1<<15);
 565         *dst_p++ = 255;
 566       }
 567     } else {
 568       for (int x=0; x<TILE_SIZE; x++) {
 569         uint32_t r, g, b;
 570         r = *src_p++;
 571         g = *src_p++;
 572         b = *src_p++;
 573 #ifdef HEAVY_DEBUG
 574         assert(r<=(1<<15));
 575         assert(g<=(1<<15));
 576         assert(b<=(1<<15));
 577 #endif
 578
 579         /*
 580         // rounding
 581         const uint32_t add = (1<<15)/2;
 582         */
 583         // dithering
 584         const uint32_t add = dithering_noise[noise_idx++];
 585
 586         *dst_p++ = (r * 255 + add) / (1<<15);
 587         *dst_p++ = (g * 255 + add) / (1<<15);
 588         *dst_p++ = (b * 255 + add) / (1<<15);
 589       }
 590     }
 591 #ifdef HEAVY_DEBUG
 592     assert(noise_idx <= dithering_noise_size);
 593 #endif
 594     src_p += src_arr->strides[0];
 595     dst_p += dst_arr->strides[0];
 596   }
 597 }
 598
 599 // used mainly for loading layers (transparent PNG)
 600 void tile_convert_rgba8_to_rgba16(PyObject * src, PyObject * dst) {
 601   PyArrayObject* src_arr = ((PyArrayObject*)src);
 602   PyArrayObject* dst_arr = ((PyArrayObject*)dst);
 603
 604 #ifdef HEAVY_DEBUG
 605   assert(PyArray_DIM(dst, 0) == TILE_SIZE);
 606   assert(PyArray_DIM(dst, 1) == TILE_SIZE);
 607   assert(PyArray_DIM(dst, 2) == 4);
 608   assert(PyArray_TYPE(dst) == NPY_UINT16);
 609   assert(PyArray_ISBEHAVED(dst));
 610   assert(dst_arr->strides[1] == 4*sizeof(uint16_t));
 611   assert(dst_arr->strides[2] ==   sizeof(uint16_t));
 612
 613   assert(PyArray_DIM(src, 0) == TILE_SIZE);
 614   assert(PyArray_DIM(src, 1) == TILE_SIZE);
 615   assert(PyArray_DIM(src, 2) == 4);
 616   assert(PyArray_TYPE(src) == NPY_UINT8);
 617   assert(PyArray_ISBEHAVED(src));
 618   assert(src_arr->strides[1] == 4*sizeof(uint8_t));
 619   assert(src_arr->strides[2] ==   sizeof(uint8_t));
 620 #endif
 621
 622   for (int y=0; y<TILE_SIZE; y++) {
 623     uint8_t  * src_p = (uint8_t*)(src_arr->data + y*src_arr->strides[0]);
 624     uint16_t * dst_p = (uint16_t*)(dst_arr->data + y*dst_arr->strides[0]);
 625     for (int x=0; x<TILE_SIZE; x++) {
 626       uint32_t r, g, b, a;
 627       r = *src_p++;
 628       g = *src_p++;
 629       b = *src_p++;
 630       a = *src_p++;
 631
 632       // convert to fixed point (with rounding)
 633       r = (r * (1<<15) + 255/2) / 255;
 634       g = (g * (1<<15) + 255/2) / 255;
 635       b = (b * (1<<15) + 255/2) / 255;
 636       a = (a * (1<<15) + 255/2) / 255;
 637
 638       // premultiply alpha (with rounding), save back
 639       *dst_p++ = (r * a + (1<<15)/2) / (1<<15);
 640       *dst_p++ = (g * a + (1<<15)/2) / (1<<15);
 641       *dst_p++ = (b * a + (1<<15)/2) / (1<<15);
 642       *dst_p++ = a;
 643     }
 644   }
 645 }
 646
 647 // used in strokemap.py
 648 //
 649 // Calculates a 1-bit bitmap of the stroke shape using two snapshots
 650 // of the layer (the layer before and after the stroke).
 651 //
 652 // If the alpha increases a lot, we want the stroke to appear in
 653 // the strokemap, even if the color did not change. If the alpha
 654 // decreases a lot, we want to ignore the stroke (eraser). If
 655 // the alpha decreases just a little, but the color changes a
 656 // lot (eg. heavy smudging or watercolor brushes) we want the
 657 // stroke still to be pickable.
 658 //
 659 // If the layer alpha was (near) zero, we record the stroke even if it
 660 // is barely visible. This gives a bigger target to point-and-select.
 661 //
 662 void tile_perceptual_change_strokemap(PyObject * a, PyObject * b, PyObject * res) {
 663
 664   assert(PyArray_TYPE(a) == NPY_UINT16);
 665   assert(PyArray_TYPE(b) == NPY_UINT16);
 666   assert(PyArray_TYPE(res) == NPY_UINT8);
 667   assert(PyArray_ISCARRAY(a));
 668   assert(PyArray_ISCARRAY(b));
 669   assert(PyArray_ISCARRAY(res));
 670
 671   uint16_t * a_p  = (uint16_t*)PyArray_DATA(a);
 672   uint16_t * b_p  = (uint16_t*)PyArray_DATA(b);
 673   uint8_t * res_p = (uint8_t*)PyArray_DATA(res);
 674
 675   for (int y=0; y<TILE_SIZE; y++) {
 676     for (int x=0; x<TILE_SIZE; x++) {
 677
 678       int32_t color_change = 0;
 679       // We want to compare a.color with b.color, but we only know
 680       // (a.color * a.alpha) and (b.color * b.alpha).  We multiply
 681       // each component with the alpha of the other image, so they are
 682       // scaled the same and can be compared.
 683
 684       for (int i=0; i<3; i++) {
 685         int32_t a_col = (uint32_t)a_p[i] * b_p[3] / (1<<15); // a.color * a.alpha*b.alpha
 686         int32_t b_col = (uint32_t)b_p[i] * a_p[3] / (1<<15); // b.color * a.alpha*b.alpha
 687         color_change += abs(b_col - a_col);
 688       }
 689       // "color_change" is in the range [0, 3*a_a]
 690       // if either old or new alpha is (near) zero, "color_change" is (near) zero
 691
 692       int32_t alpha_old = a_p[3];
 693       int32_t alpha_new = b_p[3];
 694
 695       // Note: the thresholds below are arbitrary choices found to work okay
 696
 697       // We report a color change only if both old and new color are
 698       // well-defined (big enough alpha).
 699       bool is_perceptual_color_change = color_change > MAX(alpha_old, alpha_new)/16;
 700
 701       int32_t alpha_diff = alpha_new - alpha_old; // no abs() here (ignore erasers)
 702       // We check the alpha increase relative to the previous alpha.
 703       bool is_perceptual_alpha_increase = alpha_diff > (1<<15)/4;
 704
 705       // this one is responsible for making fat big ugly easy-to-hit pointer targets
 706       bool is_big_relative_alpha_increase  = alpha_diff > (1<<15)/64 && alpha_diff > alpha_old/2;
 707
 708       if (is_perceptual_alpha_increase || is_big_relative_alpha_increase || is_perceptual_color_change) {
 709         res_p[0] = 1;
 710       } else {
 711         res_p[0] = 0;
 712       }
 713
 714       a_p += 4;
 715       b_p += 4;
 716       res_p += 1;
 717     }
 718   }
 719 }
 720