src/gallium/drivers/llvmpipe/lp_bld_arit.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * Helper
  32  *
 * LLVM IR doesn't support all basic arithmetic operations we care about (most
 * notably min/max and saturated operations), and it is often necessary to
 * resort to machine-specific intrinsics directly. The functions here hide all
 * these implementation details from the other modules.
  37  *
 * We also do simple expression simplification here. Reasons are:
  39  * - it is very easy given we have all necessary information readily available
  40  * - LLVM optimization passes fail to simplify several vector expressions
  41  * - We often know value constraints which the optimization passes have no way
  42  *   of knowing, such as when source arguments are known to be in [0, 1] range.
  43  *
  44  * @author Jose Fonseca <jfonseca@vmware.com>
  45  */
  46
  47
  48 #include "util/u_memory.h"
  49 #include "util/u_debug.h"
  50 #include "util/u_string.h"
  51 #include "util/u_cpu_detect.h"
  52
  53 #include "lp_bld_type.h"
  54 #include "lp_bld_const.h"
  55 #include "lp_bld_intr.h"
  56 #include "lp_bld_logic.h"
  57 #include "lp_bld_debug.h"
  58 #include "lp_bld_arit.h"
  59
  60
  61 /**
  62  * Generate min(a, b)
  63  * No checks for special case values of a or b = 1 or 0 are done.
  64  */
  65 static LLVMValueRef
  66 lp_build_min_simple(struct lp_build_context *bld,
  67                     LLVMValueRef a,
  68                     LLVMValueRef b)
  69 {
  70    const struct lp_type type = bld->type;
  71    const char *intrinsic = NULL;
  72    LLVMValueRef cond;
  73
  74    /* TODO: optimize the constant case */
  75
  76    if(type.width * type.length == 128) {
  77       if(type.floating) {
  78          if(type.width == 32 && util_cpu_caps.has_sse)
  79             intrinsic = "llvm.x86.sse.min.ps";
  80          if(type.width == 64 && util_cpu_caps.has_sse2)
  81             intrinsic = "llvm.x86.sse2.min.pd";
  82       }
  83       else {
  84          if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
  85             intrinsic = "llvm.x86.sse2.pminu.b";
  86          if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
  87             intrinsic = "llvm.x86.sse41.pminsb";
  88          if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
  89             intrinsic = "llvm.x86.sse41.pminuw";
  90          if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
  91             intrinsic = "llvm.x86.sse2.pmins.w";
  92          if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
  93             intrinsic = "llvm.x86.sse41.pminud";
  94          if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
  95             intrinsic = "llvm.x86.sse41.pminsd";
  96       }
  97    }
  98
  99    if(intrinsic)
 100       return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 101
 102    cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
 103    return lp_build_select(bld, cond, a, b);
 104 }
 105
 106
 107 /**
 108  * Generate max(a, b)
 109  * No checks for special case values of a or b = 1 or 0 are done.
 110  */
 111 static LLVMValueRef
 112 lp_build_max_simple(struct lp_build_context *bld,
 113                     LLVMValueRef a,
 114                     LLVMValueRef b)
 115 {
 116    const struct lp_type type = bld->type;
 117    const char *intrinsic = NULL;
 118    LLVMValueRef cond;
 119
 120    /* TODO: optimize the constant case */
 121
 122    if(type.width * type.length == 128) {
 123       if(type.floating) {
 124          if(type.width == 32 && util_cpu_caps.has_sse)
 125             intrinsic = "llvm.x86.sse.max.ps";
 126          if(type.width == 64 && util_cpu_caps.has_sse2)
 127             intrinsic = "llvm.x86.sse2.max.pd";
 128       }
 129       else {
 130          if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
 131             intrinsic = "llvm.x86.sse2.pmaxu.b";
 132          if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
 133             intrinsic = "llvm.x86.sse41.pmaxsb";
 134          if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
 135             intrinsic = "llvm.x86.sse41.pmaxuw";
 136          if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
 137             intrinsic = "llvm.x86.sse2.pmaxs.w";
 138          if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
 139             intrinsic = "llvm.x86.sse41.pmaxud";
 140          if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
 141             intrinsic = "llvm.x86.sse41.pmaxsd";
 142       }
 143    }
 144
 145    if(intrinsic)
 146       return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 147
 148    cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
 149    return lp_build_select(bld, cond, a, b);
 150 }
 151
 152
 153 /**
 154  * Generate 1 - a, or ~a depending on bld->type.
 155  */
 156 LLVMValueRef
 157 lp_build_comp(struct lp_build_context *bld,
 158               LLVMValueRef a)
 159 {
 160    const struct lp_type type = bld->type;
 161
 162    if(a == bld->one)
 163       return bld->zero;
 164    if(a == bld->zero)
 165       return bld->one;
 166
 167    if(type.norm && !type.floating && !type.fixed && !type.sign) {
 168       if(LLVMIsConstant(a))
 169          return LLVMConstNot(a);
 170       else
 171          return LLVMBuildNot(bld->builder, a, "");
 172    }
 173
 174    if(LLVMIsConstant(a))
 175       return LLVMConstSub(bld->one, a);
 176    else
 177       return LLVMBuildSub(bld->builder, bld->one, a, "");
 178 }
 179
 180
 181 /**
 182  * Generate a + b
 183  */
 184 LLVMValueRef
 185 lp_build_add(struct lp_build_context *bld,
 186              LLVMValueRef a,
 187              LLVMValueRef b)
 188 {
 189    const struct lp_type type = bld->type;
 190    LLVMValueRef res;
 191
 192    if(a == bld->zero)
 193       return b;
 194    if(b == bld->zero)
 195       return a;
 196    if(a == bld->undef || b == bld->undef)
 197       return bld->undef;
 198
 199    if(bld->type.norm) {
 200       const char *intrinsic = NULL;
 201
 202       if(a == bld->one || b == bld->one)
 203         return bld->one;
 204
 205       if(util_cpu_caps.has_sse2 &&
 206          type.width * type.length == 128 &&
 207          !type.floating && !type.fixed) {
 208          if(type.width == 8)
 209             intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
 210          if(type.width == 16)
 211             intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
 212       }
 213
 214       if(intrinsic)
 215          return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 216    }
 217
 218    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 219       res = LLVMConstAdd(a, b);
 220    else
 221       res = LLVMBuildAdd(bld->builder, a, b, "");
 222
 223    /* clamp to ceiling of 1.0 */
 224    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
 225       res = lp_build_min_simple(bld, res, bld->one);
 226
 227    /* XXX clamp to floor of -1 or 0??? */
 228
 229    return res;
 230 }
 231
 232
 233 /**
 234  * Generate a - b
 235  */
 236 LLVMValueRef
 237 lp_build_sub(struct lp_build_context *bld,
 238              LLVMValueRef a,
 239              LLVMValueRef b)
 240 {
 241    const struct lp_type type = bld->type;
 242    LLVMValueRef res;
 243
 244    if(b == bld->zero)
 245       return a;
 246    if(a == bld->undef || b == bld->undef)
 247       return bld->undef;
 248    if(a == b)
 249       return bld->zero;
 250
 251    if(bld->type.norm) {
 252       const char *intrinsic = NULL;
 253
 254       if(b == bld->one)
 255         return bld->zero;
 256
 257       if(util_cpu_caps.has_sse2 &&
 258          type.width * type.length == 128 &&
 259          !type.floating && !type.fixed) {
 260          if(type.width == 8)
 261             intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
 262          if(type.width == 16)
 263             intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
 264       }
 265
 266       if(intrinsic)
 267          return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
 268    }
 269
 270    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 271       res = LLVMConstSub(a, b);
 272    else
 273       res = LLVMBuildSub(bld->builder, a, b, "");
 274
 275    if(bld->type.norm && (bld->type.floating || bld->type.fixed))
 276       res = lp_build_max_simple(bld, res, bld->zero);
 277
 278    return res;
 279 }
 280
 281
 282 /**
 283  * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
 284  */
 285 static LLVMValueRef
 286 lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
 287 {
 288    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 289    unsigned i, j;
 290
 291    assert(n <= LP_MAX_VECTOR_LENGTH);
 292    assert(lo_hi < 2);
 293
 294    for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
 295       elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
 296       elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
 297    }
 298
 299    return LLVMConstVector(elems, n);
 300 }
 301
 302
 303 /**
 304  * Build constant int vector of width 'n' and value 'c'.
 305  */
 306 static LLVMValueRef
 307 lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
 308 {
 309    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
 310    unsigned i;
 311
 312    assert(n <= LP_MAX_VECTOR_LENGTH);
 313
 314    for(i = 0; i < n; ++i)
 315       elems[i] = LLVMConstInt(type, c, 0);
 316
 317    return LLVMConstVector(elems, n);
 318 }
 319
 320
 321 /**
 322  * Normalized 8bit multiplication.
 323  *
 324  * - alpha plus one
 325  *
 326  *     makes the following approximation to the division (Sree)
 327  *
 328  *       a*b/255 ~= (a*(b + 1)) >> 256
 329  *
 330  *     which is the fastest method that satisfies the following OpenGL criteria
 331  *
 332  *       0*0 = 0 and 255*255 = 255
 333  *
 334  * - geometric series
 335  *
 336  *     takes the geometric series approximation to the division
 337  *
 338  *       t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 339  *
 340  *     in this case just the first two terms to fit in 16bit arithmetic
 341  *
 342  *       t/255 ~= (t + (t >> 8)) >> 8
 343  *
 344  *     note that just by itself it doesn't satisfies the OpenGL criteria, as
 345  *     255*255 = 254, so the special case b = 255 must be accounted or roundoff
 346  *     must be used
 347  *
 348  * - geometric series plus rounding
 349  *
 350  *     when using a geometric series division instead of truncating the result
 351  *     use roundoff in the approximation (Jim Blinn)
 352  *
 353  *       t/255 ~= (t + (t >> 8) + 0x80) >> 8
 354  *
 355  *     achieving the exact results
 356  *
 357  * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
 358  *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
 359  * @sa Michael Herf, The "double blend trick", May 2000,
 360  *     http://www.stereopsis.com/doubleblend.html
 361  */
 362 static LLVMValueRef
 363 lp_build_mul_u8n(LLVMBuilderRef builder,
 364                  LLVMValueRef a, LLVMValueRef b)
 365 {
 366    static LLVMValueRef c01 = NULL;
 367    static LLVMValueRef c08 = NULL;
 368    static LLVMValueRef c80 = NULL;
 369    LLVMValueRef ab;
 370
 371    if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
 372    if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
 373    if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
 374
 375 #if 0
 376
 377    /* a*b/255 ~= (a*(b + 1)) >> 256 */
 378    b = LLVMBuildAdd(builder, b, c01, "");
 379    ab = LLVMBuildMul(builder, a, b, "");
 380
 381 #else
 382
 383    /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
 384    ab = LLVMBuildMul(builder, a, b, "");
 385    ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
 386    ab = LLVMBuildAdd(builder, ab, c80, "");
 387
 388 #endif
 389
 390    ab = LLVMBuildLShr(builder, ab, c08, "");
 391
 392    return ab;
 393 }
 394
 395
 396 /**
 397  * Generate a * b
 398  */
 399 LLVMValueRef
 400 lp_build_mul(struct lp_build_context *bld,
 401              LLVMValueRef a,
 402              LLVMValueRef b)
 403 {
 404    const struct lp_type type = bld->type;
 405
 406    if(a == bld->zero)
 407       return bld->zero;
 408    if(a == bld->one)
 409       return b;
 410    if(b == bld->zero)
 411       return bld->zero;
 412    if(b == bld->one)
 413       return a;
 414    if(a == bld->undef || b == bld->undef)
 415       return bld->undef;
 416
 417    if(!type.floating && !type.fixed && type.norm) {
 418       if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) {
 419          LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
 420          LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
 421          static LLVMValueRef ml = NULL;
 422          static LLVMValueRef mh = NULL;
 423          LLVMValueRef al, ah, bl, bh;
 424          LLVMValueRef abl, abh;
 425          LLVMValueRef ab;
 426
 427          if(!ml) ml = lp_build_unpack_shuffle(16, 0);
 428          if(!mh) mh = lp_build_unpack_shuffle(16, 1);
 429
 430          /*  PUNPCKLBW, PUNPCKHBW */
 431          al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
 432          bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
 433          ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
 434          bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
 435
 436          /* NOP */
 437          al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
 438          bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
 439          ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
 440          bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
 441
 442          /* PMULLW, PSRLW, PADDW */
 443          abl = lp_build_mul_u8n(bld->builder, al, bl);
 444          abh = lp_build_mul_u8n(bld->builder, ah, bh);
 445
 446          /* PACKUSWB */
 447          ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
 448
 449          /* NOP */
 450          ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
 451
 452          return ab;
 453       }
 454
 455       /* FIXME */
 456       assert(0);
 457    }
 458
 459    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 460       return LLVMConstMul(a, b);
 461
 462    return LLVMBuildMul(bld->builder, a, b, "");
 463 }
 464
 465
 466 /**
 467  * Generate a / b
 468  */
 469 LLVMValueRef
 470 lp_build_div(struct lp_build_context *bld,
 471              LLVMValueRef a,
 472              LLVMValueRef b)
 473 {
 474    const struct lp_type type = bld->type;
 475
 476    if(a == bld->zero)
 477       return bld->zero;
 478    if(a == bld->one)
 479       return lp_build_rcp(bld, b);
 480    if(b == bld->zero)
 481       return bld->undef;
 482    if(b == bld->one)
 483       return a;
 484    if(a == bld->undef || b == bld->undef)
 485       return bld->undef;
 486
 487    if(LLVMIsConstant(a) && LLVMIsConstant(b))
 488       return LLVMConstFDiv(a, b);
 489
 490    if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
 491       return lp_build_mul(bld, a, lp_build_rcp(bld, b));
 492
 493    return LLVMBuildFDiv(bld->builder, a, b, "");
 494 }
 495
 496
 497 LLVMValueRef
 498 lp_build_lerp(struct lp_build_context *bld,
 499               LLVMValueRef x,
 500               LLVMValueRef v0,
 501               LLVMValueRef v1)
 502 {
 503    return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
 504 }
 505
 506
 507 LLVMValueRef
 508 lp_build_lerp_2d(struct lp_build_context *bld,
 509                  LLVMValueRef x,
 510                  LLVMValueRef y,
 511                  LLVMValueRef v00,
 512                  LLVMValueRef v01,
 513                  LLVMValueRef v10,
 514                  LLVMValueRef v11)
 515 {
 516    LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
 517    LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
 518    return lp_build_lerp(bld, y, v0, v1);
 519 }
 520
 521
 522 /**
 523  * Generate min(a, b)
 524  * Do checks for special cases.
 525  */
 526 LLVMValueRef
 527 lp_build_min(struct lp_build_context *bld,
 528              LLVMValueRef a,
 529              LLVMValueRef b)
 530 {
 531    if(a == bld->undef || b == bld->undef)
 532       return bld->undef;
 533
 534    if(a == b)
 535       return a;
 536
 537    if(bld->type.norm) {
 538       if(a == bld->zero || b == bld->zero)
 539          return bld->zero;
 540       if(a == bld->one)
 541          return b;
 542       if(b == bld->one)
 543          return a;
 544    }
 545
 546    return lp_build_min_simple(bld, a, b);
 547 }
 548
 549
 550 /**
 551  * Generate max(a, b)
 552  * Do checks for special cases.
 553  */
 554 LLVMValueRef
 555 lp_build_max(struct lp_build_context *bld,
 556              LLVMValueRef a,
 557              LLVMValueRef b)
 558 {
 559    if(a == bld->undef || b == bld->undef)
 560       return bld->undef;
 561
 562    if(a == b)
 563       return a;
 564
 565    if(bld->type.norm) {
 566       if(a == bld->one || b == bld->one)
 567          return bld->one;
 568       if(a == bld->zero)
 569          return b;
 570       if(b == bld->zero)
 571          return a;
 572    }
 573
 574    return lp_build_max_simple(bld, a, b);
 575 }
 576
 577
 578 /**
 579  * Generate abs(a)
 580  */
 581 LLVMValueRef
 582 lp_build_abs(struct lp_build_context *bld,
 583              LLVMValueRef a)
 584 {
 585    const struct lp_type type = bld->type;
 586    LLVMTypeRef vec_type = lp_build_vec_type(type);
 587
 588    if(!type.sign)
 589       return a;
 590
 591    if(type.floating) {
 592       /* Mask out the sign bit */
 593       LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 594       LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
 595       a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
 596       a = LLVMBuildAnd(bld->builder, a, mask, "");
 597       a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
 598       return a;
 599    }
 600
 601    if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
 602       switch(type.width) {
 603       case 8:
 604          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
 605       case 16:
 606          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
 607       case 32:
 608          return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
 609       }
 610    }
 611
 612    return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
 613 }
 614
 615
 616 LLVMValueRef
 617 lp_build_sgn(struct lp_build_context *bld,
 618              LLVMValueRef a)
 619 {
 620    const struct lp_type type = bld->type;
 621    LLVMTypeRef vec_type = lp_build_vec_type(type);
 622    LLVMValueRef cond;
 623    LLVMValueRef res;
 624
 625    /* Handle non-zero case */
 626    if(!type.sign) {
 627       /* if not zero then sign must be positive */
 628       res = bld->one;
 629    }
 630    else if(type.floating) {
 631       /* Take the sign bit and add it to 1 constant */
 632       LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 633       LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
 634       LLVMValueRef sign;
 635       LLVMValueRef one;
 636       sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
 637       sign = LLVMBuildAnd(bld->builder, sign, mask, "");
 638       one = LLVMConstBitCast(bld->one, int_vec_type);
 639       res = LLVMBuildOr(bld->builder, sign, one, "");
 640       res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
 641    }
 642    else
 643    {
 644       LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
 645       cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
 646       res = lp_build_select(bld, cond, bld->one, minus_one);
 647    }
 648
 649    /* Handle zero */
 650    cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
 651    res = lp_build_select(bld, cond, bld->zero, bld->one);
 652
 653    return res;
 654 }
 655
 656
 657 enum lp_build_round_sse41_mode
 658 {
 659    LP_BUILD_ROUND_SSE41_NEAREST = 0,
 660    LP_BUILD_ROUND_SSE41_FLOOR = 1,
 661    LP_BUILD_ROUND_SSE41_CEIL = 2,
 662    LP_BUILD_ROUND_SSE41_TRUNCATE = 3
 663 };
 664
 665
 666 static INLINE LLVMValueRef
 667 lp_build_round_sse41(struct lp_build_context *bld,
 668                      LLVMValueRef a,
 669                      enum lp_build_round_sse41_mode mode)
 670 {
 671    const struct lp_type type = bld->type;
 672    LLVMTypeRef vec_type = lp_build_vec_type(type);
 673    const char *intrinsic;
 674
 675    assert(type.floating);
 676    assert(type.width*type.length == 128);
 677    assert(lp_check_value(type, a));
 678    assert(util_cpu_caps.has_sse4_1);
 679
 680    switch(type.width) {
 681    case 32:
 682       intrinsic = "llvm.x86.sse41.round.ps";
 683       break;
 684    case 64:
 685       intrinsic = "llvm.x86.sse41.round.pd";
 686       break;
 687    default:
 688       assert(0);
 689       return bld->undef;
 690    }
 691
 692    return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
 693                                     LLVMConstInt(LLVMInt32Type(), mode, 0));
 694 }
 695
 696
 697 LLVMValueRef
 698 lp_build_trunc(struct lp_build_context *bld,
 699                LLVMValueRef a)
 700 {
 701    const struct lp_type type = bld->type;
 702
 703    assert(type.floating);
 704    assert(lp_check_value(type, a));
 705
 706    if(util_cpu_caps.has_sse4_1)
 707       return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
 708    else {
 709       LLVMTypeRef vec_type = lp_build_vec_type(type);
 710       LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 711       LLVMValueRef res;
 712       res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
 713       res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
 714       return res;
 715    }
 716 }
 717
 718
 719 LLVMValueRef
 720 lp_build_round(struct lp_build_context *bld,
 721                LLVMValueRef a)
 722 {
 723    const struct lp_type type = bld->type;
 724
 725    assert(type.floating);
 726    assert(lp_check_value(type, a));
 727
 728    if(util_cpu_caps.has_sse4_1)
 729       return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
 730    else {
 731       LLVMTypeRef vec_type = lp_build_vec_type(type);
 732       LLVMValueRef res;
 733       res = lp_build_iround(bld, a);
 734       res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
 735       return res;
 736    }
 737 }
 738
 739
 740 LLVMValueRef
 741 lp_build_floor(struct lp_build_context *bld,
 742                LLVMValueRef a)
 743 {
 744    const struct lp_type type = bld->type;
 745
 746    assert(type.floating);
 747
 748    if(util_cpu_caps.has_sse4_1)
 749       return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
 750    else {
 751       LLVMTypeRef vec_type = lp_build_vec_type(type);
 752       LLVMValueRef res;
 753       res = lp_build_ifloor(bld, a);
 754       res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
 755       return res;
 756    }
 757 }
 758
 759
 760 LLVMValueRef
 761 lp_build_ceil(struct lp_build_context *bld,
 762               LLVMValueRef a)
 763 {
 764    const struct lp_type type = bld->type;
 765
 766    assert(type.floating);
 767    assert(lp_check_value(type, a));
 768
 769    if(util_cpu_caps.has_sse4_1)
 770       return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
 771    else {
 772       LLVMTypeRef vec_type = lp_build_vec_type(type);
 773       LLVMValueRef res;
 774       res = lp_build_iceil(bld, a);
 775       res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
 776       return res;
 777    }
 778 }
 779
 780
 781 /**
 782  * Convert to integer, through whichever rounding method that's fastest,
 783  * typically truncating to zero.
 784  */
 785 LLVMValueRef
 786 lp_build_itrunc(struct lp_build_context *bld,
 787                 LLVMValueRef a)
 788 {
 789    const struct lp_type type = bld->type;
 790    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 791
 792    assert(type.floating);
 793    assert(lp_check_value(type, a));
 794
 795    return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
 796 }
 797
 798
 799 LLVMValueRef
 800 lp_build_iround(struct lp_build_context *bld,
 801                 LLVMValueRef a)
 802 {
 803    const struct lp_type type = bld->type;
 804    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 805    LLVMValueRef res;
 806
 807    assert(type.floating);
 808    assert(lp_check_value(type, a));
 809
 810    if(util_cpu_caps.has_sse4_1) {
 811       res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
 812    }
 813    else {
 814       LLVMTypeRef vec_type = lp_build_vec_type(type);
 815       LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
 816       LLVMValueRef sign;
 817       LLVMValueRef half;
 818
 819       /* get sign bit */
 820       sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
 821       sign = LLVMBuildAnd(bld->builder, sign, mask, "");
 822
 823       /* sign * 0.5 */
 824       half = lp_build_const_scalar(type, 0.5);
 825       half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
 826       half = LLVMBuildOr(bld->builder, sign, half, "");
 827       half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
 828
 829       res = LLVMBuildAdd(bld->builder, a, half, "");
 830    }
 831
 832    res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
 833
 834    return res;
 835 }
 836
 837
 838 LLVMValueRef
 839 lp_build_ifloor(struct lp_build_context *bld,
 840                 LLVMValueRef a)
 841 {
 842    const struct lp_type type = bld->type;
 843    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 844    LLVMValueRef res;
 845
 846    assert(type.floating);
 847    assert(lp_check_value(type, a));
 848
 849    if(util_cpu_caps.has_sse4_1) {
 850       res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
 851    }
 852    else {
 853       /* Take the sign bit and add it to 1 constant */
 854       LLVMTypeRef vec_type = lp_build_vec_type(type);
 855       unsigned mantissa = lp_mantissa(type);
 856       LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
 857       LLVMValueRef sign;
 858       LLVMValueRef offset;
 859
 860       /* sign = a < 0 ? ~0 : 0 */
 861       sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
 862       sign = LLVMBuildAnd(bld->builder, sign, mask, "");
 863       sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
 864
 865       /* offset = -0.99999(9)f */
 866       offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
 867       offset = LLVMConstBitCast(offset, int_vec_type);
 868
 869       /* offset = a < 0 ? -0.99999(9)f : 0.0f */
 870       offset = LLVMBuildAnd(bld->builder, offset, sign, "");
 871       offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
 872
 873       res = LLVMBuildAdd(bld->builder, a, offset, "");
 874    }
 875
 876    res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
 877
 878    return res;
 879 }
 880
 881
 882 LLVMValueRef
 883 lp_build_iceil(struct lp_build_context *bld,
 884                LLVMValueRef a)
 885 {
 886    const struct lp_type type = bld->type;
 887    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
 888    LLVMValueRef res;
 889
 890    assert(type.floating);
 891    assert(lp_check_value(type, a));
 892
 893    if(util_cpu_caps.has_sse4_1) {
 894       res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
 895    }
 896    else {
 897       assert(0);
 898       res = bld->undef;
 899    }
 900
 901    res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
 902
 903    return res;
 904 }
 905
 906
 907 LLVMValueRef
 908 lp_build_sqrt(struct lp_build_context *bld,
 909               LLVMValueRef a)
 910 {
 911    const struct lp_type type = bld->type;
 912    LLVMTypeRef vec_type = lp_build_vec_type(type);
 913    char intrinsic[32];
 914
 915    /* TODO: optimize the constant case */
 916    /* TODO: optimize the constant case */
 917
 918    assert(type.floating);
 919    util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width);
 920
 921    return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
 922 }
 923
 924
 925 LLVMValueRef
 926 lp_build_rcp(struct lp_build_context *bld,
 927              LLVMValueRef a)
 928 {
 929    const struct lp_type type = bld->type;
 930
 931    if(a == bld->zero)
 932       return bld->undef;
 933    if(a == bld->one)
 934       return bld->one;
 935    if(a == bld->undef)
 936       return bld->undef;
 937
 938    assert(type.floating);
 939
 940    if(LLVMIsConstant(a))
 941       return LLVMConstFDiv(bld->one, a);
 942
 943    if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
 944       /* FIXME: improve precision */
 945       return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
 946
 947    return LLVMBuildFDiv(bld->builder, bld->one, a, "");
 948 }
 949
 950
 951 /**
 952  * Generate 1/sqrt(a)
 953  */
 954 LLVMValueRef
 955 lp_build_rsqrt(struct lp_build_context *bld,
 956                LLVMValueRef a)
 957 {
 958    const struct lp_type type = bld->type;
 959
 960    assert(type.floating);
 961
 962    if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
 963       return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
 964
 965    return lp_build_rcp(bld, lp_build_sqrt(bld, a));
 966 }
 967
 968
 969 /**
 970  * Generate cos(a)
 971  */
 972 LLVMValueRef
 973 lp_build_cos(struct lp_build_context *bld,
 974               LLVMValueRef a)
 975 {
 976    const struct lp_type type = bld->type;
 977    LLVMTypeRef vec_type = lp_build_vec_type(type);
 978    char intrinsic[32];
 979
 980    /* TODO: optimize the constant case */
 981
 982    assert(type.floating);
 983    util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width);
 984
 985    return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
 986 }
 987
 988
 989 /**
 990  * Generate sin(a)
 991  */
 992 LLVMValueRef
 993 lp_build_sin(struct lp_build_context *bld,
 994               LLVMValueRef a)
 995 {
 996    const struct lp_type type = bld->type;
 997    LLVMTypeRef vec_type = lp_build_vec_type(type);
 998    char intrinsic[32];
 999
1000    /* TODO: optimize the constant case */
1001
1002    assert(type.floating);
1003    util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width);
1004
1005    return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a);
1006 }
1007
1008
1009 /**
1010  * Generate pow(x, y)
1011  */
1012 LLVMValueRef
1013 lp_build_pow(struct lp_build_context *bld,
1014              LLVMValueRef x,
1015              LLVMValueRef y)
1016 {
1017    /* TODO: optimize the constant case */
1018    if(LLVMIsConstant(x) && LLVMIsConstant(y))
1019       debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1020                    __FUNCTION__);
1021
1022    return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y));
1023 }
1024
1025
1026 /**
1027  * Generate exp(x)
1028  */
1029 LLVMValueRef
1030 lp_build_exp(struct lp_build_context *bld,
1031              LLVMValueRef x)
1032 {
1033    /* log2(e) = 1/log(2) */
1034    LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634);
1035
1036    return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
1037 }
1038
1039
1040 /**
1041  * Generate log(x)
1042  */
1043 LLVMValueRef
1044 lp_build_log(struct lp_build_context *bld,
1045              LLVMValueRef x)
1046 {
1047    /* log(2) */
1048    LLVMValueRef log2 = lp_build_const_scalar(bld->type, 1.4426950408889634);
1049
1050    return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
1051 }
1052
1053
/*
 * Degrees of the polynomials used by the exp2/log2 approximations.
 *
 * EXP_POLY_DEGREE selects the coefficient set compiled into
 * lp_build_exp2_polynomial (2 to 5 are provided) and LOG_POLY_DEGREE the one
 * compiled into lp_build_log2_polynomial (3 to 6 are provided).  Any other
 * value trips the #error in the corresponding table.
 */
#define EXP_POLY_DEGREE 3
#define LOG_POLY_DEGREE 5
1056
1057
1058 /**
1059  * Generate polynomial.
1060  * Ex:  x^2 * coeffs[0] + x * coeffs[1] + coeffs[2].
1061  */
1062 static LLVMValueRef
1063 lp_build_polynomial(struct lp_build_context *bld,
1064                     LLVMValueRef x,
1065                     const double *coeffs,
1066                     unsigned num_coeffs)
1067 {
1068    const struct lp_type type = bld->type;
1069    LLVMValueRef res = NULL;
1070    unsigned i;
1071
1072    /* TODO: optimize the constant case */
1073    if(LLVMIsConstant(x))
1074       debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1075                    __FUNCTION__);
1076
1077    for (i = num_coeffs; i--; ) {
1078       LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
1079       if(res)
1080          res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
1081       else
1082          res = coeff;
1083    }
1084
1085    if(res)
1086       return res;
1087    else
1088       return bld->undef;
1089 }
1090
1091
1092 /**
1093  * Minimax polynomial fit of 2**x, in range [-0.5, 0.5[
1094  */
1095 const double lp_build_exp2_polynomial[] = {
1096 #if EXP_POLY_DEGREE == 5
1097    9.9999994e-1, 6.9315308e-1, 2.4015361e-1, 5.5826318e-2, 8.9893397e-3, 1.8775767e-3
1098 #elif EXP_POLY_DEGREE == 4
1099    1.0000026, 6.9300383e-1, 2.4144275e-1, 5.2011464e-2, 1.3534167e-2
1100 #elif EXP_POLY_DEGREE == 3
1101    9.9992520e-1, 6.9583356e-1, 2.2606716e-1, 7.8024521e-2
1102 #elif EXP_POLY_DEGREE == 2
1103    1.0017247, 6.5763628e-1, 3.3718944e-1
1104 #else
1105 #error
1106 #endif
1107 };
1108
1109
1110 void
1111 lp_build_exp2_approx(struct lp_build_context *bld,
1112                      LLVMValueRef x,
1113                      LLVMValueRef *p_exp2_int_part,
1114                      LLVMValueRef *p_frac_part,
1115                      LLVMValueRef *p_exp2)
1116 {
1117    const struct lp_type type = bld->type;
1118    LLVMTypeRef vec_type = lp_build_vec_type(type);
1119    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
1120    LLVMValueRef ipart = NULL;
1121    LLVMValueRef fpart = NULL;
1122    LLVMValueRef expipart = NULL;
1123    LLVMValueRef expfpart = NULL;
1124    LLVMValueRef res = NULL;
1125
1126    if(p_exp2_int_part || p_frac_part || p_exp2) {
1127       /* TODO: optimize the constant case */
1128       if(LLVMIsConstant(x))
1129          debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1130                       __FUNCTION__);
1131
1132       assert(type.floating && type.width == 32);
1133
1134       x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
1135       x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));
1136
1137       /* ipart = int(x - 0.5) */
1138       ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
1139       ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
1140
1141       /* fpart = x - ipart */
1142       fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
1143       fpart = LLVMBuildSub(bld->builder, x, fpart, "");
1144    }
1145
1146    if(p_exp2_int_part || p_exp2) {
1147       /* expipart = (float) (1 << ipart) */
1148       expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), "");
1149       expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), "");
1150       expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
1151    }
1152
1153    if(p_exp2) {
1154       expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
1155                                      Elements(lp_build_exp2_polynomial));
1156
1157       res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
1158    }
1159
1160    if(p_exp2_int_part)
1161       *p_exp2_int_part = expipart;
1162
1163    if(p_frac_part)
1164       *p_frac_part = fpart;
1165
1166    if(p_exp2)
1167       *p_exp2 = res;
1168 }
1169
1170
1171 LLVMValueRef
1172 lp_build_exp2(struct lp_build_context *bld,
1173               LLVMValueRef x)
1174 {
1175    LLVMValueRef res;
1176    lp_build_exp2_approx(bld, x, NULL, NULL, &res);
1177    return res;
1178 }
1179
1180
1181 /**
1182  * Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
1183  * These coefficients can be generate with
1184  * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
1185  */
1186 const double lp_build_log2_polynomial[] = {
1187 #if LOG_POLY_DEGREE == 6
1188    3.11578814719469302614, -3.32419399085241980044, 2.59883907202499966007, -1.23152682416275988241, 0.318212422185251071475, -0.0344359067839062357313
1189 #elif LOG_POLY_DEGREE == 5
1190    2.8882704548164776201, -2.52074962577807006663, 1.48116647521213171641, -0.465725644288844778798, 0.0596515482674574969533
1191 #elif LOG_POLY_DEGREE == 4
1192    2.61761038894603480148, -1.75647175389045657003, 0.688243882994381274313, -0.107254423828329604454
1193 #elif LOG_POLY_DEGREE == 3
1194    2.28330284476918490682, -1.04913055217340124191, 0.204446009836232697516
1195 #else
1196 #error
1197 #endif
1198 };
1199
1200
1201 /**
1202  * See http://www.devmaster.net/forums/showthread.php?p=43580
1203  */
1204 void
1205 lp_build_log2_approx(struct lp_build_context *bld,
1206                      LLVMValueRef x,
1207                      LLVMValueRef *p_exp,
1208                      LLVMValueRef *p_floor_log2,
1209                      LLVMValueRef *p_log2)
1210 {
1211    const struct lp_type type = bld->type;
1212    LLVMTypeRef vec_type = lp_build_vec_type(type);
1213    LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
1214
1215    LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
1216    LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
1217    LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
1218
1219    LLVMValueRef i = NULL;
1220    LLVMValueRef exp = NULL;
1221    LLVMValueRef mant = NULL;
1222    LLVMValueRef logexp = NULL;
1223    LLVMValueRef logmant = NULL;
1224    LLVMValueRef res = NULL;
1225
1226    if(p_exp || p_floor_log2 || p_log2) {
1227       /* TODO: optimize the constant case */
1228       if(LLVMIsConstant(x))
1229          debug_printf("%s: inefficient/imprecise constant arithmetic\n",
1230                       __FUNCTION__);
1231
1232       assert(type.floating && type.width == 32);
1233
1234       i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
1235
1236       /* exp = (float) exponent(x) */
1237       exp = LLVMBuildAnd(bld->builder, i, expmask, "");
1238    }
1239
1240    if(p_floor_log2 || p_log2) {
1241       logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
1242       logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
1243       logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
1244    }
1245
1246    if(p_log2) {
1247       /* mant = (float) mantissa(x) */
1248       mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
1249       mant = LLVMBuildOr(bld->builder, mant, one, "");
1250       mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
1251
1252       logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
1253                                     Elements(lp_build_log2_polynomial));
1254
1255       /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
1256       logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
1257
1258       res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
1259    }
1260
1261    if(p_exp)
1262       *p_exp = exp;
1263
1264    if(p_floor_log2)
1265       *p_floor_log2 = logexp;
1266
1267    if(p_log2)
1268       *p_log2 = res;
1269 }
1270
1271
1272 LLVMValueRef
1273 lp_build_log2(struct lp_build_context *bld,
1274               LLVMValueRef x)
1275 {
1276    LLVMValueRef res;
1277    lp_build_log2_approx(bld, x, NULL, NULL, &res);
1278    return res;
1279 }