src/Reactor/SubzeroReactor.cpp

   1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //    http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "Nucleus.hpp"
  16
  17 #include "Reactor.hpp"
  18 #include "Routine.hpp"
  19
  20 #include "Optimizer.hpp"
  21
  22 #include "src/IceTypes.h"
  23 #include "src/IceCfg.h"
  24 #include "src/IceELFStreamer.h"
  25 #include "src/IceGlobalContext.h"
  26 #include "src/IceCfgNode.h"
  27 #include "src/IceELFObjectWriter.h"
  28 #include "src/IceGlobalInits.h"
  29
  30 #include "llvm/Support/FileSystem.h"
  31 #include "llvm/Support/raw_os_ostream.h"
  32
  33 #if defined(_WIN32)
  34 #ifndef WIN32_LEAN_AND_MEAN
  35 #define WIN32_LEAN_AND_MEAN
  36 #endif // !WIN32_LEAN_AND_MEAN
  37 #ifndef NOMINMAX
  38 #define NOMINMAX
  39 #endif // !NOMINMAX
  40 #include <Windows.h>
  41 #else
  42 #include <sys/mman.h>
  43 #if !defined(MAP_ANONYMOUS)
  44 #define MAP_ANONYMOUS MAP_ANON
  45 #endif
  46 #endif
  47
#include <cassert>
#include <cstring>
#include <iostream>
#include <limits>
#include <mutex>
  52
  53 namespace
  54 {
  55         Ice::GlobalContext *context = nullptr;
  56         Ice::Cfg *function = nullptr;
  57         Ice::CfgNode *basicBlock = nullptr;
  58         Ice::CfgLocalAllocatorScope *allocator = nullptr;
  59         sw::Routine *routine = nullptr;
  60
  61         std::mutex codegenMutex;
  62
  63         Ice::ELFFileStreamer *elfFile = nullptr;
  64         Ice::Fdstream *out = nullptr;
  65 }
  66
  67 namespace
  68 {
  69         #if !defined(__i386__) && defined(_M_IX86)
  70                 #define __i386__ 1
  71         #endif
  72
  73         #if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
  74                 #define __x86_64__ 1
  75         #endif
  76
  77         class CPUID
  78         {
  79         public:
  80                 const static bool ARM;
  81                 const static bool SSE4_1;
  82
  83         private:
  84                 static void cpuid(int registers[4], int info)
  85                 {
  86                         #if defined(__i386__) || defined(__x86_64__)
  87                                 #if defined(_WIN32)
  88                                         __cpuid(registers, info);
  89                                 #else
  90                                         __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
  91                                 #endif
  92                         #else
  93                                 registers[0] = 0;
  94                                 registers[1] = 0;
  95                                 registers[2] = 0;
  96                                 registers[3] = 0;
  97                         #endif
  98                 }
  99
 100                 static bool detectARM()
 101                 {
 102                         #if defined(__arm__)
 103                                 return true;
 104                         #elif defined(__i386__) || defined(__x86_64__)
 105                                 return false;
 106                         #else
 107                                 #error "Unknown architecture"
 108                         #endif
 109                 }
 110
 111                 static bool detectSSE4_1()
 112                 {
 113                         #if defined(__i386__) || defined(__x86_64__)
 114                                 int registers[4];
 115                                 cpuid(registers, 1);
 116                                 return (registers[2] & 0x00080000) != 0;
 117                         #else
 118                                 return false;
 119                         #endif
 120                 }
 121         };
 122
 123         const bool CPUID::ARM = CPUID::detectARM();
 124         const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
 125         const bool emulateIntrinsics = CPUID::ARM;
 126         const bool emulateMismatchedBitCast = CPUID::ARM;
 127 }
 128
 129 namespace sw
 130 {
        // Type tags for vector types that are represented by a wider Ice vector type.
        // Each emulated type is the carrying Ice::Type OR'ed with a tag formed by
        // shifting the element count left by EmulatedShift. T(Type*) masks the tag
        // off again, and typeSize() reports the emulated (narrower) size in bytes.
        enum EmulatedType
        {
                EmulatedShift = 16,                                    // Bit position where the tag starts
                EmulatedV2 = 2 << EmulatedShift,                       // Tag: 2 elements
                EmulatedV4 = 4 << EmulatedShift,                       // Tag: 4 elements
                EmulatedV8 = 8 << EmulatedShift,                       // Tag: 8 elements
                EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,   // Mask covering every tag bit

                Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
                Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
                Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
                Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
                Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
                Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
        };
 146
        // Reactor-facing handle types. They add no members to their Ice base classes;
        // V() and B() reinterpret_cast Ice pointers to them.
        class Value : public Ice::Operand {};
        class SwitchCases : public Ice::InstSwitch {};
        class BasicBlock : public Ice::CfgNode {};
 150
 151         Ice::Type T(Type *t)
 152         {
 153                 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
 154                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
 155         }
 156
 157         Type *T(Ice::Type t)
 158         {
 159                 return reinterpret_cast<Type*>(t);
 160         }
 161
 162         Type *T(EmulatedType t)
 163         {
 164                 return reinterpret_cast<Type*>(t);
 165         }
 166
 167         Value *V(Ice::Operand *v)
 168         {
 169                 return reinterpret_cast<Value*>(v);
 170         }
 171
 172         BasicBlock *B(Ice::CfgNode *b)
 173         {
 174                 return reinterpret_cast<BasicBlock*>(b);
 175         }
 176
 177         static size_t typeSize(Type *type)
 178         {
 179                 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
 180                 {
 181                         switch(reinterpret_cast<std::intptr_t>(type))
 182                         {
 183                         case Type_v2i32: return 8;
 184                         case Type_v4i16: return 8;
 185                         case Type_v2i16: return 4;
 186                         case Type_v8i8:  return 8;
 187                         case Type_v4i8:  return 4;
 188                         case Type_v2f32: return 8;
 189                         default: assert(false);
 190                         }
 191                 }
 192
 193                 return Ice::typeWidthInBytes(T(type));
 194         }
 195
        // Optimization list. Only the first two entries are given explicitly; the
        // remaining elements are value-initialized.
        Optimization optimization[10] = {InstructionCombining, Disabled};

        // ELF structures matching the pointer size of the host: 64-bit variants when
        // sizeof(void*) == 8, 32-bit variants otherwise.
        using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
        using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
 200
 201         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
 202         {
 203                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
 204         }
 205
 206         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
 207         {
 208                 return &sectionHeader(elfHeader)[index];
 209         }
 210
 211         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
 212         {
 213                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 214
 215                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 216                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 217                 uint32_t index = relocation.getSymbol();
 218                 int table = relocationTable.sh_link;
 219                 void *symbolValue = nullptr;
 220
 221                 if(index != SHN_UNDEF)
 222                 {
 223                         if(table == SHN_UNDEF) return nullptr;
 224                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 225
 226                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 227                         if(index >= symtab_entries)
 228                         {
 229                                 assert(index < symtab_entries && "Symbol Index out of range");
 230                                 return nullptr;
 231                         }
 232
 233                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 234                         Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
 235                         uint16_t section = symbol.st_shndx;
 236
 237                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 238                         {
 239                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 240                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 241                         }
 242                         else
 243                         {
 244                                 return nullptr;
 245                         }
 246                 }
 247
 248                 if(CPUID::ARM)
 249                 {
 250                         switch(relocation.getType())
 251                         {
 252                         case R_ARM_NONE:
 253                                 // No relocation
 254                                 break;
 255                         case R_ARM_MOVW_ABS_NC:
 256                                 {
 257                                         uint32_t thumb = 0;   // Calls to Thumb code not supported.
 258                                         uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
 259                                         *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
 260                                 }
 261                                 break;
 262                         case R_ARM_MOVT_ABS:
 263                                 {
 264                                         uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
 265                                         *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
 266                                 }
 267                                 break;
 268                         default:
 269                                 assert(false && "Unsupported relocation type");
 270                                 return nullptr;
 271                         }
 272                 }
 273                 else
 274                 {
 275                         switch(relocation.getType())
 276                         {
 277                         case R_386_NONE:
 278                                 // No relocation
 279                                 break;
 280                         case R_386_32:
 281                                 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
 282                                 break;
 283                 //      case R_386_PC32:
 284                 //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
 285                 //              break;
 286                         default:
 287                                 assert(false && "Unsupported relocation type");
 288                                 return nullptr;
 289                         }
 290                 }
 291
 292
 293                 return symbolValue;
 294         }
 295
 296         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
 297         {
 298                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 299
 300                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 301                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 302                 uint32_t index = relocation.getSymbol();
 303                 int table = relocationTable.sh_link;
 304                 void *symbolValue = nullptr;
 305
 306                 if(index != SHN_UNDEF)
 307                 {
 308                         if(table == SHN_UNDEF) return nullptr;
 309                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 310
 311                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 312                         if(index >= symtab_entries)
 313                         {
 314                                 assert(index < symtab_entries && "Symbol Index out of range");
 315                                 return nullptr;
 316                         }
 317
 318                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 319                         Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
 320                         uint16_t section = symbol.st_shndx;
 321
 322                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 323                         {
 324                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 325                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 326                         }
 327                         else
 328                         {
 329                                 return nullptr;
 330                         }
 331                 }
 332
 333                 switch(relocation.getType())
 334                 {
 335                 case R_X86_64_NONE:
 336                         // No relocation
 337                         break;
 338                 case R_X86_64_64:
 339                         *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
 340                         break;
 341                 case R_X86_64_PC32:
 342                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
 343                         break;
 344                 case R_X86_64_32S:
 345                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
 346                         break;
 347                 default:
 348                         assert(false && "Unsupported relocation type");
 349                         return nullptr;
 350                 }
 351
 352                 return symbolValue;
 353         }
 354
 355         void *loadImage(uint8_t *const elfImage, size_t &codeSize)
 356         {
 357                 ElfHeader *elfHeader = (ElfHeader*)elfImage;
 358
 359                 if(!elfHeader->checkMagic())
 360                 {
 361                         return nullptr;
 362                 }
 363
 364                 // Expect ELF bitness to match platform
 365                 assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
 366                 #if defined(__i386__)
 367                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
 368                 #elif defined(__x86_64__)
 369                         assert(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
 370                 #elif defined(__arm__)
 371                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
 372                 #else
 373                         #error "Unsupported platform"
 374                 #endif
 375
 376                 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
 377                 void *entry = nullptr;
 378
 379                 for(int i = 0; i < elfHeader->e_shnum; i++)
 380                 {
 381                         if(sectionHeader[i].sh_type == SHT_PROGBITS)
 382                         {
 383                                 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
 384                                 {
 385                                         entry = elfImage + sectionHeader[i].sh_offset;
 386                                         codeSize = sectionHeader[i].sh_size;
 387                                 }
 388                         }
 389                         else if(sectionHeader[i].sh_type == SHT_REL)
 390                         {
 391                                 assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code
 392
 393                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 394                                 {
 395                                         const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
 396                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 397                                 }
 398                         }
 399                         else if(sectionHeader[i].sh_type == SHT_RELA)
 400                         {
 401                                 assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code
 402
 403                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 404                                 {
 405                                         const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
 406                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 407                                 }
 408                         }
 409                 }
 410
 411                 return entry;
 412         }
 413
 414         template<typename T>
 415         struct ExecutableAllocator
 416         {
 417                 ExecutableAllocator() {};
 418                 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
 419
 420                 using value_type = T;
 421                 using size_type = std::size_t;
 422
 423                 T *allocate(size_type n)
 424                 {
 425                         #if defined(_WIN32)
 426                                 return (T*)VirtualAlloc(NULL, sizeof(T) * n, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
 427                         #else
 428                                 return (T*)mmap(nullptr, sizeof(T) * n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 429                         #endif
 430                 }
 431
 432                 void deallocate(T *p, size_type n)
 433                 {
 434                         #if defined(_WIN32)
 435                                 VirtualFree(p, 0, MEM_RELEASE);
 436                         #else
 437                                 munmap(p, sizeof(T) * n);
 438                         #endif
 439                 }
 440         };
 441
 442         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
 443         {
 444                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
 445                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
 446
 447         public:
 448                 ELFMemoryStreamer() : Routine(), entry(nullptr)
 449                 {
 450                         position = 0;
 451                         buffer.reserve(0x1000);
 452                 }
 453
 454                 ~ELFMemoryStreamer() override
 455                 {
 456                         #if defined(_WIN32)
 457                                 if(buffer.size() != 0)
 458                                 {
 459                                         DWORD exeProtection;
 460                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
 461                                 }
 462                         #endif
 463                 }
 464
 465                 void write8(uint8_t Value) override
 466                 {
 467                         if(position == (uint64_t)buffer.size())
 468                         {
 469                                 buffer.push_back(Value);
 470                                 position++;
 471                         }
 472                         else if(position < (uint64_t)buffer.size())
 473                         {
 474                                 buffer[position] = Value;
 475                                 position++;
 476                         }
 477                         else assert(false && "UNIMPLEMENTED");
 478                 }
 479
 480                 void writeBytes(llvm::StringRef Bytes) override
 481                 {
 482                         std::size_t oldSize = buffer.size();
 483                         buffer.resize(oldSize + Bytes.size());
 484                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
 485                         position += Bytes.size();
 486                 }
 487
 488                 uint64_t tell() const override { return position; }
 489
 490                 void seek(uint64_t Off) override { position = Off; }
 491
 492                 const void *getEntry() override
 493                 {
 494                         if(!entry)
 495                         {
 496                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
 497
 498                                 size_t codeSize = 0;
 499                                 entry = loadImage(&buffer[0], codeSize);
 500
 501                                 #if defined(_WIN32)
 502                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READ, &oldProtection);
 503                                         FlushInstructionCache(GetCurrentProcess(), NULL, 0);
 504                                 #else
 505                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_EXEC);
 506                                         __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
 507                                 #endif
 508                         }
 509
 510                         return entry;
 511                 }
 512
 513         private:
 514                 void *entry;
 515                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
 516                 std::size_t position;
 517
 518                 #if defined(_WIN32)
 519                 DWORD oldProtection;
 520                 #endif
 521         };
 522
 523         Nucleus::Nucleus()
 524         {
 525                 ::codegenMutex.lock();   // Reactor is currently not thread safe
 526
 527                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
 528                 Ice::ClFlags::getParsedClFlags(Flags);
 529
 530                 #if defined(__arm__)
 531                         Flags.setTargetArch(Ice::Target_ARM32);
 532                         Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
 533                 #else   // x86
 534                         Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
 535                         Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
 536                 #endif
 537                 Flags.setOutFileType(Ice::FT_Elf);
 538                 Flags.setOptLevel(Ice::Opt_2);
 539                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
 540                 Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
 541                 Flags.setDisableHybridAssembly(true);
 542
 543                 static llvm::raw_os_ostream cout(std::cout);
 544                 static llvm::raw_os_ostream cerr(std::cerr);
 545
 546                 if(false)   // Write out to a file
 547                 {
 548                         std::error_code errorCode;
 549                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
 550                         ::elfFile = new Ice::ELFFileStreamer(*out);
 551                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
 552                 }
 553                 else
 554                 {
 555                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
 556                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
 557                         ::routine = elfMemory;
 558                 }
 559         }
 560
 561         Nucleus::~Nucleus()
 562         {
 563                 delete ::routine;
 564
 565                 delete ::allocator;
 566                 delete ::function;
 567                 delete ::context;
 568
 569                 delete ::elfFile;
 570                 delete ::out;
 571
 572                 ::codegenMutex.unlock();
 573         }
 574
 575         Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
 576         {
 577                 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
 578                 {
 579                         createRetVoid();
 580                 }
 581
 582                 std::wstring wideName(name);
 583                 std::string asciiName(wideName.begin(), wideName.end());
 584                 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));
 585
 586                 optimize();
 587
 588                 ::function->translate();
 589                 assert(!::function->hasError());
 590
 591                 auto globals = ::function->getGlobalInits();
 592
 593                 if(globals && !globals->empty())
 594                 {
 595                         ::context->getGlobals()->merge(globals.get());
 596                 }
 597
 598                 ::context->emitFileHeader();
 599                 ::function->emitIAS();
 600                 auto assembler = ::function->releaseAssembler();
 601                 auto objectWriter = ::context->getObjectWriter();
 602                 assembler->alignFunction();
 603                 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
 604                 ::context->lowerGlobals("last");
 605                 ::context->lowerConstants();
 606                 ::context->lowerJumpTables();
 607                 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
 608                 objectWriter->writeNonUserSections();
 609
 610                 Routine *handoffRoutine = ::routine;
 611                 ::routine = nullptr;
 612
 613                 return handoffRoutine;
 614         }
 615
 616         void Nucleus::optimize()
 617         {
 618                 sw::optimize(::function);
 619         }
 620
 621         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
 622         {
 623                 Ice::Type type = T(t);
 624                 int typeSize = Ice::typeWidthInBytes(type);
 625                 int totalSize = typeSize * (arraySize ? arraySize : 1);
 626
 627                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
 628                 auto address = ::function->makeVariable(T(getPointerType(t)));
 629                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
 630                 ::function->getEntryNode()->getInsts().push_front(alloca);
 631
 632                 return V(address);
 633         }
 634
 635         BasicBlock *Nucleus::createBasicBlock()
 636         {
 637                 return B(::function->makeNode());
 638         }
 639
 640         BasicBlock *Nucleus::getInsertBlock()
 641         {
 642                 return B(::basicBlock);
 643         }
 644
 645         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
 646         {
 647         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
 648                 ::basicBlock = basicBlock;
 649         }
 650
 651         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
 652         {
 653                 uint32_t sequenceNumber = 0;
 654                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
 655                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
 656
 657                 for(Type *type : Params)
 658                 {
 659                         Ice::Variable *arg = ::function->makeVariable(T(type));
 660                         ::function->addArg(arg);
 661                 }
 662
 663                 Ice::CfgNode *node = ::function->makeNode();
 664                 ::function->setEntryNode(node);
 665                 ::basicBlock = node;
 666         }
 667
 668         Value *Nucleus::getArgument(unsigned int index)
 669         {
 670                 return V(::function->getArgs()[index]);
 671         }
 672
 673         void Nucleus::createRetVoid()
 674         {
 675                 Ice::InstRet *ret = Ice::InstRet::create(::function);
 676                 ::basicBlock->appendInst(ret);
 677         }
 678
 679         void Nucleus::createRet(Value *v)
 680         {
 681                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
 682                 ::basicBlock->appendInst(ret);
 683         }
 684
 685         void Nucleus::createBr(BasicBlock *dest)
 686         {
 687                 auto br = Ice::InstBr::create(::function, dest);
 688                 ::basicBlock->appendInst(br);
 689         }
 690
 691         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
 692         {
 693                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
 694                 ::basicBlock->appendInst(br);
 695         }
 696
 697         static bool isCommutative(Ice::InstArithmetic::OpKind op)
 698         {
 699                 switch(op)
 700                 {
 701                 case Ice::InstArithmetic::Add:
 702                 case Ice::InstArithmetic::Fadd:
 703                 case Ice::InstArithmetic::Mul:
 704                 case Ice::InstArithmetic::Fmul:
 705                 case Ice::InstArithmetic::And:
 706                 case Ice::InstArithmetic::Or:
 707                 case Ice::InstArithmetic::Xor:
 708                         return true;
 709                 default:
 710                         return false;
 711                 }
 712         }
 713
 714         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
 715         {
 716                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
 717
 718                 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
 719
 720                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
 721                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
 722                 ::basicBlock->appendInst(arithmetic);
 723
 724                 return V(result);
 725         }
 726
        // Emits an Add arithmetic instruction on lhs and rhs and returns its result.
        Value *Nucleus::createAdd(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
        }
 731
        // Emits a Sub arithmetic instruction (lhs - rhs) and returns its result.
        Value *Nucleus::createSub(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
        }
 736
        // Emits a Mul arithmetic instruction on lhs and rhs and returns its result.
        Value *Nucleus::createMul(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
        }
 741
        // Emits a Udiv (unsigned divide) arithmetic instruction, lhs / rhs, and returns its result.
        Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
        }
 746
        // Emits an Sdiv (signed divide) arithmetic instruction, lhs / rhs, and returns its result.
        Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
        }
 751
        // Emits an Fadd (floating-point add) arithmetic instruction and returns its result.
        Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
        }
 756
        // Emits an Fsub (floating-point subtract) arithmetic instruction, lhs - rhs, and returns its result.
        Value *Nucleus::createFSub(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
        }
 761
        // Emits an Fmul (floating-point multiply) arithmetic instruction and returns its result.
        Value *Nucleus::createFMul(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
        }
 766
        // Emits an Fdiv (floating-point divide) arithmetic instruction, lhs / rhs, and returns its result.
        Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
        }
 771
        // Emits a Urem (unsigned remainder) arithmetic instruction, lhs % rhs, and returns its result.
        Value *Nucleus::createURem(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
        }
 776
        // Emits an Srem (signed remainder) arithmetic instruction, lhs % rhs, and returns its result.
        Value *Nucleus::createSRem(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
        }
 781
        // Emits an Frem (floating-point remainder) arithmetic instruction and returns its result.
        Value *Nucleus::createFRem(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
        }
 786
        // Emits a Shl (shift left) arithmetic instruction with lhs as the value and rhs as
        // the second operand, and returns its result. createArithmetic accepts a constant
        // rhs whose type differs from that of lhs.
        Value *Nucleus::createShl(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
        }
 791
        // Emits an Lshr (logical shift right) arithmetic instruction with lhs as the value
        // and rhs as the second operand, and returns its result.
        Value *Nucleus::createLShr(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
        }
 796
        // Emits an Ashr (arithmetic shift right) arithmetic instruction with lhs as the
        // value and rhs as the second operand, and returns its result.
        Value *Nucleus::createAShr(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
        }
 801
        // Emits an And (bitwise and) arithmetic instruction on lhs and rhs and returns its result.
        Value *Nucleus::createAnd(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
        }
 806
        // Emits an Or (bitwise or) arithmetic instruction on lhs and rhs and returns its result.
        Value *Nucleus::createOr(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
        }
 811
        // Emits a Xor (bitwise exclusive or) arithmetic instruction on lhs and rhs and returns its result.
        Value *Nucleus::createXor(Value *lhs, Value *rhs)
        {
                return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
        }
 816
 817         Value *Nucleus::createNeg(Value *v)
 818         {
 819                 return createSub(createNullValue(T(v->getType())), v);
 820         }
 821
 822         Value *Nucleus::createFNeg(Value *v)
 823         {
 824                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
 825                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
 826                                       createConstantVector(c, T(v->getType())) :
 827                                       V(::context->getConstantFloat(-0.0f));
 828
 829                 return createFSub(negativeZero, v);
 830         }
 831
 832         Value *Nucleus::createNot(Value *v)
 833         {
 834                 if(Ice::isScalarIntegerType(v->getType()))
 835                 {
 836                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
 837                 }
 838                 else   // Vector
 839                 {
 840                         int64_t c[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
 841                         return createXor(v, createConstantVector(c, T(v->getType())));
 842                 }
 843         }
 844
        // Emits code that loads a value of 'type' from the address 'ptr' and returns the
        // variable holding the loaded value.
        //
        // Types whose handle has any of the EmulatedBits set take a dedicated path:
        //  - with emulateIntrinsics, the value is assembled from one (4-byte type) or two
        //    (8-byte type) Int loads inserted into an Int4, which is then bitcast to the result;
        //  - otherwise a LoadSubVector intrinsic call is emitted with 'ptr' and the type size.
        // All other types use a plain Ice::InstLoad with the given 'align'.
        //
        // 'isVolatile' is not referenced in this function, and 'align' is only used on the
        // non-emulated path.
        Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
        {
                // The type handle is inspected as an integer so that its flag bits can be tested.
                int valueType = (int)reinterpret_cast<intptr_t>(type);
                Ice::Variable *result = ::function->makeVariable(T(type));

                if(valueType & EmulatedBits)
                {
                        if(emulateIntrinsics)
                        {
                                if(typeSize(type) == 4)
                                {
                                        auto pointer = RValue<Pointer<Byte>>(ptr);
                                        // NOTE(review): '+1-1' nets to a zero byte offset but goes through pointer
                                        // arithmetic first; createStore() dereferences 'pointer' directly. This may be
                                        // a deliberate workaround - confirm before simplifying.
                                        Int x = *Pointer<Int>(pointer +1-1);

                                        // Only element 0 of the vector is written.
                                        Int4 vector;
                                        vector = Insert(vector, x, 0);

                                        auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
                                        ::basicBlock->appendInst(bitcast);
                                }
                                else if(typeSize(type) == 8)
                                {
                                        auto pointer = RValue<Pointer<Byte>>(ptr);
                                        // NOTE(review): same '+1-1' construct as in the 4-byte case above.
                                        Int x = *Pointer<Int>(pointer +1-1);
                                        Int y = *Pointer<Int>(pointer + 4);

                                        // Only elements 0 and 1 of the vector are written.
                                        Int4 vector;
                                        vector = Insert(vector, x, 0);
                                        vector = Insert(vector, y, 1);

                                        auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
                                        ::basicBlock->appendInst(bitcast);
                                }
                                else assert(false);   // Only 4- and 8-byte emulated types are handled.
                        }
                        else
                        {
                                // Intrinsic call taking two arguments: the address and the number of bytes to load.
                                const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
                                auto target = ::context->getConstantUndef(Ice::IceType_i32);
                                auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
                                load->addArg(ptr);
                                load->addArg(::context->getConstantInt32(typeSize(type)));
                                ::basicBlock->appendInst(load);
                        }
                }
                else
                {
                        auto load = Ice::InstLoad::create(::function, result, ptr, align);
                        ::basicBlock->appendInst(load);
                }

                return V(result);
        }
 898
        // Emits code that stores 'value' (of 'type') to the address 'ptr' and returns 'value'.
        //
        // Types whose handle has any of the EmulatedBits set take a dedicated path:
        //  - with emulateIntrinsics, 'value' is bitcast to a v4i32 variable and element 0
        //    (4-byte type) or elements 0 and 1 (8-byte type) are written as Int stores;
        //  - otherwise a StoreSubVector intrinsic call is emitted with 'value', 'ptr' and the type size.
        // All other types use a plain Ice::InstStore with the given 'align'.
        //
        // 'isVolatile' is not referenced in this function, and 'align' is only used on the
        // non-emulated path.
        Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
        {
                // The type handle is inspected as an integer so that its flag bits can be tested.
                int valueType = (int)reinterpret_cast<intptr_t>(type);

                if(valueType & EmulatedBits)
                {
                        if(emulateIntrinsics)
                        {
                                if(typeSize(type) == 4)
                                {
                                        Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
                                        auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
                                        ::basicBlock->appendInst(bitcast);

                                        RValue<Int4> v(V(vector));

                                        // Write element 0 to the destination address.
                                        auto pointer = RValue<Pointer<Byte>>(ptr);
                                        Int x = Extract(v, 0);
                                        *Pointer<Int>(pointer) = x;
                                }
                                else if(typeSize(type) == 8)
                                {
                                        Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
                                        auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
                                        ::basicBlock->appendInst(bitcast);

                                        RValue<Int4> v(V(vector));

                                        // Write element 0 at byte offset 0 and element 1 at byte offset 4.
                                        auto pointer = RValue<Pointer<Byte>>(ptr);
                                        Int x = Extract(v, 0);
                                        *Pointer<Int>(pointer) = x;
                                        Int y = Extract(v, 1);
                                        *Pointer<Int>(pointer + 4) = y;
                                }
                                else assert(false);   // Only 4- and 8-byte emulated types are handled.
                        }
                        else
                        {
                                // Intrinsic call taking three arguments: the value, the address and the number of bytes to store.
                                const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
                                auto target = ::context->getConstantUndef(Ice::IceType_i32);
                                auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
                                store->addArg(value);
                                store->addArg(ptr);
                                store->addArg(::context->getConstantInt32(typeSize(type)));
                                ::basicBlock->appendInst(store);
                        }
                }
                else
                {
                        // For non-emulated types the value's own type must be exactly 'type'.
                        assert(T(value->getType()) == type);

                        auto store = Ice::InstStore::create(::function, value, ptr, align);
                        ::basicBlock->appendInst(store);
                }

                return value;
        }
 956
 957         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
 958         {
 959                 assert(index->getType() == Ice::IceType_i32);
 960
 961                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
 962                 {
 963                         int32_t offset = constant->getValue() * (int)typeSize(type);
 964
 965                         if(offset == 0)
 966                         {
 967                                 return ptr;
 968                         }
 969
 970                         return createAdd(ptr, createConstantInt(offset));
 971                 }
 972
 973                 if(!Ice::isByteSizedType(T(type)))
 974                 {
 975                         index = createMul(index, createConstantInt((int)typeSize(type)));
 976                 }
 977
 978                 if(sizeof(void*) == 8)
 979                 {
 980                         if(unsignedIndex)
 981                         {
 982                                 index = createZExt(index, T(Ice::IceType_i64));
 983                         }
 984                         else
 985                         {
 986                                 index = createSExt(index, T(Ice::IceType_i64));
 987                         }
 988                 }
 989
 990                 return createAdd(ptr, index);
 991         }
 992
        // Not implemented for this backend: asserts unconditionally and returns nullptr
        // (the return is only reached when assertions are compiled out).
        Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
        {
                assert(false && "UNIMPLEMENTED"); return nullptr;
        }
 997
 998         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
 999         {
1000                 if(v->getType() == T(destType))
1001                 {
1002                         return v;
1003                 }
1004
1005                 Ice::Variable *result = ::function->makeVariable(T(destType));
1006                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1007                 ::basicBlock->appendInst(cast);
1008
1009                 return V(result);
1010         }
1011
        // Emits a Trunc cast of v to destType; v is returned as-is when it already has that type.
        Value *Nucleus::createTrunc(Value *v, Type *destType)
        {
                return createCast(Ice::InstCast::Trunc, v, destType);
        }
1016
        // Emits a Zext (zero extension) cast of v to destType; v is returned as-is when it already has that type.
        Value *Nucleus::createZExt(Value *v, Type *destType)
        {
                return createCast(Ice::InstCast::Zext, v, destType);
        }
1021
        // Emits a Sext (sign extension) cast of v to destType; v is returned as-is when it already has that type.
        Value *Nucleus::createSExt(Value *v, Type *destType)
        {
                return createCast(Ice::InstCast::Sext, v, destType);
        }
1026
        // Emits an Fptosi (floating-point to signed integer) cast of v to destType.
        Value *Nucleus::createFPToSI(Value *v, Type *destType)
        {
                return createCast(Ice::InstCast::Fptosi, v, destType);
        }
1031
        // Emits an Sitofp (signed integer to floating-point) cast of v to destType.
        Value *Nucleus::createSIToFP(Value *v, Type *destType)
        {
                return createCast(Ice::InstCast::Sitofp, v, destType);
        }
1036
        // Emits an Fptrunc (floating-point truncation) cast of v to destType; v is returned as-is when it already has that type.
        Value *Nucleus::createFPTrunc(Value *v, Type *destType)
        {
                return createCast(Ice::InstCast::Fptrunc, v, destType);
        }
1041
        // Emits an Fpext (floating-point extension) cast of v to destType; v is returned as-is when it already has that type.
        Value *Nucleus::createFPExt(Value *v, Type *destType)
        {
                return createCast(Ice::InstCast::Fpext, v, destType);
        }
1046
1047         Value *Nucleus::createBitCast(Value *v, Type *destType)
1048         {
1049                 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1050                 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1051                 // emulate them by writing to the stack and reading back as the destination type.
1052                 if(emulateMismatchedBitCast)
1053                 {
1054                         if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1055                         {
1056                                 Value *address = allocateStackVariable(destType);
1057                                 createStore(v, address, T(v->getType()));
1058                                 return createLoad(address, destType);
1059                         }
1060                         else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1061                         {
1062                                 Value *address = allocateStackVariable(T(v->getType()));
1063                                 createStore(v, address, T(v->getType()));
1064                                 return createLoad(address, destType);
1065                         }
1066                 }
1067
1068                 return createCast(Ice::InstCast::Bitcast, v, destType);
1069         }
1070
1071         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1072         {
1073                 assert(lhs->getType() == rhs->getType());
1074
1075                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1076                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1077                 ::basicBlock->appendInst(cmp);
1078
1079                 return V(result);
1080         }
1081
        // Emits an integer comparison with the Eq (equal) condition and returns its result.
        Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
        }
1086
        // Emits an integer comparison with the Ne (not equal) condition and returns its result.
        Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
        }
1091
        // Emits an integer comparison with the Ugt (unsigned greater than) condition and returns its result.
        Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
        }
1096
        // Emits an integer comparison with the Uge (unsigned greater than or equal) condition and returns its result.
        Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
        }
1101
        // Emits an integer comparison with the Ult (unsigned less than) condition and returns its result.
        Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
        }
1106
        // Emits an integer comparison with the Ule (unsigned less than or equal) condition and returns its result.
        Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
        }
1111
        // Emits an integer comparison with the Sgt (signed greater than) condition and returns its result.
        Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
        }
1116
        // Emits an integer comparison with the Sge (signed greater than or equal) condition and returns its result.
        Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
        }
1121
        // Emits an integer comparison with the Slt (signed less than) condition and returns its result.
        Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
        }
1126
        // Emits an integer comparison with the Sle (signed less than or equal) condition and returns its result.
        Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
        {
                return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
        }
1131
1132         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1133         {
1134                 assert(lhs->getType() == rhs->getType());
1135                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1136
1137                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1138                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1139                 ::basicBlock->appendInst(cmp);
1140
1141                 return V(result);
1142         }
1143
        // Emits a floating-point comparison with the Oeq (ordered, equal) condition and returns its result.
        Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
        }
1148
        // Emits a floating-point comparison with the Ogt (ordered, greater than) condition and returns its result.
        Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
        }
1153
        // Emits a floating-point comparison with the Oge (ordered, greater than or equal) condition and returns its result.
        Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
        }
1158
        // Emits a floating-point comparison with the Olt (ordered, less than) condition and returns its result.
        Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
        }
1163
        // Emits a floating-point comparison with the Ole (ordered, less than or equal) condition and returns its result.
        Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
        }
1168
        // Emits a floating-point comparison with the One (ordered, not equal) condition and returns its result.
        Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
        }
1173
        // Emits a floating-point comparison with the Ord (ordered) condition and returns its result.
        Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
        }
1178
        // Emits a floating-point comparison with the Uno (unordered) condition and returns its result.
        Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
        }
1183
        // Emits a floating-point comparison with the Ueq (unordered or equal) condition and returns its result.
        Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
        }
1188
        // Emits a floating-point comparison with the Ugt (unordered or greater than) condition and returns its result.
        Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
        }
1193
        // Emits a floating-point comparison with the Uge (unordered or greater than or equal) condition and returns its result.
        Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
        }
1198
        // Emits a floating-point comparison with the Ult (unordered or less than) condition and returns its result.
        Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
        }
1203
        // Emits a floating-point comparison with the Ule (unordered or less than or equal) condition and returns its result.
        Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
        }
1208
        // Emits a floating-point comparison with the Une (unordered or not equal) condition and returns its result.
        Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
        {
                return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
        }
1213
1214         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1215         {
1216                 auto result = ::function->makeVariable(T(type));
1217                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1218                 ::basicBlock->appendInst(extract);
1219
1220                 return V(result);
1221         }
1222
1223         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1224         {
1225                 auto result = ::function->makeVariable(vector->getType());
1226                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1227                 ::basicBlock->appendInst(insert);
1228
1229                 return V(result);
1230         }
1231
1232         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1233         {
1234                 assert(V1->getType() == V2->getType());
1235
1236                 int size = Ice::typeNumElements(V1->getType());
1237                 auto result = ::function->makeVariable(V1->getType());
1238                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1239
1240                 for(int i = 0; i < size; i++)
1241                 {
1242                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1243                 }
1244
1245                 ::basicBlock->appendInst(shuffle);
1246
1247                 return V(result);
1248         }
1249
1250         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1251         {
1252                 assert(ifTrue->getType() == ifFalse->getType());
1253
1254                 auto result = ::function->makeVariable(ifTrue->getType());
1255                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1256                 ::basicBlock->appendInst(select);
1257
1258                 return V(result);
1259         }
1260
        // Appends a switch instruction on 'control' with room for 'numCases' cases and
        // 'defaultBranch' as the default target. The instruction is returned as a
        // SwitchCases handle to be passed to addSwitchCase().
        SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
        {
                auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
                ::basicBlock->appendInst(switchInst);

                // The handle is the instruction itself, viewed through the SwitchCases type.
                return reinterpret_cast<SwitchCases*>(switchInst);
        }
1268
        // Adds a case to 'switchCases' that branches to 'branch' for the value 'label'.
        // NOTE(review): 'label' is passed as both the first and the second argument of
        // addBranch(). If the first argument is a case slot index, labels must stay below
        // the numCases given to createSwitch() - confirm against the addBranch() declaration.
        void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
        {
                switchCases->addBranch(label, label, branch);
        }
1273
1274         void Nucleus::createUnreachable()
1275         {
1276                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1277                 ::basicBlock->appendInst(unreachable);
1278         }
1279
1280         static Value *createSwizzle4(Value *val, unsigned char select)
1281         {
1282                 int swizzle[4] =
1283                 {
1284                         (select >> 0) & 0x03,
1285                         (select >> 2) & 0x03,
1286                         (select >> 4) & 0x03,
1287                         (select >> 6) & 0x03,
1288                 };
1289
1290                 return Nucleus::createShuffleVector(val, val, swizzle);
1291         }
1292
1293         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1294         {
1295                 int64_t mask[4] = {0, 0, 0, 0};
1296
1297                 mask[(select >> 0) & 0x03] = -1;
1298                 mask[(select >> 2) & 0x03] = -1;
1299                 mask[(select >> 4) & 0x03] = -1;
1300                 mask[(select >> 6) & 0x03] = -1;
1301
1302                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1303                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1304
1305                 return result;
1306         }
1307
1308         Type *Nucleus::getPointerType(Type *ElementType)
1309         {
1310                 if(sizeof(void*) == 8)
1311                 {
1312                         return T(Ice::IceType_i64);
1313                 }
1314                 else
1315                 {
1316                         return T(Ice::IceType_i32);
1317                 }
1318         }
1319
1320         Value *Nucleus::createNullValue(Type *Ty)
1321         {
1322                 if(Ice::isVectorType(T(Ty)))
1323                 {
1324                         assert(Ice::typeNumElements(T(Ty)) <= 16);
1325                         int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1326                         return createConstantVector(c, Ty);
1327                 }
1328                 else
1329                 {
1330                         return V(::context->getConstantZero(T(Ty)));
1331                 }
1332         }
1333
        // Returns the 64-bit integer constant 'i' obtained from the global context.
        Value *Nucleus::createConstantLong(int64_t i)
        {
                return V(::context->getConstantInt64(i));
        }
1338
        // Returns the 32-bit integer constant 'i' obtained from the global context.
        Value *Nucleus::createConstantInt(int i)
        {
                return V(::context->getConstantInt32(i));
        }
1343
        // Unsigned overload: returns the 32-bit integer constant for 'i', obtained through
        // the same getConstantInt32() call as the signed overload.
        Value *Nucleus::createConstantInt(unsigned int i)
        {
                return V(::context->getConstantInt32(i));
        }
1348
        // Returns the 1-bit integer constant for 'b' obtained from the global context.
        Value *Nucleus::createConstantBool(bool b)
        {
                return V(::context->getConstantInt1(b));
        }
1353
        // Returns the 8-bit integer constant 'i' obtained from the global context.
        Value *Nucleus::createConstantByte(signed char i)
        {
                return V(::context->getConstantInt8(i));
        }
1358
        // Unsigned overload: returns the 8-bit integer constant for 'i', obtained through
        // the same getConstantInt8() call as the signed overload.
        Value *Nucleus::createConstantByte(unsigned char i)
        {
                return V(::context->getConstantInt8(i));
        }
1363
        // Returns the 16-bit integer constant 'i' obtained from the global context.
        Value *Nucleus::createConstantShort(short i)
        {
                return V(::context->getConstantInt16(i));
        }
1368
        // Unsigned overload: returns the 16-bit integer constant for 'i', obtained through
        // the same getConstantInt16() call as the signed overload.
        Value *Nucleus::createConstantShort(unsigned short i)
        {
                return V(::context->getConstantInt16(i));
        }
1373
        // Returns the single-precision floating-point constant 'x' obtained from the global context.
        Value *Nucleus::createConstantFloat(float x)
        {
                return V(::context->getConstantFloat(x));
        }
1378
1379         Value *Nucleus::createNullPointer(Type *Ty)
1380         {
1381                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1382         }
1383
1384         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1385         {
1386                 const int vectorSize = 16;
1387                 assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
1388                 const int alignment = vectorSize;
1389                 auto globalPool = ::function->getGlobalPool();
1390
1391                 const int64_t *i = constants;
1392                 const double *f = reinterpret_cast<const double*>(constants);
1393                 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1394
1395                 switch((int)reinterpret_cast<intptr_t>(type))
1396                 {
1397                 case Ice::IceType_v4i32:
1398                 case Ice::IceType_v4i1:
1399                         {
1400                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1401                                 static_assert(sizeof(initializer) == vectorSize, "!");
1402                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1403                         }
1404                         break;
1405                 case Ice::IceType_v4f32:
1406                         {
1407                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1408                                 static_assert(sizeof(initializer) == vectorSize, "!");
1409                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1410                         }
1411                         break;
1412                 case Ice::IceType_v8i16:
1413                 case Ice::IceType_v8i1:
1414                         {
1415                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1416                                 static_assert(sizeof(initializer) == vectorSize, "!");
1417                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1418                         }
1419                         break;
1420                 case Ice::IceType_v16i8:
1421                 case Ice::IceType_v16i1:
1422                         {
1423                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1424                                 static_assert(sizeof(initializer) == vectorSize, "!");
1425                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1426                         }
1427                         break;
1428                 case Type_v2i32:
1429                         {
1430                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1431                                 static_assert(sizeof(initializer) == vectorSize, "!");
1432                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1433                         }
1434                         break;
1435                 case Type_v2f32:
1436                         {
1437                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1438                                 static_assert(sizeof(initializer) == vectorSize, "!");
1439                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1440                         }
1441                         break;
1442                 case Type_v4i16:
1443                         {
1444                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1445                                 static_assert(sizeof(initializer) == vectorSize, "!");
1446                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1447                         }
1448                         break;
1449                 case Type_v8i8:
1450                         {
1451                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1452                                 static_assert(sizeof(initializer) == vectorSize, "!");
1453                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1454                         }
1455                         break;
1456                 case Type_v4i8:
1457                         {
1458                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1459                                 static_assert(sizeof(initializer) == vectorSize, "!");
1460                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1461                         }
1462                         break;
1463                 default:
1464                         assert(false && "Unknown constant vector type" && type);
1465                 }
1466
1467                 auto name = Ice::GlobalString::createWithoutString(::context);
1468                 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1469                 variableDeclaration->setName(name);
1470                 variableDeclaration->setAlignment(alignment);
1471                 variableDeclaration->setIsConstant(true);
1472                 variableDeclaration->addInitializer(dataInitializer);
1473
1474                 ::function->addGlobal(variableDeclaration);
1475
1476                 constexpr int32_t offset = 0;
1477                 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1478
1479                 Ice::Variable *result = ::function->makeVariable(T(type));
1480                 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1481                 ::basicBlock->appendInst(load);
1482
1483                 return V(result);
1484         }
1485
1486         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1487         {
1488                 return createConstantVector((const int64_t*)constants, type);
1489         }
1490
1491         Type *Void::getType()
1492         {
1493                 return T(Ice::IceType_void);
1494         }
1495
1496         Bool::Bool(Argument<Bool> argument)
1497         {
1498                 storeValue(argument.value);
1499         }
1500
1501         Bool::Bool(bool x)
1502         {
1503                 storeValue(Nucleus::createConstantBool(x));
1504         }
1505
1506         Bool::Bool(RValue<Bool> rhs)
1507         {
1508                 storeValue(rhs.value);
1509         }
1510
1511         Bool::Bool(const Bool &rhs)
1512         {
1513                 Value *value = rhs.loadValue();
1514                 storeValue(value);
1515         }
1516
1517         Bool::Bool(const Reference<Bool> &rhs)
1518         {
1519                 Value *value = rhs.loadValue();
1520                 storeValue(value);
1521         }
1522
1523         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1524         {
1525                 storeValue(rhs.value);
1526
1527                 return rhs;
1528         }
1529
1530         RValue<Bool> Bool::operator=(const Bool &rhs)
1531         {
1532                 Value *value = rhs.loadValue();
1533                 storeValue(value);
1534
1535                 return RValue<Bool>(value);
1536         }
1537
1538         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1539         {
1540                 Value *value = rhs.loadValue();
1541                 storeValue(value);
1542
1543                 return RValue<Bool>(value);
1544         }
1545
1546         RValue<Bool> operator!(RValue<Bool> val)
1547         {
1548                 return RValue<Bool>(Nucleus::createNot(val.value));
1549         }
1550
1551         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1552         {
1553                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1554         }
1555
1556         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1557         {
1558                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1559         }
1560
1561         Type *Bool::getType()
1562         {
1563                 return T(Ice::IceType_i1);
1564         }
1565
1566         Byte::Byte(Argument<Byte> argument)
1567         {
1568                 storeValue(argument.value);
1569         }
1570
1571         Byte::Byte(RValue<Int> cast)
1572         {
1573                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1574
1575                 storeValue(integer);
1576         }
1577
1578         Byte::Byte(RValue<UInt> cast)
1579         {
1580                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1581
1582                 storeValue(integer);
1583         }
1584
1585         Byte::Byte(RValue<UShort> cast)
1586         {
1587                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1588
1589                 storeValue(integer);
1590         }
1591
1592         Byte::Byte(int x)
1593         {
1594                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1595         }
1596
1597         Byte::Byte(unsigned char x)
1598         {
1599                 storeValue(Nucleus::createConstantByte(x));
1600         }
1601
1602         Byte::Byte(RValue<Byte> rhs)
1603         {
1604                 storeValue(rhs.value);
1605         }
1606
1607         Byte::Byte(const Byte &rhs)
1608         {
1609                 Value *value = rhs.loadValue();
1610                 storeValue(value);
1611         }
1612
1613         Byte::Byte(const Reference<Byte> &rhs)
1614         {
1615                 Value *value = rhs.loadValue();
1616                 storeValue(value);
1617         }
1618
1619         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1620         {
1621                 storeValue(rhs.value);
1622
1623                 return rhs;
1624         }
1625
1626         RValue<Byte> Byte::operator=(const Byte &rhs)
1627         {
1628                 Value *value = rhs.loadValue();
1629                 storeValue(value);
1630
1631                 return RValue<Byte>(value);
1632         }
1633
1634         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1635         {
1636                 Value *value = rhs.loadValue();
1637                 storeValue(value);
1638
1639                 return RValue<Byte>(value);
1640         }
1641
1642         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1643         {
1644                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1645         }
1646
1647         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1648         {
1649                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1650         }
1651
1652         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1653         {
1654                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1655         }
1656
1657         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1658         {
1659                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1660         }
1661
1662         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1663         {
1664                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1665         }
1666
1667         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1668         {
1669                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1670         }
1671
1672         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1673         {
1674                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1675         }
1676
1677         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1678         {
1679                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1680         }
1681
1682         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1683         {
1684                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1685         }
1686
1687         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1688         {
1689                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1690         }
1691
1692         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1693         {
1694                 return lhs = lhs + rhs;
1695         }
1696
1697         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1698         {
1699                 return lhs = lhs - rhs;
1700         }
1701
1702         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1703         {
1704                 return lhs = lhs * rhs;
1705         }
1706
1707         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1708         {
1709                 return lhs = lhs / rhs;
1710         }
1711
1712         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1713         {
1714                 return lhs = lhs % rhs;
1715         }
1716
1717         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1718         {
1719                 return lhs = lhs & rhs;
1720         }
1721
1722         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1723         {
1724                 return lhs = lhs | rhs;
1725         }
1726
1727         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1728         {
1729                 return lhs = lhs ^ rhs;
1730         }
1731
1732         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1733         {
1734                 return lhs = lhs << rhs;
1735         }
1736
1737         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1738         {
1739                 return lhs = lhs >> rhs;
1740         }
1741
1742         RValue<Byte> operator+(RValue<Byte> val)
1743         {
1744                 return val;
1745         }
1746
1747         RValue<Byte> operator-(RValue<Byte> val)
1748         {
1749                 return RValue<Byte>(Nucleus::createNeg(val.value));
1750         }
1751
1752         RValue<Byte> operator~(RValue<Byte> val)
1753         {
1754                 return RValue<Byte>(Nucleus::createNot(val.value));
1755         }
1756
1757         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1758         {
1759                 RValue<Byte> res = val;
1760                 val += Byte(1);
1761                 return res;
1762         }
1763
1764         const Byte &operator++(Byte &val)   // Pre-increment
1765         {
1766                 val += Byte(1);
1767                 return val;
1768         }
1769
1770         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1771         {
1772                 RValue<Byte> res = val;
1773                 val -= Byte(1);
1774                 return res;
1775         }
1776
1777         const Byte &operator--(Byte &val)   // Pre-decrement
1778         {
1779                 val -= Byte(1);
1780                 return val;
1781         }
1782
1783         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1784         {
1785                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1786         }
1787
1788         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1789         {
1790                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1791         }
1792
1793         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1794         {
1795                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1796         }
1797
1798         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1799         {
1800                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1801         }
1802
1803         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1804         {
1805                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1806         }
1807
1808         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1809         {
1810                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1811         }
1812
1813         Type *Byte::getType()
1814         {
1815                 return T(Ice::IceType_i8);
1816         }
1817
1818         SByte::SByte(Argument<SByte> argument)
1819         {
1820                 storeValue(argument.value);
1821         }
1822
1823         SByte::SByte(RValue<Int> cast)
1824         {
1825                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1826
1827                 storeValue(integer);
1828         }
1829
1830         SByte::SByte(RValue<Short> cast)
1831         {
1832                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1833
1834                 storeValue(integer);
1835         }
1836
1837         SByte::SByte(signed char x)
1838         {
1839                 storeValue(Nucleus::createConstantByte(x));
1840         }
1841
1842         SByte::SByte(RValue<SByte> rhs)
1843         {
1844                 storeValue(rhs.value);
1845         }
1846
1847         SByte::SByte(const SByte &rhs)
1848         {
1849                 Value *value = rhs.loadValue();
1850                 storeValue(value);
1851         }
1852
1853         SByte::SByte(const Reference<SByte> &rhs)
1854         {
1855                 Value *value = rhs.loadValue();
1856                 storeValue(value);
1857         }
1858
1859         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1860         {
1861                 storeValue(rhs.value);
1862
1863                 return rhs;
1864         }
1865
1866         RValue<SByte> SByte::operator=(const SByte &rhs)
1867         {
1868                 Value *value = rhs.loadValue();
1869                 storeValue(value);
1870
1871                 return RValue<SByte>(value);
1872         }
1873
1874         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1875         {
1876                 Value *value = rhs.loadValue();
1877                 storeValue(value);
1878
1879                 return RValue<SByte>(value);
1880         }
1881
1882         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1883         {
1884                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1885         }
1886
1887         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1888         {
1889                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1890         }
1891
1892         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1893         {
1894                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1895         }
1896
1897         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1898         {
1899                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1900         }
1901
1902         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1903         {
1904                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1905         }
1906
1907         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1908         {
1909                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1910         }
1911
1912         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1913         {
1914                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1915         }
1916
1917         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1918         {
1919                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1920         }
1921
1922         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1923         {
1924                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1925         }
1926
1927         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1928         {
1929                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1930         }
1931
1932         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1933         {
1934                 return lhs = lhs + rhs;
1935         }
1936
1937         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1938         {
1939                 return lhs = lhs - rhs;
1940         }
1941
1942         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1943         {
1944                 return lhs = lhs * rhs;
1945         }
1946
1947         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1948         {
1949                 return lhs = lhs / rhs;
1950         }
1951
1952         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1953         {
1954                 return lhs = lhs % rhs;
1955         }
1956
1957         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1958         {
1959                 return lhs = lhs & rhs;
1960         }
1961
1962         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1963         {
1964                 return lhs = lhs | rhs;
1965         }
1966
1967         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1968         {
1969                 return lhs = lhs ^ rhs;
1970         }
1971
1972         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1973         {
1974                 return lhs = lhs << rhs;
1975         }
1976
1977         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1978         {
1979                 return lhs = lhs >> rhs;
1980         }
1981
1982         RValue<SByte> operator+(RValue<SByte> val)
1983         {
1984                 return val;
1985         }
1986
1987         RValue<SByte> operator-(RValue<SByte> val)
1988         {
1989                 return RValue<SByte>(Nucleus::createNeg(val.value));
1990         }
1991
1992         RValue<SByte> operator~(RValue<SByte> val)
1993         {
1994                 return RValue<SByte>(Nucleus::createNot(val.value));
1995         }
1996
1997         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1998         {
1999                 RValue<SByte> res = val;
2000                 val += SByte(1);
2001                 return res;
2002         }
2003
2004         const SByte &operator++(SByte &val)   // Pre-increment
2005         {
2006                 val += SByte(1);
2007                 return val;
2008         }
2009
2010         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
2011         {
2012                 RValue<SByte> res = val;
2013                 val -= SByte(1);
2014                 return res;
2015         }
2016
2017         const SByte &operator--(SByte &val)   // Pre-decrement
2018         {
2019                 val -= SByte(1);
2020                 return val;
2021         }
2022
2023         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
2024         {
2025                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2026         }
2027
2028         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
2029         {
2030                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2031         }
2032
2033         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
2034         {
2035                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2036         }
2037
2038         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
2039         {
2040                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2041         }
2042
2043         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
2044         {
2045                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2046         }
2047
2048         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
2049         {
2050                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2051         }
2052
2053         Type *SByte::getType()
2054         {
2055                 return T(Ice::IceType_i8);
2056         }
2057
2058         Short::Short(Argument<Short> argument)
2059         {
2060                 storeValue(argument.value);
2061         }
2062
2063         Short::Short(RValue<Int> cast)
2064         {
2065                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
2066
2067                 storeValue(integer);
2068         }
2069
2070         Short::Short(short x)
2071         {
2072                 storeValue(Nucleus::createConstantShort(x));
2073         }
2074
2075         Short::Short(RValue<Short> rhs)
2076         {
2077                 storeValue(rhs.value);
2078         }
2079
2080         Short::Short(const Short &rhs)
2081         {
2082                 Value *value = rhs.loadValue();
2083                 storeValue(value);
2084         }
2085
2086         Short::Short(const Reference<Short> &rhs)
2087         {
2088                 Value *value = rhs.loadValue();
2089                 storeValue(value);
2090         }
2091
2092         RValue<Short> Short::operator=(RValue<Short> rhs)
2093         {
2094                 storeValue(rhs.value);
2095
2096                 return rhs;
2097         }
2098
2099         RValue<Short> Short::operator=(const Short &rhs)
2100         {
2101                 Value *value = rhs.loadValue();
2102                 storeValue(value);
2103
2104                 return RValue<Short>(value);
2105         }
2106
2107         RValue<Short> Short::operator=(const Reference<Short> &rhs)
2108         {
2109                 Value *value = rhs.loadValue();
2110                 storeValue(value);
2111
2112                 return RValue<Short>(value);
2113         }
2114
2115         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
2116         {
2117                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
2118         }
2119
2120         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
2121         {
2122                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
2123         }
2124
2125         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
2126         {
2127                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
2128         }
2129
2130         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
2131         {
2132                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
2133         }
2134
2135         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
2136         {
2137                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
2138         }
2139
2140         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
2141         {
2142                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
2143         }
2144
2145         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
2146         {
2147                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
2148         }
2149
2150         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
2151         {
2152                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
2153         }
2154
2155         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
2156         {
2157                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
2158         }
2159
2160         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
2161         {
2162                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
2163         }
2164
2165         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
2166         {
2167                 return lhs = lhs + rhs;
2168         }
2169
2170         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2171         {
2172                 return lhs = lhs - rhs;
2173         }
2174
2175         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2176         {
2177                 return lhs = lhs * rhs;
2178         }
2179
2180         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2181         {
2182                 return lhs = lhs / rhs;
2183         }
2184
2185         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2186         {
2187                 return lhs = lhs % rhs;
2188         }
2189
2190         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2191         {
2192                 return lhs = lhs & rhs;
2193         }
2194
2195         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2196         {
2197                 return lhs = lhs | rhs;
2198         }
2199
2200         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2201         {
2202                 return lhs = lhs ^ rhs;
2203         }
2204
2205         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2206         {
2207                 return lhs = lhs << rhs;
2208         }
2209
2210         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2211         {
2212                 return lhs = lhs >> rhs;
2213         }
2214
2215         RValue<Short> operator+(RValue<Short> val)
2216         {
2217                 return val;
2218         }
2219
2220         RValue<Short> operator-(RValue<Short> val)
2221         {
2222                 return RValue<Short>(Nucleus::createNeg(val.value));
2223         }
2224
2225         RValue<Short> operator~(RValue<Short> val)
2226         {
2227                 return RValue<Short>(Nucleus::createNot(val.value));
2228         }
2229
2230         RValue<Short> operator++(Short &val, int)   // Post-increment
2231         {
2232                 RValue<Short> res = val;
2233                 val += Short(1);
2234                 return res;
2235         }
2236
2237         const Short &operator++(Short &val)   // Pre-increment
2238         {
2239                 val += Short(1);
2240                 return val;
2241         }
2242
2243         RValue<Short> operator--(Short &val, int)   // Post-decrement
2244         {
2245                 RValue<Short> res = val;
2246                 val -= Short(1);
2247                 return res;
2248         }
2249
2250         const Short &operator--(Short &val)   // Pre-decrement
2251         {
2252                 val -= Short(1);
2253                 return val;
2254         }
2255
2256         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2257         {
2258                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2259         }
2260
2261         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2262         {
2263                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2264         }
2265
2266         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2267         {
2268                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2269         }
2270
2271         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2272         {
2273                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2274         }
2275
2276         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2277         {
2278                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2279         }
2280
2281         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2282         {
2283                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2284         }
2285
2286         Type *Short::getType()
2287         {
2288                 return T(Ice::IceType_i16);
2289         }
2290
2291         UShort::UShort(Argument<UShort> argument)
2292         {
2293                 storeValue(argument.value);
2294         }
2295
2296         UShort::UShort(RValue<UInt> cast)
2297         {
2298                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2299
2300                 storeValue(integer);
2301         }
2302
2303         UShort::UShort(RValue<Int> cast)
2304         {
2305                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2306
2307                 storeValue(integer);
2308         }
2309
2310         UShort::UShort(unsigned short x)
2311         {
2312                 storeValue(Nucleus::createConstantShort(x));
2313         }
2314
2315         UShort::UShort(RValue<UShort> rhs)
2316         {
2317                 storeValue(rhs.value);
2318         }
2319
2320         UShort::UShort(const UShort &rhs)
2321         {
2322                 Value *value = rhs.loadValue();
2323                 storeValue(value);
2324         }
2325
2326         UShort::UShort(const Reference<UShort> &rhs)
2327         {
2328                 Value *value = rhs.loadValue();
2329                 storeValue(value);
2330         }
2331
2332         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2333         {
2334                 storeValue(rhs.value);
2335
2336                 return rhs;
2337         }
2338
2339         RValue<UShort> UShort::operator=(const UShort &rhs)
2340         {
2341                 Value *value = rhs.loadValue();
2342                 storeValue(value);
2343
2344                 return RValue<UShort>(value);
2345         }
2346
2347         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2348         {
2349                 Value *value = rhs.loadValue();
2350                 storeValue(value);
2351
2352                 return RValue<UShort>(value);
2353         }
2354
2355         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2356         {
2357                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2358         }
2359
2360         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2361         {
2362                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2363         }
2364
2365         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2366         {
2367                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2368         }
2369
2370         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2371         {
2372                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2373         }
2374
2375         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2376         {
2377                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2378         }
2379
2380         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2381         {
2382                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2383         }
2384
2385         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2386         {
2387                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2388         }
2389
2390         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2391         {
2392                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2393         }
2394
2395         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2396         {
2397                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2398         }
2399
2400         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2401         {
2402                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2403         }
2404
2405         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2406         {
2407                 return lhs = lhs + rhs;
2408         }
2409
2410         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2411         {
2412                 return lhs = lhs - rhs;
2413         }
2414
2415         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2416         {
2417                 return lhs = lhs * rhs;
2418         }
2419
2420         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2421         {
2422                 return lhs = lhs / rhs;
2423         }
2424
2425         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2426         {
2427                 return lhs = lhs % rhs;
2428         }
2429
2430         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2431         {
2432                 return lhs = lhs & rhs;
2433         }
2434
2435         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2436         {
2437                 return lhs = lhs | rhs;
2438         }
2439
2440         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2441         {
2442                 return lhs = lhs ^ rhs;
2443         }
2444
2445         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2446         {
2447                 return lhs = lhs << rhs;
2448         }
2449
2450         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2451         {
2452                 return lhs = lhs >> rhs;
2453         }
2454
2455         RValue<UShort> operator+(RValue<UShort> val)
2456         {
2457                 return val;
2458         }
2459
2460         RValue<UShort> operator-(RValue<UShort> val)
2461         {
2462                 return RValue<UShort>(Nucleus::createNeg(val.value));
2463         }
2464
2465         RValue<UShort> operator~(RValue<UShort> val)
2466         {
2467                 return RValue<UShort>(Nucleus::createNot(val.value));
2468         }
2469
2470         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2471         {
2472                 RValue<UShort> res = val;
2473                 val += UShort(1);
2474                 return res;
2475         }
2476
2477         const UShort &operator++(UShort &val)   // Pre-increment
2478         {
2479                 val += UShort(1);
2480                 return val;
2481         }
2482
2483         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2484         {
2485                 RValue<UShort> res = val;
2486                 val -= UShort(1);
2487                 return res;
2488         }
2489
2490         const UShort &operator--(UShort &val)   // Pre-decrement
2491         {
2492                 val -= UShort(1);
2493                 return val;
2494         }
2495
2496         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2497         {
2498                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2499         }
2500
2501         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2502         {
2503                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2504         }
2505
2506         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2507         {
2508                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2509         }
2510
2511         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2512         {
2513                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2514         }
2515
2516         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2517         {
2518                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2519         }
2520
2521         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2522         {
2523                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2524         }
2525
2526         Type *UShort::getType()
2527         {
2528                 return T(Ice::IceType_i16);
2529         }
2530
2531         Byte4::Byte4(RValue<Byte8> cast)
2532         {
2533                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2534         }
2535
2536         Byte4::Byte4(const Reference<Byte4> &rhs)
2537         {
2538                 Value *value = rhs.loadValue();
2539                 storeValue(value);
2540         }
2541
2542         Type *Byte4::getType()
2543         {
2544                 return T(Type_v4i8);
2545         }
2546
2547         Type *SByte4::getType()
2548         {
2549                 return T(Type_v4i8);
2550         }
2551
2552         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2553         {
2554                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2555                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2556         }
2557
2558         Byte8::Byte8(RValue<Byte8> rhs)
2559         {
2560                 storeValue(rhs.value);
2561         }
2562
2563         Byte8::Byte8(const Byte8 &rhs)
2564         {
2565                 Value *value = rhs.loadValue();
2566                 storeValue(value);
2567         }
2568
2569         Byte8::Byte8(const Reference<Byte8> &rhs)
2570         {
2571                 Value *value = rhs.loadValue();
2572                 storeValue(value);
2573         }
2574
2575         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2576         {
2577                 storeValue(rhs.value);
2578
2579                 return rhs;
2580         }
2581
2582         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2583         {
2584                 Value *value = rhs.loadValue();
2585                 storeValue(value);
2586
2587                 return RValue<Byte8>(value);
2588         }
2589
2590         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2591         {
2592                 Value *value = rhs.loadValue();
2593                 storeValue(value);
2594
2595                 return RValue<Byte8>(value);
2596         }
2597
2598         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2599         {
2600                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2601         }
2602
2603         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2604         {
2605                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2606         }
2607
2608 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2609 //      {
2610 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2611 //      }
2612
2613 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2614 //      {
2615 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2616 //      }
2617
2618 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2619 //      {
2620 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2621 //      }
2622
2623         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2624         {
2625                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2626         }
2627
2628         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2629         {
2630                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2631         }
2632
2633         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2634         {
2635                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2636         }
2637
2638 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2639 //      {
2640 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2641 //      }
2642
2643 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2644 //      {
2645 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2646 //      }
2647
2648         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2649         {
2650                 return lhs = lhs + rhs;
2651         }
2652
2653         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2654         {
2655                 return lhs = lhs - rhs;
2656         }
2657
2658 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2659 //      {
2660 //              return lhs = lhs * rhs;
2661 //      }
2662
2663 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2664 //      {
2665 //              return lhs = lhs / rhs;
2666 //      }
2667
2668 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2669 //      {
2670 //              return lhs = lhs % rhs;
2671 //      }
2672
2673         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2674         {
2675                 return lhs = lhs & rhs;
2676         }
2677
2678         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2679         {
2680                 return lhs = lhs | rhs;
2681         }
2682
2683         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2684         {
2685                 return lhs = lhs ^ rhs;
2686         }
2687
2688 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2689 //      {
2690 //              return lhs = lhs << rhs;
2691 //      }
2692
2693 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2694 //      {
2695 //              return lhs = lhs >> rhs;
2696 //      }
2697
2698 //      RValue<Byte8> operator+(RValue<Byte8> val)
2699 //      {
2700 //              return val;
2701 //      }
2702
2703 //      RValue<Byte8> operator-(RValue<Byte8> val)
2704 //      {
2705 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2706 //      }
2707
2708         RValue<Byte8> operator~(RValue<Byte8> val)
2709         {
2710                 return RValue<Byte8>(Nucleus::createNot(val.value));
2711         }
2712
2713         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2714         {
2715                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2716                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2717                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2718                 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2719                 paddusb->addArg(x.value);
2720                 paddusb->addArg(y.value);
2721                 ::basicBlock->appendInst(paddusb);
2722
2723                 return RValue<Byte8>(V(result));
2724         }
2725
2726         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2727         {
2728                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2729                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2730                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2731                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2732                 psubusw->addArg(x.value);
2733                 psubusw->addArg(y.value);
2734                 ::basicBlock->appendInst(psubusw);
2735
2736                 return RValue<Byte8>(V(result));
2737         }
2738
2739         RValue<Short4> Unpack(RValue<Byte4> x)
2740         {
2741                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2742                 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2743         }
2744
2745         RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2746         {
2747                 return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2748         }
2749
2750         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2751         {
2752                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2753                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2754         }
2755
2756         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2757         {
2758                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2759                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2760                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2761         }
2762
2763         RValue<Int> SignMask(RValue<Byte8> x)
2764         {
2765                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2766                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2767                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2768                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2769                 movmsk->addArg(x.value);
2770                 ::basicBlock->appendInst(movmsk);
2771
2772                 return RValue<Int>(V(result));
2773         }
2774
2775 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2776 //      {
2777 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2778 //      }
2779
2780         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2781         {
2782                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2783         }
2784
2785         Type *Byte8::getType()
2786         {
2787                 return T(Type_v8i8);
2788         }
2789
2790         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2791         {
2792                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
2793                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2794
2795                 storeValue(Nucleus::createBitCast(vector, getType()));
2796         }
2797
2798         SByte8::SByte8(RValue<SByte8> rhs)
2799         {
2800                 storeValue(rhs.value);
2801         }
2802
2803         SByte8::SByte8(const SByte8 &rhs)
2804         {
2805                 Value *value = rhs.loadValue();
2806                 storeValue(value);
2807         }
2808
2809         SByte8::SByte8(const Reference<SByte8> &rhs)
2810         {
2811                 Value *value = rhs.loadValue();
2812                 storeValue(value);
2813         }
2814
2815         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2816         {
2817                 storeValue(rhs.value);
2818
2819                 return rhs;
2820         }
2821
2822         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2823         {
2824                 Value *value = rhs.loadValue();
2825                 storeValue(value);
2826
2827                 return RValue<SByte8>(value);
2828         }
2829
2830         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2831         {
2832                 Value *value = rhs.loadValue();
2833                 storeValue(value);
2834
2835                 return RValue<SByte8>(value);
2836         }
2837
2838         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2839         {
2840                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2841         }
2842
2843         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2844         {
2845                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2846         }
2847
2848 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2849 //      {
2850 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2851 //      }
2852
2853 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2854 //      {
2855 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2856 //      }
2857
2858 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2859 //      {
2860 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2861 //      }
2862
2863         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2864         {
2865                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2866         }
2867
2868         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2869         {
2870                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2871         }
2872
2873         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2874         {
2875                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2876         }
2877
2878 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2879 //      {
2880 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2881 //      }
2882
2883 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2884 //      {
2885 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2886 //      }
2887
2888         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2889         {
2890                 return lhs = lhs + rhs;
2891         }
2892
2893         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2894         {
2895                 return lhs = lhs - rhs;
2896         }
2897
2898 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2899 //      {
2900 //              return lhs = lhs * rhs;
2901 //      }
2902
2903 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2904 //      {
2905 //              return lhs = lhs / rhs;
2906 //      }
2907
2908 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2909 //      {
2910 //              return lhs = lhs % rhs;
2911 //      }
2912
2913         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2914         {
2915                 return lhs = lhs & rhs;
2916         }
2917
2918         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2919         {
2920                 return lhs = lhs | rhs;
2921         }
2922
2923         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2924         {
2925                 return lhs = lhs ^ rhs;
2926         }
2927
2928 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2929 //      {
2930 //              return lhs = lhs << rhs;
2931 //      }
2932
2933 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2934 //      {
2935 //              return lhs = lhs >> rhs;
2936 //      }
2937
2938 //      RValue<SByte8> operator+(RValue<SByte8> val)
2939 //      {
2940 //              return val;
2941 //      }
2942
2943 //      RValue<SByte8> operator-(RValue<SByte8> val)
2944 //      {
2945 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
2946 //      }
2947
2948         RValue<SByte8> operator~(RValue<SByte8> val)
2949         {
2950                 return RValue<SByte8>(Nucleus::createNot(val.value));
2951         }
2952
2953         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2954         {
2955                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2956                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2957                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2958                 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2959                 paddsb->addArg(x.value);
2960                 paddsb->addArg(y.value);
2961                 ::basicBlock->appendInst(paddsb);
2962
2963                 return RValue<SByte8>(V(result));
2964         }
2965
2966         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2967         {
2968                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2969                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2970                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2971                 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2972                 psubsb->addArg(x.value);
2973                 psubsb->addArg(y.value);
2974                 ::basicBlock->appendInst(psubsb);
2975
2976                 return RValue<SByte8>(V(result));
2977         }
2978
2979         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2980         {
2981                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2982                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2983         }
2984
2985         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2986         {
2987                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2988                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2989                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2990         }
2991
2992         RValue<Int> SignMask(RValue<SByte8> x)
2993         {
2994                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2995                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2996                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2997                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2998                 movmsk->addArg(x.value);
2999                 ::basicBlock->appendInst(movmsk);
3000
3001                 return RValue<Int>(V(result));
3002         }
3003
3004         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
3005         {
3006                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3007         }
3008
3009         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
3010         {
3011                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
3012         }
3013
3014         Type *SByte8::getType()
3015         {
3016                 return T(Type_v8i8);
3017         }
3018
3019         Byte16::Byte16(RValue<Byte16> rhs)
3020         {
3021                 storeValue(rhs.value);
3022         }
3023
3024         Byte16::Byte16(const Byte16 &rhs)
3025         {
3026                 Value *value = rhs.loadValue();
3027                 storeValue(value);
3028         }
3029
3030         Byte16::Byte16(const Reference<Byte16> &rhs)
3031         {
3032                 Value *value = rhs.loadValue();
3033                 storeValue(value);
3034         }
3035
3036         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
3037         {
3038                 storeValue(rhs.value);
3039
3040                 return rhs;
3041         }
3042
3043         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
3044         {
3045                 Value *value = rhs.loadValue();
3046                 storeValue(value);
3047
3048                 return RValue<Byte16>(value);
3049         }
3050
3051         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
3052         {
3053                 Value *value = rhs.loadValue();
3054                 storeValue(value);
3055
3056                 return RValue<Byte16>(value);
3057         }
3058
3059         Type *Byte16::getType()
3060         {
3061                 return T(Ice::IceType_v16i8);
3062         }
3063
3064         Type *SByte16::getType()
3065         {
3066                 return T(Ice::IceType_v16i8);
3067         }
3068
3069         Short2::Short2(RValue<Short4> cast)
3070         {
3071                 storeValue(Nucleus::createBitCast(cast.value, getType()));
3072         }
3073
3074         Type *Short2::getType()
3075         {
3076                 return T(Type_v2i16);
3077         }
3078
3079         UShort2::UShort2(RValue<UShort4> cast)
3080         {
3081                 storeValue(Nucleus::createBitCast(cast.value, getType()));
3082         }
3083
3084         Type *UShort2::getType()
3085         {
3086                 return T(Type_v2i16);
3087         }
3088
3089         Short4::Short4(RValue<Int> cast)
3090         {
3091                 Value *vector = loadValue();
3092                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
3093                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
3094                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
3095
3096                 storeValue(swizzle);
3097         }
3098
3099         Short4::Short4(RValue<Int4> cast)
3100         {
3101                 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
3102                 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
3103                 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
3104
3105                 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
3106                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
3107
3108                 storeValue(short4);
3109         }
3110
3111 //      Short4::Short4(RValue<Float> cast)
3112 //      {
3113 //      }
3114
3115         Short4::Short4(RValue<Float4> cast)
3116         {
3117                 assert(false && "UNIMPLEMENTED");
3118         }
3119
3120         Short4::Short4(short xyzw)
3121         {
3122                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3123                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3124         }
3125
3126         Short4::Short4(short x, short y, short z, short w)
3127         {
3128                 int64_t constantVector[4] = {x, y, z, w};
3129                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3130         }
3131
3132         Short4::Short4(RValue<Short4> rhs)
3133         {
3134                 storeValue(rhs.value);
3135         }
3136
3137         Short4::Short4(const Short4 &rhs)
3138         {
3139                 Value *value = rhs.loadValue();
3140                 storeValue(value);
3141         }
3142
3143         Short4::Short4(const Reference<Short4> &rhs)
3144         {
3145                 Value *value = rhs.loadValue();
3146                 storeValue(value);
3147         }
3148
3149         Short4::Short4(RValue<UShort4> rhs)
3150         {
3151                 storeValue(rhs.value);
3152         }
3153
3154         Short4::Short4(const UShort4 &rhs)
3155         {
3156                 storeValue(rhs.loadValue());
3157         }
3158
3159         Short4::Short4(const Reference<UShort4> &rhs)
3160         {
3161                 storeValue(rhs.loadValue());
3162         }
3163
3164         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
3165         {
3166                 storeValue(rhs.value);
3167
3168                 return rhs;
3169         }
3170
3171         RValue<Short4> Short4::operator=(const Short4 &rhs)
3172         {
3173                 Value *value = rhs.loadValue();
3174                 storeValue(value);
3175
3176                 return RValue<Short4>(value);
3177         }
3178
3179         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3180         {
3181                 Value *value = rhs.loadValue();
3182                 storeValue(value);
3183
3184                 return RValue<Short4>(value);
3185         }
3186
3187         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3188         {
3189                 storeValue(rhs.value);
3190
3191                 return RValue<Short4>(rhs);
3192         }
3193
3194         RValue<Short4> Short4::operator=(const UShort4 &rhs)
3195         {
3196                 Value *value = rhs.loadValue();
3197                 storeValue(value);
3198
3199                 return RValue<Short4>(value);
3200         }
3201
3202         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3203         {
3204                 Value *value = rhs.loadValue();
3205                 storeValue(value);
3206
3207                 return RValue<Short4>(value);
3208         }
3209
3210         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3211         {
3212                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3213         }
3214
3215         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3216         {
3217                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3218         }
3219
3220         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3221         {
3222                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3223         }
3224
3225 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3226 //      {
3227 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3228 //      }
3229
3230 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3231 //      {
3232 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3233 //      }
3234
3235         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3236         {
3237                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3238         }
3239
3240         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3241         {
3242                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3243         }
3244
3245         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3246         {
3247                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3248         }
3249
3250         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3251         {
3252                 if(emulateIntrinsics)
3253                 {
3254                         Short4 result;
3255                         result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3256                         result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3257                         result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3258                         result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3259
3260                         return result;
3261                 }
3262                 else
3263                 {
3264                         return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3265                 }
3266         }
3267
3268         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3269         {
3270                 if(emulateIntrinsics)
3271                 {
3272                         Short4 result;
3273                         result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3274                         result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3275                         result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3276                         result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3277
3278                         return result;
3279                 }
3280                 else
3281                 {
3282                         return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3283                 }
3284         }
3285
3286         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3287         {
3288                 return lhs = lhs + rhs;
3289         }
3290
3291         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3292         {
3293                 return lhs = lhs - rhs;
3294         }
3295
3296         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3297         {
3298                 return lhs = lhs * rhs;
3299         }
3300
3301 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3302 //      {
3303 //              return lhs = lhs / rhs;
3304 //      }
3305
3306 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3307 //      {
3308 //              return lhs = lhs % rhs;
3309 //      }
3310
3311         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3312         {
3313                 return lhs = lhs & rhs;
3314         }
3315
3316         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3317         {
3318                 return lhs = lhs | rhs;
3319         }
3320
3321         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3322         {
3323                 return lhs = lhs ^ rhs;
3324         }
3325
3326         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3327         {
3328                 return lhs = lhs << rhs;
3329         }
3330
3331         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3332         {
3333                 return lhs = lhs >> rhs;
3334         }
3335
3336 //      RValue<Short4> operator+(RValue<Short4> val)
3337 //      {
3338 //              return val;
3339 //      }
3340
3341         RValue<Short4> operator-(RValue<Short4> val)
3342         {
3343                 return RValue<Short4>(Nucleus::createNeg(val.value));
3344         }
3345
3346         RValue<Short4> operator~(RValue<Short4> val)
3347         {
3348                 return RValue<Short4>(Nucleus::createNot(val.value));
3349         }
3350
3351         RValue<Short4> RoundShort4(RValue<Float4> cast)
3352         {
3353                 RValue<Int4> int4 = RoundInt(cast);
3354                 return As<Short4>(Pack(int4, int4));
3355         }
3356
3357         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3358         {
3359                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3360                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3361                 ::basicBlock->appendInst(cmp);
3362
3363                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3364                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3365                 ::basicBlock->appendInst(select);
3366
3367                 return RValue<Short4>(V(result));
3368         }
3369
3370         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3371         {
3372                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3373                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3374                 ::basicBlock->appendInst(cmp);
3375
3376                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3377                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3378                 ::basicBlock->appendInst(select);
3379
3380                 return RValue<Short4>(V(result));
3381         }
3382
3383         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3384         {
3385                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3386                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3387                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3388                 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3389                 paddsw->addArg(x.value);
3390                 paddsw->addArg(y.value);
3391                 ::basicBlock->appendInst(paddsw);
3392
3393                 return RValue<Short4>(V(result));
3394         }
3395
3396         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3397         {
3398                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3399                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3400                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3401                 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3402                 psubsw->addArg(x.value);
3403                 psubsw->addArg(y.value);
3404                 ::basicBlock->appendInst(psubsw);
3405
3406                 return RValue<Short4>(V(result));
3407         }
3408
3409         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3410         {
3411                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3412                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3413                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3414                 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3415                 pmulhw->addArg(x.value);
3416                 pmulhw->addArg(y.value);
3417                 ::basicBlock->appendInst(pmulhw);
3418
3419                 return RValue<Short4>(V(result));
3420         }
3421
3422         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3423         {
3424                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3425                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3426                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3427                 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3428                 pmaddwd->addArg(x.value);
3429                 pmaddwd->addArg(y.value);
3430                 ::basicBlock->appendInst(pmaddwd);
3431
3432                 return As<Int2>(V(result));
3433         }
3434
3435         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3436         {
3437                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3438                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3439                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3440                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3441                 pack->addArg(x.value);
3442                 pack->addArg(y.value);
3443                 ::basicBlock->appendInst(pack);
3444
3445                 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
3446         }
3447
3448         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3449         {
3450                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3451                 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3452         }
3453
3454         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3455         {
3456                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3457                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3458                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3459         }
3460
3461         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3462         {
3463                 // Real type is v8i16
3464                 int shuffle[8] =
3465                 {
3466                         (select >> 0) & 0x03,
3467                         (select >> 2) & 0x03,
3468                         (select >> 4) & 0x03,
3469                         (select >> 6) & 0x03,
3470                         (select >> 0) & 0x03,
3471                         (select >> 2) & 0x03,
3472                         (select >> 4) & 0x03,
3473                         (select >> 6) & 0x03,
3474                 };
3475
3476                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3477         }
3478
3479         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3480         {
3481                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3482         }
3483
3484         RValue<Short> Extract(RValue<Short4> val, int i)
3485         {
3486                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3487         }
3488
3489         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3490         {
3491                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3492         }
3493
3494         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3495         {
3496                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3497         }
3498
3499         Type *Short4::getType()
3500         {
3501                 return T(Type_v4i16);
3502         }
3503
3504         UShort4::UShort4(RValue<Int4> cast)
3505         {
3506                 *this = Short4(cast);
3507         }
3508
3509         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3510         {
3511                 if(saturate)
3512                 {
3513                         if(CPUID::SSE4_1)
3514                         {
3515                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3516                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3517                         }
3518                         else
3519                         {
3520                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3521                         }
3522                 }
3523                 else
3524                 {
3525                         *this = Short4(Int4(cast));
3526                 }
3527         }
3528
3529         UShort4::UShort4(unsigned short xyzw)
3530         {
3531                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3532                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3533         }
3534
3535         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3536         {
3537                 int64_t constantVector[4] = {x, y, z, w};
3538                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3539         }
3540
3541         UShort4::UShort4(RValue<UShort4> rhs)
3542         {
3543                 storeValue(rhs.value);
3544         }
3545
3546         UShort4::UShort4(const UShort4 &rhs)
3547         {
3548                 Value *value = rhs.loadValue();
3549                 storeValue(value);
3550         }
3551
3552         UShort4::UShort4(const Reference<UShort4> &rhs)
3553         {
3554                 Value *value = rhs.loadValue();
3555                 storeValue(value);
3556         }
3557
3558         UShort4::UShort4(RValue<Short4> rhs)
3559         {
3560                 storeValue(rhs.value);
3561         }
3562
3563         UShort4::UShort4(const Short4 &rhs)
3564         {
3565                 Value *value = rhs.loadValue();
3566                 storeValue(value);
3567         }
3568
3569         UShort4::UShort4(const Reference<Short4> &rhs)
3570         {
3571                 Value *value = rhs.loadValue();
3572                 storeValue(value);
3573         }
3574
3575         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3576         {
3577                 storeValue(rhs.value);
3578
3579                 return rhs;
3580         }
3581
3582         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3583         {
3584                 Value *value = rhs.loadValue();
3585                 storeValue(value);
3586
3587                 return RValue<UShort4>(value);
3588         }
3589
3590         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3591         {
3592                 Value *value = rhs.loadValue();
3593                 storeValue(value);
3594
3595                 return RValue<UShort4>(value);
3596         }
3597
3598         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3599         {
3600                 storeValue(rhs.value);
3601
3602                 return RValue<UShort4>(rhs);
3603         }
3604
3605         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3606         {
3607                 Value *value = rhs.loadValue();
3608                 storeValue(value);
3609
3610                 return RValue<UShort4>(value);
3611         }
3612
3613         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3614         {
3615                 Value *value = rhs.loadValue();
3616                 storeValue(value);
3617
3618                 return RValue<UShort4>(value);
3619         }
3620
3621         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3622         {
3623                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3624         }
3625
3626         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3627         {
3628                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3629         }
3630
3631         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3632         {
3633                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3634         }
3635
3636         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3637         {
3638                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3639         }
3640
3641         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3642         {
3643                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3644         }
3645
3646         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3647         {
3648                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3649         }
3650
3651         RValue<UShort> Extract(RValue<UShort4> val, int i)
3652         {
3653                 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3654         }
3655
3656         RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
3657         {
3658                 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
3659         }
3660
3661         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3662         {
3663                 if(emulateIntrinsics)
3664                 {
3665                         UShort4 result;
3666                         result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3667                         result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3668                         result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3669                         result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3670
3671                         return result;
3672                 }
3673                 else
3674                 {
3675                         return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3676                 }
3677         }
3678
3679         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3680         {
3681                 if(emulateIntrinsics)
3682                 {
3683                         UShort4 result;
3684                         result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3685                         result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3686                         result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3687                         result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3688
3689                         return result;
3690                 }
3691                 else
3692                 {
3693                         return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3694                 }
3695         }
3696
3697         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3698         {
3699                 return lhs = lhs << rhs;
3700         }
3701
3702         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3703         {
3704                 return lhs = lhs >> rhs;
3705         }
3706
3707         RValue<UShort4> operator~(RValue<UShort4> val)
3708         {
3709                 return RValue<UShort4>(Nucleus::createNot(val.value));
3710         }
3711
3712         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3713         {
3714                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3715                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3716                 ::basicBlock->appendInst(cmp);
3717
3718                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3719                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3720                 ::basicBlock->appendInst(select);
3721
3722                 return RValue<UShort4>(V(result));
3723         }
3724
3725         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3726         {
3727                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3728                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3729                 ::basicBlock->appendInst(cmp);
3730
3731                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3732                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3733                 ::basicBlock->appendInst(select);
3734
3735                 return RValue<UShort4>(V(result));
3736         }
3737
3738         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3739         {
3740                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3741                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3742                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3743                 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3744                 paddusw->addArg(x.value);
3745                 paddusw->addArg(y.value);
3746                 ::basicBlock->appendInst(paddusw);
3747
3748                 return RValue<UShort4>(V(result));
3749         }
3750
3751         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3752         {
3753                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3754                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3755                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3756                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3757                 psubusw->addArg(x.value);
3758                 psubusw->addArg(y.value);
3759                 ::basicBlock->appendInst(psubusw);
3760
3761                 return RValue<UShort4>(V(result));
3762         }
3763
3764         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3765         {
3766                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3767                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3768                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3769                 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3770                 pmulhuw->addArg(x.value);
3771                 pmulhuw->addArg(y.value);
3772                 ::basicBlock->appendInst(pmulhuw);
3773
3774                 return RValue<UShort4>(V(result));
3775         }
3776
3777         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3778         {
3779                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
3780         }
3781
3782         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3783         {
3784                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3785                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3786                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3787                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3788                 pack->addArg(x.value);
3789                 pack->addArg(y.value);
3790                 ::basicBlock->appendInst(pack);
3791
3792                 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
3793         }
3794
3795         Type *UShort4::getType()
3796         {
3797                 return T(Type_v4i16);
3798         }
3799
3800         Short8::Short8(short c)
3801         {
3802                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3803                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3804         }
3805
3806         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3807         {
3808                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3809                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3810         }
3811
3812         Short8::Short8(RValue<Short8> rhs)
3813         {
3814                 storeValue(rhs.value);
3815         }
3816
3817         Short8::Short8(const Reference<Short8> &rhs)
3818         {
3819                 Value *value = rhs.loadValue();
3820                 storeValue(value);
3821         }
3822
3823         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3824         {
3825                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3826                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3827
3828                 storeValue(packed);
3829         }
3830
3831         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3832         {
3833                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3834         }
3835
3836         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3837         {
3838                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3839         }
3840
3841         RValue<Short> Extract(RValue<Short8> val, int i)
3842         {
3843                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3844         }
3845
3846         RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
3847         {
3848                 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
3849         }
3850
3851         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3852         {
3853                 if(emulateIntrinsics)
3854                 {
3855                         Short8 result;
3856                         result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3857                         result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3858                         result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3859                         result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3860                         result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
3861                         result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
3862                         result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
3863                         result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
3864
3865                         return result;
3866                 }
3867                 else
3868                 {
3869                         return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3870                 }
3871         }
3872
3873         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3874         {
3875                 if(emulateIntrinsics)
3876                 {
3877                         Short8 result;
3878                         result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3879                         result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3880                         result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3881                         result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3882                         result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
3883                         result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
3884                         result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
3885                         result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
3886
3887                         return result;
3888                 }
3889                 else
3890                 {
3891                         return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3892                 }
3893         }
3894
3895         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3896         {
3897                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
3898         }
3899
3900         RValue<Int4> Abs(RValue<Int4> x)
3901         {
3902                 auto negative = x >> 31;
3903                 return (x ^ negative) - negative;
3904         }
3905
3906         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3907         {
3908                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
3909         }
3910
3911         Type *Short8::getType()
3912         {
3913                 return T(Ice::IceType_v8i16);
3914         }
3915
3916         UShort8::UShort8(unsigned short c)
3917         {
3918                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3919                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3920         }
3921
3922         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3923         {
3924                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3925                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3926         }
3927
3928         UShort8::UShort8(RValue<UShort8> rhs)
3929         {
3930                 storeValue(rhs.value);
3931         }
3932
3933         UShort8::UShort8(const Reference<UShort8> &rhs)
3934         {
3935                 Value *value = rhs.loadValue();
3936                 storeValue(value);
3937         }
3938
3939         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3940         {
3941                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3942                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3943
3944                 storeValue(packed);
3945         }
3946
3947         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3948         {
3949                 storeValue(rhs.value);
3950
3951                 return rhs;
3952         }
3953
3954         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3955         {
3956                 Value *value = rhs.loadValue();
3957                 storeValue(value);
3958
3959                 return RValue<UShort8>(value);
3960         }
3961
3962         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3963         {
3964                 Value *value = rhs.loadValue();
3965                 storeValue(value);
3966
3967                 return RValue<UShort8>(value);
3968         }
3969
3970         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3971         {
3972                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3973         }
3974
3975         RValue<UShort> Extract(RValue<UShort8> val, int i)
3976         {
3977                 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3978         }
3979
3980         RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
3981         {
3982                 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
3983         }
3984
3985         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3986         {
3987                 if(emulateIntrinsics)
3988                 {
3989                         UShort8 result;
3990                         result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3991                         result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3992                         result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3993                         result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3994                         result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
3995                         result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
3996                         result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
3997                         result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
3998
3999                         return result;
4000                 }
4001                 else
4002                 {
4003                         return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4004                 }
4005         }
4006
4007         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
4008         {
4009                 if(emulateIntrinsics)
4010                 {
4011                         UShort8 result;
4012                         result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
4013                         result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
4014                         result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
4015                         result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
4016                         result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
4017                         result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
4018                         result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
4019                         result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
4020
4021                         return result;
4022                 }
4023                 else
4024                 {
4025                         return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4026                 }
4027         }
4028
4029         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
4030         {
4031                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
4032         }
4033
4034         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
4035         {
4036                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
4037         }
4038
4039         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
4040         {
4041                 return lhs = lhs + rhs;
4042         }
4043
4044         RValue<UShort8> operator~(RValue<UShort8> val)
4045         {
4046                 return RValue<UShort8>(Nucleus::createNot(val.value));
4047         }
4048
4049         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
4050         {
4051                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4052         }
4053
4054         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
4055         {
4056                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4057         }
4058
4059         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
4060 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
4061 //      {
4062 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4063 //      }
4064
4065         Type *UShort8::getType()
4066         {
4067                 return T(Ice::IceType_v8i16);
4068         }
4069
4070         Int::Int(Argument<Int> argument)
4071         {
4072                 storeValue(argument.value);
4073         }
4074
4075         Int::Int(RValue<Byte> cast)
4076         {
4077                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4078
4079                 storeValue(integer);
4080         }
4081
4082         Int::Int(RValue<SByte> cast)
4083         {
4084                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4085
4086                 storeValue(integer);
4087         }
4088
4089         Int::Int(RValue<Short> cast)
4090         {
4091                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4092
4093                 storeValue(integer);
4094         }
4095
4096         Int::Int(RValue<UShort> cast)
4097         {
4098                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4099
4100                 storeValue(integer);
4101         }
4102
4103         Int::Int(RValue<Int2> cast)
4104         {
4105                 *this = Extract(cast, 0);
4106         }
4107
4108         Int::Int(RValue<Long> cast)
4109         {
4110                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
4111
4112                 storeValue(integer);
4113         }
4114
4115         Int::Int(RValue<Float> cast)
4116         {
4117                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
4118
4119                 storeValue(integer);
4120         }
4121
4122         Int::Int(int x)
4123         {
4124                 storeValue(Nucleus::createConstantInt(x));
4125         }
4126
4127         Int::Int(RValue<Int> rhs)
4128         {
4129                 storeValue(rhs.value);
4130         }
4131
4132         Int::Int(RValue<UInt> rhs)
4133         {
4134                 storeValue(rhs.value);
4135         }
4136
4137         Int::Int(const Int &rhs)
4138         {
4139                 Value *value = rhs.loadValue();
4140                 storeValue(value);
4141         }
4142
4143         Int::Int(const Reference<Int> &rhs)
4144         {
4145                 Value *value = rhs.loadValue();
4146                 storeValue(value);
4147         }
4148
4149         Int::Int(const UInt &rhs)
4150         {
4151                 Value *value = rhs.loadValue();
4152                 storeValue(value);
4153         }
4154
4155         Int::Int(const Reference<UInt> &rhs)
4156         {
4157                 Value *value = rhs.loadValue();
4158                 storeValue(value);
4159         }
4160
4161         RValue<Int> Int::operator=(int rhs)
4162         {
4163                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
4164         }
4165
4166         RValue<Int> Int::operator=(RValue<Int> rhs)
4167         {
4168                 storeValue(rhs.value);
4169
4170                 return rhs;
4171         }
4172
4173         RValue<Int> Int::operator=(RValue<UInt> rhs)
4174         {
4175                 storeValue(rhs.value);
4176
4177                 return RValue<Int>(rhs);
4178         }
4179
4180         RValue<Int> Int::operator=(const Int &rhs)
4181         {
4182                 Value *value = rhs.loadValue();
4183                 storeValue(value);
4184
4185                 return RValue<Int>(value);
4186         }
4187
4188         RValue<Int> Int::operator=(const Reference<Int> &rhs)
4189         {
4190                 Value *value = rhs.loadValue();
4191                 storeValue(value);
4192
4193                 return RValue<Int>(value);
4194         }
4195
4196         RValue<Int> Int::operator=(const UInt &rhs)
4197         {
4198                 Value *value = rhs.loadValue();
4199                 storeValue(value);
4200
4201                 return RValue<Int>(value);
4202         }
4203
4204         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
4205         {
4206                 Value *value = rhs.loadValue();
4207                 storeValue(value);
4208
4209                 return RValue<Int>(value);
4210         }
4211
4212         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
4213         {
4214                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
4215         }
4216
4217         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
4218         {
4219                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
4220         }
4221
4222         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
4223         {
4224                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
4225         }
4226
4227         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
4228         {
4229                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
4230         }
4231
4232         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
4233         {
4234                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
4235         }
4236
4237         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
4238         {
4239                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
4240         }
4241
4242         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
4243         {
4244                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
4245         }
4246
4247         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
4248         {
4249                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
4250         }
4251
4252         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
4253         {
4254                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
4255         }
4256
4257         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
4258         {
4259                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
4260         }
4261
4262         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
4263         {
4264                 return lhs = lhs + rhs;
4265         }
4266
4267         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
4268         {
4269                 return lhs = lhs - rhs;
4270         }
4271
4272         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
4273         {
4274                 return lhs = lhs * rhs;
4275         }
4276
4277         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
4278         {
4279                 return lhs = lhs / rhs;
4280         }
4281
4282         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
4283         {
4284                 return lhs = lhs % rhs;
4285         }
4286
4287         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
4288         {
4289                 return lhs = lhs & rhs;
4290         }
4291
4292         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
4293         {
4294                 return lhs = lhs | rhs;
4295         }
4296
4297         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
4298         {
4299                 return lhs = lhs ^ rhs;
4300         }
4301
4302         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
4303         {
4304                 return lhs = lhs << rhs;
4305         }
4306
4307         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
4308         {
4309                 return lhs = lhs >> rhs;
4310         }
4311
        // Unary plus: returns the operand unchanged; no instruction is created here.
        RValue<Int> operator+(RValue<Int> val)
        {
                return val;
        }
4316
4317         RValue<Int> operator-(RValue<Int> val)
4318         {
4319                 return RValue<Int>(Nucleus::createNeg(val.value));
4320         }
4321
4322         RValue<Int> operator~(RValue<Int> val)
4323         {
4324                 return RValue<Int>(Nucleus::createNot(val.value));
4325         }
4326
4327         RValue<Int> operator++(Int &val, int)   // Post-increment
4328         {
4329                 RValue<Int> res = val;
4330                 val += 1;
4331                 return res;
4332         }
4333
        // Adds 1 to the variable in place and returns a reference to that same variable.
        const Int &operator++(Int &val)   // Pre-increment
        {
                val += 1;
                return val;
        }
4339
4340         RValue<Int> operator--(Int &val, int)   // Post-decrement
4341         {
4342                 RValue<Int> res = val;
4343                 val -= 1;
4344                 return res;
4345         }
4346
        // Subtracts 1 from the variable in place and returns a reference to that same variable.
        const Int &operator--(Int &val)   // Pre-decrement
        {
                val -= 1;
                return val;
        }
4352
4353         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4354         {
4355                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4356         }
4357
4358         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4359         {
4360                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4361         }
4362
4363         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4364         {
4365                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4366         }
4367
4368         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4369         {
4370                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4371         }
4372
4373         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4374         {
4375                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4376         }
4377
4378         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4379         {
4380                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4381         }
4382
4383         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4384         {
4385                 return IfThenElse(x > y, x, y);
4386         }
4387
4388         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4389         {
4390                 return IfThenElse(x < y, x, y);
4391         }
4392
4393         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4394         {
4395                 return Min(Max(x, min), max);
4396         }
4397
4398         RValue<Int> RoundInt(RValue<Float> cast)
4399         {
4400                 if(emulateIntrinsics)
4401                 {
4402                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4403                         return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
4404                 }
4405                 else
4406                 {
4407                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4408                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4409                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4410                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4411                         nearbyint->addArg(cast.value);
4412                         ::basicBlock->appendInst(nearbyint);
4413
4414                         return RValue<Int>(V(result));
4415                 }
4416         }
4417
4418         Type *Int::getType()
4419         {
4420                 return T(Ice::IceType_i32);
4421         }
4422
4423         Long::Long(RValue<Int> cast)
4424         {
4425                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4426
4427                 storeValue(integer);
4428         }
4429
4430         Long::Long(RValue<UInt> cast)
4431         {
4432                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4433
4434                 storeValue(integer);
4435         }
4436
4437         Long::Long(RValue<Long> rhs)
4438         {
4439                 storeValue(rhs.value);
4440         }
4441
4442         RValue<Long> Long::operator=(int64_t rhs)
4443         {
4444                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4445         }
4446
4447         RValue<Long> Long::operator=(RValue<Long> rhs)
4448         {
4449                 storeValue(rhs.value);
4450
4451                 return rhs;
4452         }
4453
4454         RValue<Long> Long::operator=(const Long &rhs)
4455         {
4456                 Value *value = rhs.loadValue();
4457                 storeValue(value);
4458
4459                 return RValue<Long>(value);
4460         }
4461
4462         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4463         {
4464                 Value *value = rhs.loadValue();
4465                 storeValue(value);
4466
4467                 return RValue<Long>(value);
4468         }
4469
4470         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4471         {
4472                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4473         }
4474
4475         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4476         {
4477                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4478         }
4479
4480         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4481         {
4482                 return lhs = lhs + rhs;
4483         }
4484
4485         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4486         {
4487                 return lhs = lhs - rhs;
4488         }
4489
4490         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4491         {
4492                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4493         }
4494
4495         Type *Long::getType()
4496         {
4497                 return T(Ice::IceType_i64);
4498         }
4499
4500         UInt::UInt(Argument<UInt> argument)
4501         {
4502                 storeValue(argument.value);
4503         }
4504
4505         UInt::UInt(RValue<UShort> cast)
4506         {
4507                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4508
4509                 storeValue(integer);
4510         }
4511
4512         UInt::UInt(RValue<Long> cast)
4513         {
4514                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4515
4516                 storeValue(integer);
4517         }
4518
        // Converts a Float to UInt using only signed Float-to-Int conversions.
        // The result is built from a sign mask ANDed with a selection between two conversions:
        //  - the mask is the complement of the float's bit pattern arithmetically shifted right by 31,
        //    so it is zero whenever the sign bit is set;
        //  - values >= 2^31 are converted after subtracting 2^31, which is then re-added as an unsigned integer;
        //  - all other values are converted directly with Int(cast).
        UInt::UInt(RValue<Float> cast)
        {
                // Smallest positive value representable in UInt, but not in Int
                const unsigned int ustart = 0x80000000u;
                const float ustartf = float(ustart);

                // If the value is negative, store 0, otherwise store the result of the conversion
                storeValue((~(As<Int>(cast) >> 31) &
                // Check if the value can be represented as an Int
                        IfThenElse(cast >= ustartf,
                // If the value is too large, subtract ustart and re-add it after conversion.
                                As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
                // Otherwise, just convert normally
                                Int(cast))).value);
        }
4534
4535         UInt::UInt(int x)
4536         {
4537                 storeValue(Nucleus::createConstantInt(x));
4538         }
4539
4540         UInt::UInt(unsigned int x)
4541         {
4542                 storeValue(Nucleus::createConstantInt(x));
4543         }
4544
4545         UInt::UInt(RValue<UInt> rhs)
4546         {
4547                 storeValue(rhs.value);
4548         }
4549
4550         UInt::UInt(RValue<Int> rhs)
4551         {
4552                 storeValue(rhs.value);
4553         }
4554
4555         UInt::UInt(const UInt &rhs)
4556         {
4557                 Value *value = rhs.loadValue();
4558                 storeValue(value);
4559         }
4560
4561         UInt::UInt(const Reference<UInt> &rhs)
4562         {
4563                 Value *value = rhs.loadValue();
4564                 storeValue(value);
4565         }
4566
4567         UInt::UInt(const Int &rhs)
4568         {
4569                 Value *value = rhs.loadValue();
4570                 storeValue(value);
4571         }
4572
4573         UInt::UInt(const Reference<Int> &rhs)
4574         {
4575                 Value *value = rhs.loadValue();
4576                 storeValue(value);
4577         }
4578
4579         RValue<UInt> UInt::operator=(unsigned int rhs)
4580         {
4581                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4582         }
4583
4584         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4585         {
4586                 storeValue(rhs.value);
4587
4588                 return rhs;
4589         }
4590
4591         RValue<UInt> UInt::operator=(RValue<Int> rhs)
4592         {
4593                 storeValue(rhs.value);
4594
4595                 return RValue<UInt>(rhs);
4596         }
4597
4598         RValue<UInt> UInt::operator=(const UInt &rhs)
4599         {
4600                 Value *value = rhs.loadValue();
4601                 storeValue(value);
4602
4603                 return RValue<UInt>(value);
4604         }
4605
4606         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4607         {
4608                 Value *value = rhs.loadValue();
4609                 storeValue(value);
4610
4611                 return RValue<UInt>(value);
4612         }
4613
4614         RValue<UInt> UInt::operator=(const Int &rhs)
4615         {
4616                 Value *value = rhs.loadValue();
4617                 storeValue(value);
4618
4619                 return RValue<UInt>(value);
4620         }
4621
4622         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4623         {
4624                 Value *value = rhs.loadValue();
4625                 storeValue(value);
4626
4627                 return RValue<UInt>(value);
4628         }
4629
4630         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4631         {
4632                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4633         }
4634
4635         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4636         {
4637                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4638         }
4639
4640         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4641         {
4642                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4643         }
4644
4645         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4646         {
4647                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4648         }
4649
4650         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4651         {
4652                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4653         }
4654
4655         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4656         {
4657                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4658         }
4659
4660         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4661         {
4662                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4663         }
4664
4665         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4666         {
4667                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4668         }
4669
4670         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4671         {
4672                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4673         }
4674
4675         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4676         {
4677                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4678         }
4679
4680         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4681         {
4682                 return lhs = lhs + rhs;
4683         }
4684
4685         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4686         {
4687                 return lhs = lhs - rhs;
4688         }
4689
4690         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4691         {
4692                 return lhs = lhs * rhs;
4693         }
4694
4695         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4696         {
4697                 return lhs = lhs / rhs;
4698         }
4699
4700         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4701         {
4702                 return lhs = lhs % rhs;
4703         }
4704
4705         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4706         {
4707                 return lhs = lhs & rhs;
4708         }
4709
4710         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4711         {
4712                 return lhs = lhs | rhs;
4713         }
4714
4715         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4716         {
4717                 return lhs = lhs ^ rhs;
4718         }
4719
4720         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4721         {
4722                 return lhs = lhs << rhs;
4723         }
4724
4725         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4726         {
4727                 return lhs = lhs >> rhs;
4728         }
4729
        // Unary plus: returns the operand unchanged; no instruction is created here.
        RValue<UInt> operator+(RValue<UInt> val)
        {
                return val;
        }
4734
4735         RValue<UInt> operator-(RValue<UInt> val)
4736         {
4737                 return RValue<UInt>(Nucleus::createNeg(val.value));
4738         }
4739
4740         RValue<UInt> operator~(RValue<UInt> val)
4741         {
4742                 return RValue<UInt>(Nucleus::createNot(val.value));
4743         }
4744
4745         RValue<UInt> operator++(UInt &val, int)   // Post-increment
4746         {
4747                 RValue<UInt> res = val;
4748                 val += 1;
4749                 return res;
4750         }
4751
        // Adds 1 to the variable in place and returns a reference to that same variable.
        const UInt &operator++(UInt &val)   // Pre-increment
        {
                val += 1;
                return val;
        }
4757
4758         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
4759         {
4760                 RValue<UInt> res = val;
4761                 val -= 1;
4762                 return res;
4763         }
4764
        // Subtracts 1 from the variable in place and returns a reference to that same variable.
        const UInt &operator--(UInt &val)   // Pre-decrement
        {
                val -= 1;
                return val;
        }
4770
4771         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4772         {
4773                 return IfThenElse(x > y, x, y);
4774         }
4775
4776         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4777         {
4778                 return IfThenElse(x < y, x, y);
4779         }
4780
4781         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4782         {
4783                 return Min(Max(x, min), max);
4784         }
4785
4786         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4787         {
4788                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4789         }
4790
4791         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4792         {
4793                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4794         }
4795
4796         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4797         {
4798                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4799         }
4800
4801         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4802         {
4803                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4804         }
4805
4806         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4807         {
4808                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4809         }
4810
4811         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4812         {
4813                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4814         }
4815
4816 //      RValue<UInt> RoundUInt(RValue<Float> cast)
4817 //      {
4818 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
4819 //      }
4820
4821         Type *UInt::getType()
4822         {
4823                 return T(Ice::IceType_i32);
4824         }
4825
4826 //      Int2::Int2(RValue<Int> cast)
4827 //      {
4828 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4829 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4830 //
4831 //              Constant *shuffle[2];
4832 //              shuffle[0] = Nucleus::createConstantInt(0);
4833 //              shuffle[1] = Nucleus::createConstantInt(0);
4834 //
4835 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4836 //
4837 //              storeValue(replicate);
4838 //      }
4839
4840         Int2::Int2(RValue<Int4> cast)
4841         {
4842                 storeValue(Nucleus::createBitCast(cast.value, getType()));
4843         }
4844
4845         Int2::Int2(int x, int y)
4846         {
4847                 int64_t constantVector[2] = {x, y};
4848                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4849         }
4850
4851         Int2::Int2(RValue<Int2> rhs)
4852         {
4853                 storeValue(rhs.value);
4854         }
4855
4856         Int2::Int2(const Int2 &rhs)
4857         {
4858                 Value *value = rhs.loadValue();
4859                 storeValue(value);
4860         }
4861
4862         Int2::Int2(const Reference<Int2> &rhs)
4863         {
4864                 Value *value = rhs.loadValue();
4865                 storeValue(value);
4866         }
4867
4868         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4869         {
4870                 int shuffle[4] = {0, 4, 1, 5};
4871                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4872
4873                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4874         }
4875
4876         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4877         {
4878                 storeValue(rhs.value);
4879
4880                 return rhs;
4881         }
4882
4883         RValue<Int2> Int2::operator=(const Int2 &rhs)
4884         {
4885                 Value *value = rhs.loadValue();
4886                 storeValue(value);
4887
4888                 return RValue<Int2>(value);
4889         }
4890
4891         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4892         {
4893                 Value *value = rhs.loadValue();
4894                 storeValue(value);
4895
4896                 return RValue<Int2>(value);
4897         }
4898
4899         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4900         {
4901                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4902         }
4903
4904         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4905         {
4906                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4907         }
4908
4909 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4910 //      {
4911 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4912 //      }
4913
4914 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4915 //      {
4916 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4917 //      }
4918
4919 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4920 //      {
4921 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4922 //      }
4923
4924         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4925         {
4926                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4927         }
4928
4929         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4930         {
4931                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4932         }
4933
4934         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4935         {
4936                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4937         }
4938
4939         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4940         {
4941                 if(emulateIntrinsics)
4942                 {
4943                         Int2 result;
4944                         result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
4945                         result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
4946
4947                         return result;
4948                 }
4949                 else
4950                 {
4951                         return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4952                 }
4953         }
4954
4955         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4956         {
4957                 if(emulateIntrinsics)
4958                 {
4959                         Int2 result;
4960                         result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
4961                         result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
4962
4963                         return result;
4964                 }
4965                 else
4966                 {
4967                         return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4968                 }
4969         }
4970
4971         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4972         {
4973                 return lhs = lhs + rhs;
4974         }
4975
4976         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4977         {
4978                 return lhs = lhs - rhs;
4979         }
4980
4981 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4982 //      {
4983 //              return lhs = lhs * rhs;
4984 //      }
4985
4986 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4987 //      {
4988 //              return lhs = lhs / rhs;
4989 //      }
4990
4991 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4992 //      {
4993 //              return lhs = lhs % rhs;
4994 //      }
4995
4996         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4997         {
4998                 return lhs = lhs & rhs;
4999         }
5000
5001         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
5002         {
5003                 return lhs = lhs | rhs;
5004         }
5005
5006         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
5007         {
5008                 return lhs = lhs ^ rhs;
5009         }
5010
5011         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
5012         {
5013                 return lhs = lhs << rhs;
5014         }
5015
5016         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
5017         {
5018                 return lhs = lhs >> rhs;
5019         }
5020
5021 //      RValue<Int2> operator+(RValue<Int2> val)
5022 //      {
5023 //              return val;
5024 //      }
5025
5026 //      RValue<Int2> operator-(RValue<Int2> val)
5027 //      {
5028 //              return RValue<Int2>(Nucleus::createNeg(val.value));
5029 //      }
5030
5031         RValue<Int2> operator~(RValue<Int2> val)
5032         {
5033                 return RValue<Int2>(Nucleus::createNot(val.value));
5034         }
5035
5036         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
5037         {
5038                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5039                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5040         }
5041
5042         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
5043         {
5044                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5045                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5046                 return As<Short4>(Swizzle(lowHigh, 0xEE));
5047         }
5048
5049         RValue<Int> Extract(RValue<Int2> val, int i)
5050         {
5051                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
5052         }
5053
5054         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
5055         {
5056                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
5057         }
5058
5059         Type *Int2::getType()
5060         {
5061                 return T(Type_v2i32);
5062         }
5063
5064         UInt2::UInt2(unsigned int x, unsigned int y)
5065         {
5066                 int64_t constantVector[2] = {x, y};
5067                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5068         }
5069
5070         UInt2::UInt2(RValue<UInt2> rhs)
5071         {
5072                 storeValue(rhs.value);
5073         }
5074
5075         UInt2::UInt2(const UInt2 &rhs)
5076         {
5077                 Value *value = rhs.loadValue();
5078                 storeValue(value);
5079         }
5080
5081         UInt2::UInt2(const Reference<UInt2> &rhs)
5082         {
5083                 Value *value = rhs.loadValue();
5084                 storeValue(value);
5085         }
5086
5087         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
5088         {
5089                 storeValue(rhs.value);
5090
5091                 return rhs;
5092         }
5093
5094         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
5095         {
5096                 Value *value = rhs.loadValue();
5097                 storeValue(value);
5098
5099                 return RValue<UInt2>(value);
5100         }
5101
5102         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
5103         {
5104                 Value *value = rhs.loadValue();
5105                 storeValue(value);
5106
5107                 return RValue<UInt2>(value);
5108         }
5109
5110         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5111         {
5112                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5113         }
5114
5115         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5116         {
5117                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5118         }
5119
5120 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5121 //      {
5122 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5123 //      }
5124
5125 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5126 //      {
5127 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5128 //      }
5129
5130 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5131 //      {
5132 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5133 //      }
5134
5135         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5136         {
5137                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5138         }
5139
5140         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5141         {
5142                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5143         }
5144
5145         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5146         {
5147                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5148         }
5149
5150         RValue<UInt> Extract(RValue<UInt2> val, int i)
5151         {
5152                 return RValue<UInt>(Nucleus::createExtractElement(val.value, UInt::getType(), i));
5153         }
5154
5155         RValue<UInt2> Insert(RValue<UInt2> val, RValue<UInt> element, int i)
5156         {
5157                 return RValue<UInt2>(Nucleus::createInsertElement(val.value, element.value, i));
5158         }
5159
5160         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5161         {
5162                 if(emulateIntrinsics)
5163                 {
5164                         UInt2 result;
5165                         result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
5166                         result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
5167
5168                         return result;
5169                 }
5170                 else
5171                 {
5172                         return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5173                 }
5174         }
5175
5176         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5177         {
5178                 if(emulateIntrinsics)
5179                 {
5180                         UInt2 result;
5181                         result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
5182                         result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
5183
5184                         return result;
5185                 }
5186                 else
5187                 {
5188                         return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5189                 }
5190         }
5191
5192         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
5193         {
5194                 return lhs = lhs + rhs;
5195         }
5196
5197         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
5198         {
5199                 return lhs = lhs - rhs;
5200         }
5201
5202 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
5203 //      {
5204 //              return lhs = lhs * rhs;
5205 //      }
5206
5207 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
5208 //      {
5209 //              return lhs = lhs / rhs;
5210 //      }
5211
5212 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
5213 //      {
5214 //              return lhs = lhs % rhs;
5215 //      }
5216
5217         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
5218         {
5219                 return lhs = lhs & rhs;
5220         }
5221
5222         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
5223         {
5224                 return lhs = lhs | rhs;
5225         }
5226
5227         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
5228         {
5229                 return lhs = lhs ^ rhs;
5230         }
5231
5232         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
5233         {
5234                 return lhs = lhs << rhs;
5235         }
5236
5237         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
5238         {
5239                 return lhs = lhs >> rhs;
5240         }
5241
5242 //      RValue<UInt2> operator+(RValue<UInt2> val)
5243 //      {
5244 //              return val;
5245 //      }
5246
5247 //      RValue<UInt2> operator-(RValue<UInt2> val)
5248 //      {
5249 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
5250 //      }
5251
5252         RValue<UInt2> operator~(RValue<UInt2> val)
5253         {
5254                 return RValue<UInt2>(Nucleus::createNot(val.value));
5255         }
5256
5257         Type *UInt2::getType()
5258         {
5259                 return T(Type_v2i32);
5260         }
5261
5262         Int4::Int4(RValue<Byte4> cast)
5263         {
5264                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5265                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
5266
5267                 Value *e;
5268                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
5269                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5270                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
5271
5272                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5273                 Value *d = Nucleus::createBitCast(c, Short8::getType());
5274                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
5275
5276                 Value *f = Nucleus::createBitCast(e, Int4::getType());
5277                 storeValue(f);
5278         }
5279
5280         Int4::Int4(RValue<SByte4> cast)
5281         {
5282                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5283                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
5284
5285                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
5286                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5287                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
5288
5289                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5290                 Value *d = Nucleus::createBitCast(c, Short8::getType());
5291                 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
5292
5293                 *this = As<Int4>(e) >> 24;
5294         }
5295
5296         Int4::Int4(RValue<Float4> cast)
5297         {
5298                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5299
5300                 storeValue(xyzw);
5301         }
5302
5303         Int4::Int4(RValue<Short4> cast)
5304         {
5305                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5306                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
5307
5308                 *this = As<Int4>(c) >> 16;
5309         }
5310
5311         Int4::Int4(RValue<UShort4> cast)
5312         {
5313                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5314                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
5315                 Value *d = Nucleus::createBitCast(c, Int4::getType());
5316                 storeValue(d);
5317         }
5318
5319         Int4::Int4(int xyzw)
5320         {
5321                 constant(xyzw, xyzw, xyzw, xyzw);
5322         }
5323
5324         Int4::Int4(int x, int yzw)
5325         {
5326                 constant(x, yzw, yzw, yzw);
5327         }
5328
5329         Int4::Int4(int x, int y, int zw)
5330         {
5331                 constant(x, y, zw, zw);
5332         }
5333
5334         Int4::Int4(int x, int y, int z, int w)
5335         {
5336                 constant(x, y, z, w);
5337         }
5338
5339         void Int4::constant(int x, int y, int z, int w)
5340         {
5341                 int64_t constantVector[4] = {x, y, z, w};
5342                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5343         }
5344
5345         Int4::Int4(RValue<Int4> rhs)
5346         {
5347                 storeValue(rhs.value);
5348         }
5349
5350         Int4::Int4(const Int4 &rhs)
5351         {
5352                 Value *value = rhs.loadValue();
5353                 storeValue(value);
5354         }
5355
5356         Int4::Int4(const Reference<Int4> &rhs)
5357         {
5358                 Value *value = rhs.loadValue();
5359                 storeValue(value);
5360         }
5361
5362         Int4::Int4(RValue<UInt4> rhs)
5363         {
5364                 storeValue(rhs.value);
5365         }
5366
5367         Int4::Int4(const UInt4 &rhs)
5368         {
5369                 Value *value = rhs.loadValue();
5370                 storeValue(value);
5371         }
5372
5373         Int4::Int4(const Reference<UInt4> &rhs)
5374         {
5375                 Value *value = rhs.loadValue();
5376                 storeValue(value);
5377         }
5378
5379         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5380         {
5381                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5382                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5383
5384                 storeValue(packed);
5385         }
5386
5387         Int4::Int4(RValue<Int> rhs)
5388         {
5389                 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
5390
5391                 int swizzle[4] = {0, 0, 0, 0};
5392                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
5393
5394                 storeValue(replicate);
5395         }
5396
5397         Int4::Int4(const Int &rhs)
5398         {
5399                 *this = RValue<Int>(rhs.loadValue());
5400         }
5401
5402         Int4::Int4(const Reference<Int> &rhs)
5403         {
5404                 *this = RValue<Int>(rhs.loadValue());
5405         }
5406
5407         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5408         {
5409                 storeValue(rhs.value);
5410
5411                 return rhs;
5412         }
5413
5414         RValue<Int4> Int4::operator=(const Int4 &rhs)
5415         {
5416                 Value *value = rhs.loadValue();
5417                 storeValue(value);
5418
5419                 return RValue<Int4>(value);
5420         }
5421
5422         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5423         {
5424                 Value *value = rhs.loadValue();
5425                 storeValue(value);
5426
5427                 return RValue<Int4>(value);
5428         }
5429
5430         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5431         {
5432                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5433         }
5434
5435         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5436         {
5437                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5438         }
5439
5440         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5441         {
5442                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5443         }
5444
5445         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5446         {
5447                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5448         }
5449
5450         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5451         {
5452                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5453         }
5454
5455         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5456         {
5457                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5458         }
5459
5460         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5461         {
5462                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5463         }
5464
5465         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5466         {
5467                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5468         }
5469
5470         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5471         {
5472                 if(emulateIntrinsics)
5473                 {
5474                         Int4 result;
5475                         result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
5476                         result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
5477                         result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
5478                         result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
5479
5480                         return result;
5481                 }
5482                 else
5483                 {
5484                         return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5485                 }
5486         }
5487
5488         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5489         {
5490                 if(emulateIntrinsics)
5491                 {
5492                         Int4 result;
5493                         result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
5494                         result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
5495                         result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
5496                         result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
5497
5498                         return result;
5499                 }
5500                 else
5501                 {
5502                         return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5503                 }
5504         }
5505
5506         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5507         {
5508                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5509         }
5510
5511         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5512         {
5513                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5514         }
5515
5516         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5517         {
5518                 return lhs = lhs + rhs;
5519         }
5520
5521         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5522         {
5523                 return lhs = lhs - rhs;
5524         }
5525
5526         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5527         {
5528                 return lhs = lhs * rhs;
5529         }
5530
5531 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5532 //      {
5533 //              return lhs = lhs / rhs;
5534 //      }
5535
5536 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5537 //      {
5538 //              return lhs = lhs % rhs;
5539 //      }
5540
5541         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5542         {
5543                 return lhs = lhs & rhs;
5544         }
5545
5546         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5547         {
5548                 return lhs = lhs | rhs;
5549         }
5550
5551         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5552         {
5553                 return lhs = lhs ^ rhs;
5554         }
5555
5556         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5557         {
5558                 return lhs = lhs << rhs;
5559         }
5560
5561         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5562         {
5563                 return lhs = lhs >> rhs;
5564         }
5565
5566         RValue<Int4> operator+(RValue<Int4> val)
5567         {
5568                 return val;
5569         }
5570
5571         RValue<Int4> operator-(RValue<Int4> val)
5572         {
5573                 return RValue<Int4>(Nucleus::createNeg(val.value));
5574         }
5575
5576         RValue<Int4> operator~(RValue<Int4> val)
5577         {
5578                 return RValue<Int4>(Nucleus::createNot(val.value));
5579         }
5580
5581         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5582         {
5583                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5584         }
5585
5586         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5587         {
5588                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5589         }
5590
5591         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5592         {
5593                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5594         }
5595
5596         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5597         {
5598                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5599         }
5600
5601         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5602         {
5603                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5604         }
5605
5606         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5607         {
5608                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5609         }
5610
5611         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5612         {
5613                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5614                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5615                 ::basicBlock->appendInst(cmp);
5616
5617                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5618                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5619                 ::basicBlock->appendInst(select);
5620
5621                 return RValue<Int4>(V(result));
5622         }
5623
5624         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5625         {
5626                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5627                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5628                 ::basicBlock->appendInst(cmp);
5629
5630                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5631                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5632                 ::basicBlock->appendInst(select);
5633
5634                 return RValue<Int4>(V(result));
5635         }
5636
5637         RValue<Int4> RoundInt(RValue<Float4> cast)
5638         {
5639                 if(emulateIntrinsics)
5640                 {
5641                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
5642                         return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
5643                 }
5644                 else
5645                 {
5646                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5647                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5648                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5649                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5650                         nearbyint->addArg(cast.value);
5651                         ::basicBlock->appendInst(nearbyint);
5652
5653                         return RValue<Int4>(V(result));
5654                 }
5655         }
5656
5657         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5658         {
5659                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5660                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5661                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5662                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5663                 pack->addArg(x.value);
5664                 pack->addArg(y.value);
5665                 ::basicBlock->appendInst(pack);
5666
5667                 return RValue<Short8>(V(result));
5668         }
5669
5670         RValue<Int> Extract(RValue<Int4> x, int i)
5671         {
5672                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5673         }
5674
5675         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5676         {
5677                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5678         }
5679
5680         RValue<Int> SignMask(RValue<Int4> x)
5681         {
5682                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5683                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5684                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5685                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5686                 movmsk->addArg(x.value);
5687                 ::basicBlock->appendInst(movmsk);
5688
5689                 return RValue<Int>(V(result));
5690         }
5691
5692         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5693         {
5694                 return RValue<Int4>(createSwizzle4(x.value, select));
5695         }
5696
5697         Type *Int4::getType()
5698         {
5699                 return T(Ice::IceType_v4i32);
5700         }
5701
        // Builds a UInt4 from a Float4 using the signed Float4 -> Int4 conversion.
        // Elements that compare not-less-than 2^31 (ustartf) are converted after
        // subtracting ustartf, with ustart added back afterwards; all other elements
        // are converted directly. Elements whose float bit pattern has the sign bit
        // set end up as 0.
        UInt4::UInt4(RValue<Float4> cast)
        {
                // Smallest positive value representable in UInt, but not in Int
                const unsigned int ustart = 0x80000000u;
                const float ustartf = float(ustart);

                // Check if the value can be represented as an Int
                // (uiValue first holds the comparison mask, then the blended result.)
                Int4 uiValue = CmpNLT(cast, Float4(ustartf));
                // If the value is too large, subtract ustart and re-add it after conversion.
                uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
                // Otherwise, just convert normally
                          (~uiValue & Int4(cast));
                // If the value is negative, store 0, otherwise store the result of the conversion
                // (the shift by 31 of the raw float bits yields the mask that is inverted here).
                storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
        }
5717
5718         UInt4::UInt4(int xyzw)
5719         {
5720                 constant(xyzw, xyzw, xyzw, xyzw);
5721         }
5722
5723         UInt4::UInt4(int x, int yzw)
5724         {
5725                 constant(x, yzw, yzw, yzw);
5726         }
5727
5728         UInt4::UInt4(int x, int y, int zw)
5729         {
5730                 constant(x, y, zw, zw);
5731         }
5732
5733         UInt4::UInt4(int x, int y, int z, int w)
5734         {
5735                 constant(x, y, z, w);
5736         }
5737
5738         void UInt4::constant(int x, int y, int z, int w)
5739         {
5740                 int64_t constantVector[4] = {x, y, z, w};
5741                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5742         }
5743
5744         UInt4::UInt4(RValue<UInt4> rhs)
5745         {
5746                 storeValue(rhs.value);
5747         }
5748
5749         UInt4::UInt4(const UInt4 &rhs)
5750         {
5751                 Value *value = rhs.loadValue();
5752                 storeValue(value);
5753         }
5754
5755         UInt4::UInt4(const Reference<UInt4> &rhs)
5756         {
5757                 Value *value = rhs.loadValue();
5758                 storeValue(value);
5759         }
5760
5761         UInt4::UInt4(RValue<Int4> rhs)
5762         {
5763                 storeValue(rhs.value);
5764         }
5765
5766         UInt4::UInt4(const Int4 &rhs)
5767         {
5768                 Value *value = rhs.loadValue();
5769                 storeValue(value);
5770         }
5771
5772         UInt4::UInt4(const Reference<Int4> &rhs)
5773         {
5774                 Value *value = rhs.loadValue();
5775                 storeValue(value);
5776         }
5777
5778         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5779         {
5780                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5781                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5782
5783                 storeValue(packed);
5784         }
5785
5786         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5787         {
5788                 storeValue(rhs.value);
5789
5790                 return rhs;
5791         }
5792
5793         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5794         {
5795                 Value *value = rhs.loadValue();
5796                 storeValue(value);
5797
5798                 return RValue<UInt4>(value);
5799         }
5800
5801         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5802         {
5803                 Value *value = rhs.loadValue();
5804                 storeValue(value);
5805
5806                 return RValue<UInt4>(value);
5807         }
5808
5809         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5810         {
5811                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5812         }
5813
5814         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5815         {
5816                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5817         }
5818
5819         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5820         {
5821                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5822         }
5823
5824         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5825         {
5826                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5827         }
5828
5829         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5830         {
5831                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5832         }
5833
5834         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5835         {
5836                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5837         }
5838
5839         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5840         {
5841                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5842         }
5843
5844         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5845         {
5846                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5847         }
5848
5849         RValue<UInt> Extract(RValue<UInt4> x, int i)
5850         {
5851                 return RValue<UInt>(Nucleus::createExtractElement(x.value, UInt::getType(), i));
5852         }
5853
5854         RValue<UInt4> Insert(RValue<UInt4> x, RValue<UInt> element, int i)
5855         {
5856                 return RValue<UInt4>(Nucleus::createInsertElement(x.value, element.value, i));
5857         }
5858
5859         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5860         {
5861                 if(emulateIntrinsics)
5862                 {
5863                         UInt4 result;
5864                         result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
5865                         result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
5866                         result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
5867                         result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
5868
5869                         return result;
5870                 }
5871                 else
5872                 {
5873                         return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5874                 }
5875         }
5876
5877         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5878         {
5879                 if(emulateIntrinsics)
5880                 {
5881                         UInt4 result;
5882                         result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
5883                         result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
5884                         result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
5885                         result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
5886
5887                         return result;
5888                 }
5889                 else
5890                 {
5891                         return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5892                 }
5893         }
5894
5895         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5896         {
5897                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5898         }
5899
5900         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5901         {
5902                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5903         }
5904
5905         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5906         {
5907                 return lhs = lhs + rhs;
5908         }
5909
5910         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5911         {
5912                 return lhs = lhs - rhs;
5913         }
5914
5915         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5916         {
5917                 return lhs = lhs * rhs;
5918         }
5919
5920 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5921 //      {
5922 //              return lhs = lhs / rhs;
5923 //      }
5924
5925 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5926 //      {
5927 //              return lhs = lhs % rhs;
5928 //      }
5929
5930         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5931         {
5932                 return lhs = lhs & rhs;
5933         }
5934
5935         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5936         {
5937                 return lhs = lhs | rhs;
5938         }
5939
5940         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5941         {
5942                 return lhs = lhs ^ rhs;
5943         }
5944
5945         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5946         {
5947                 return lhs = lhs << rhs;
5948         }
5949
5950         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5951         {
5952                 return lhs = lhs >> rhs;
5953         }
5954
5955         RValue<UInt4> operator+(RValue<UInt4> val)
5956         {
5957                 return val;
5958         }
5959
5960         RValue<UInt4> operator-(RValue<UInt4> val)
5961         {
5962                 return RValue<UInt4>(Nucleus::createNeg(val.value));
5963         }
5964
5965         RValue<UInt4> operator~(RValue<UInt4> val)
5966         {
5967                 return RValue<UInt4>(Nucleus::createNot(val.value));
5968         }
5969
5970         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5971         {
5972                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
5973         }
5974
5975         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5976         {
5977                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
5978         }
5979
5980         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5981         {
5982                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
5983         }
5984
5985         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5986         {
5987                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
5988         }
5989
5990         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5991         {
5992                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
5993         }
5994
5995         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5996         {
5997                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
5998         }
5999
6000         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
6001         {
6002                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6003                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
6004                 ::basicBlock->appendInst(cmp);
6005
6006                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
6007                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6008                 ::basicBlock->appendInst(select);
6009
6010                 return RValue<UInt4>(V(result));
6011         }
6012
6013         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
6014         {
6015                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6016                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
6017                 ::basicBlock->appendInst(cmp);
6018
6019                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
6020                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6021                 ::basicBlock->appendInst(select);
6022
6023                 return RValue<UInt4>(V(result));
6024         }
6025
6026         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
6027         {
6028                 if(CPUID::SSE4_1)
6029                 {
6030                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
6031                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6032                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6033                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6034                         pack->addArg(x.value);
6035                         pack->addArg(y.value);
6036                         ::basicBlock->appendInst(pack);
6037
6038                         return RValue<UShort8>(V(result));
6039                 }
6040                 else
6041                 {
6042                         RValue<Int4> sx = As<Int4>(x);
6043                         RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
6044
6045                         RValue<Int4> sy = As<Int4>(y);
6046                         RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
6047
6048                         return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
6049                 }
6050         }
6051
6052         Type *UInt4::getType()
6053         {
6054                 return T(Ice::IceType_v4i32);
6055         }
6056
6057         Float::Float(RValue<Int> cast)
6058         {
6059                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
6060
6061                 storeValue(integer);
6062         }
6063
6064         Float::Float(RValue<UInt> cast)
6065         {
6066                 RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
6067                                        As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
6068
6069                 storeValue(result.value);
6070         }
6071
6072         Float::Float(float x)
6073         {
6074                 storeValue(Nucleus::createConstantFloat(x));
6075         }
6076
6077         Float::Float(RValue<Float> rhs)
6078         {
6079                 storeValue(rhs.value);
6080         }
6081
6082         Float::Float(const Float &rhs)
6083         {
6084                 Value *value = rhs.loadValue();
6085                 storeValue(value);
6086         }
6087
6088         Float::Float(const Reference<Float> &rhs)
6089         {
6090                 Value *value = rhs.loadValue();
6091                 storeValue(value);
6092         }
6093
6094         RValue<Float> Float::operator=(RValue<Float> rhs)
6095         {
6096                 storeValue(rhs.value);
6097
6098                 return rhs;
6099         }
6100
6101         RValue<Float> Float::operator=(const Float &rhs)
6102         {
6103                 Value *value = rhs.loadValue();
6104                 storeValue(value);
6105
6106                 return RValue<Float>(value);
6107         }
6108
6109         RValue<Float> Float::operator=(const Reference<Float> &rhs)
6110         {
6111                 Value *value = rhs.loadValue();
6112                 storeValue(value);
6113
6114                 return RValue<Float>(value);
6115         }
6116
6117         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6118         {
6119                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6120         }
6121
6122         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6123         {
6124                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6125         }
6126
6127         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6128         {
6129                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6130         }
6131
6132         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6133         {
6134                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
6135         }
6136
6137         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
6138         {
6139                 return lhs = lhs + rhs;
6140         }
6141
6142         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
6143         {
6144                 return lhs = lhs - rhs;
6145         }
6146
6147         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
6148         {
6149                 return lhs = lhs * rhs;
6150         }
6151
6152         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
6153         {
6154                 return lhs = lhs / rhs;
6155         }
6156
6157         RValue<Float> operator+(RValue<Float> val)
6158         {
6159                 return val;
6160         }
6161
6162         RValue<Float> operator-(RValue<Float> val)
6163         {
6164                 return RValue<Float>(Nucleus::createFNeg(val.value));
6165         }
6166
6167         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6168         {
6169                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6170         }
6171
6172         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6173         {
6174                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6175         }
6176
6177         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6178         {
6179                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6180         }
6181
6182         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6183         {
6184                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6185         }
6186
6187         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6188         {
6189                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6190         }
6191
6192         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6193         {
6194                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6195         }
6196
6197         RValue<Float> Abs(RValue<Float> x)
6198         {
6199                 return IfThenElse(x > 0.0f, x, -x);
6200         }
6201
6202         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6203         {
6204                 return IfThenElse(x > y, x, y);
6205         }
6206
6207         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6208         {
6209                 return IfThenElse(x < y, x, y);
6210         }
6211
6212         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6213         {
6214                 return 1.0f / x;
6215         }
6216
6217         RValue<Float> RcpSqrt_pp(RValue<Float> x)
6218         {
6219                 return Rcp_pp(Sqrt(x));
6220         }
6221
6222         RValue<Float> Sqrt(RValue<Float> x)
6223         {
6224                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
6225                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6226                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6227                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6228                 sqrt->addArg(x.value);
6229                 ::basicBlock->appendInst(sqrt);
6230
6231                 return RValue<Float>(V(result));
6232         }
6233
6234         RValue<Float> Round(RValue<Float> x)
6235         {
6236                 return Float4(Round(Float4(x))).x;
6237         }
6238
6239         RValue<Float> Trunc(RValue<Float> x)
6240         {
6241                 return Float4(Trunc(Float4(x))).x;
6242         }
6243
6244         RValue<Float> Frac(RValue<Float> x)
6245         {
6246                 return Float4(Frac(Float4(x))).x;
6247         }
6248
6249         RValue<Float> Floor(RValue<Float> x)
6250         {
6251                 return Float4(Floor(Float4(x))).x;
6252         }
6253
6254         RValue<Float> Ceil(RValue<Float> x)
6255         {
6256                 return Float4(Ceil(Float4(x))).x;
6257         }
6258
6259         Type *Float::getType()
6260         {
6261                 return T(Ice::IceType_f32);
6262         }
6263
6264         Float2::Float2(RValue<Float4> cast)
6265         {
6266                 storeValue(Nucleus::createBitCast(cast.value, getType()));
6267         }
6268
6269         Type *Float2::getType()
6270         {
6271                 return T(Type_v2f32);
6272         }
6273
6274         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
6275         {
6276                 Value *a = Int4(cast).loadValue();
6277                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6278
6279                 storeValue(xyzw);
6280         }
6281
6282         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
6283         {
6284                 Value *a = Int4(cast).loadValue();
6285                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6286
6287                 storeValue(xyzw);
6288         }
6289
6290         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
6291         {
6292                 Int4 c(cast);
6293                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6294         }
6295
6296         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
6297         {
6298                 Int4 c(cast);
6299                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6300         }
6301
6302         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
6303         {
6304                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
6305
6306                 storeValue(xyzw);
6307         }
6308
6309         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
6310         {
6311                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
6312                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
6313
6314                 storeValue(result.value);
6315         }
6316
6317         Float4::Float4() : FloatXYZW(this)
6318         {
6319         }
6320
6321         Float4::Float4(float xyzw) : FloatXYZW(this)
6322         {
6323                 constant(xyzw, xyzw, xyzw, xyzw);
6324         }
6325
6326         Float4::Float4(float x, float yzw) : FloatXYZW(this)
6327         {
6328                 constant(x, yzw, yzw, yzw);
6329         }
6330
6331         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
6332         {
6333                 constant(x, y, zw, zw);
6334         }
6335
6336         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
6337         {
6338                 constant(x, y, z, w);
6339         }
6340
6341         void Float4::constant(float x, float y, float z, float w)
6342         {
6343                 double constantVector[4] = {x, y, z, w};
6344                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
6345         }
6346
6347         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
6348         {
6349                 storeValue(rhs.value);
6350         }
6351
6352         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
6353         {
6354                 Value *value = rhs.loadValue();
6355                 storeValue(value);
6356         }
6357
6358         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
6359         {
6360                 Value *value = rhs.loadValue();
6361                 storeValue(value);
6362         }
6363
6364         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
6365         {
6366                 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
6367
6368                 int swizzle[4] = {0, 0, 0, 0};
6369                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
6370
6371                 storeValue(replicate);
6372         }
6373
6374         Float4::Float4(const Float &rhs) : FloatXYZW(this)
6375         {
6376                 *this = RValue<Float>(rhs.loadValue());
6377         }
6378
6379         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
6380         {
6381                 *this = RValue<Float>(rhs.loadValue());
6382         }
6383
6384         RValue<Float4> Float4::operator=(float x)
6385         {
6386                 return *this = Float4(x, x, x, x);
6387         }
6388
6389         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6390         {
6391                 storeValue(rhs.value);
6392
6393                 return rhs;
6394         }
6395
6396         RValue<Float4> Float4::operator=(const Float4 &rhs)
6397         {
6398                 Value *value = rhs.loadValue();
6399                 storeValue(value);
6400
6401                 return RValue<Float4>(value);
6402         }
6403
6404         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6405         {
6406                 Value *value = rhs.loadValue();
6407                 storeValue(value);
6408
6409                 return RValue<Float4>(value);
6410         }
6411
6412         RValue<Float4> Float4::operator=(RValue<Float> rhs)
6413         {
6414                 return *this = Float4(rhs);
6415         }
6416
6417         RValue<Float4> Float4::operator=(const Float &rhs)
6418         {
6419                 return *this = Float4(rhs);
6420         }
6421
6422         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6423         {
6424                 return *this = Float4(rhs);
6425         }
6426
6427         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6428         {
6429                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6430         }
6431
6432         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6433         {
6434                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6435         }
6436
6437         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6438         {
6439                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6440         }
6441
6442         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6443         {
6444                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6445         }
6446
6447         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6448         {
6449                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6450         }
6451
6452         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6453         {
6454                 return lhs = lhs + rhs;
6455         }
6456
6457         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6458         {
6459                 return lhs = lhs - rhs;
6460         }
6461
6462         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6463         {
6464                 return lhs = lhs * rhs;
6465         }
6466
6467         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6468         {
6469                 return lhs = lhs / rhs;
6470         }
6471
6472         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6473         {
6474                 return lhs = lhs % rhs;
6475         }
6476
6477         RValue<Float4> operator+(RValue<Float4> val)
6478         {
6479                 return val;
6480         }
6481
6482         RValue<Float4> operator-(RValue<Float4> val)
6483         {
6484                 return RValue<Float4>(Nucleus::createFNeg(val.value));
6485         }
6486
6487         RValue<Float4> Abs(RValue<Float4> x)
6488         {
6489                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6490                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6491                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6492
6493                 return As<Float4>(result);
6494         }
6495
6496         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6497         {
6498                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6499                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
6500                 ::basicBlock->appendInst(cmp);
6501
6502                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6503                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6504                 ::basicBlock->appendInst(select);
6505
6506                 return RValue<Float4>(V(result));
6507         }
6508
6509         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6510         {
6511                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6512                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
6513                 ::basicBlock->appendInst(cmp);
6514
6515                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6516                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6517                 ::basicBlock->appendInst(select);
6518
6519                 return RValue<Float4>(V(result));
6520         }
6521
6522         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6523         {
6524                 return Float4(1.0f) / x;
6525         }
6526
6527         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6528         {
6529                 return Rcp_pp(Sqrt(x));
6530         }
6531
6532         RValue<Float4> Sqrt(RValue<Float4> x)
6533         {
6534                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6535                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6536                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6537                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6538                 sqrt->addArg(x.value);
6539                 ::basicBlock->appendInst(sqrt);
6540
6541                 return RValue<Float4>(V(result));
6542         }
6543
6544         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6545         {
6546                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6547         }
6548
6549         RValue<Float> Extract(RValue<Float4> x, int i)
6550         {
6551                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6552         }
6553
6554         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6555         {
6556                 return RValue<Float4>(createSwizzle4(x.value, select));
6557         }
6558
6559         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6560         {
6561                 int shuffle[4] =
6562                 {
6563                         ((imm >> 0) & 0x03) + 0,
6564                         ((imm >> 2) & 0x03) + 0,
6565                         ((imm >> 4) & 0x03) + 4,
6566                         ((imm >> 6) & 0x03) + 4,
6567                 };
6568
6569                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6570         }
6571
6572         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6573         {
6574                 int shuffle[4] = {0, 4, 1, 5};
6575                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6576         }
6577
6578         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6579         {
6580                 int shuffle[4] = {2, 6, 3, 7};
6581                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6582         }
6583
6584         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6585         {
6586                 Value *vector = lhs.loadValue();
6587                 Value *result = createMask4(vector, rhs.value, select);
6588                 lhs.storeValue(result);
6589
6590                 return RValue<Float4>(result);
6591         }
6592
6593         RValue<Int> SignMask(RValue<Float4> x)
6594         {
6595                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6596                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6597                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6598                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6599                 movmsk->addArg(x.value);
6600                 ::basicBlock->appendInst(movmsk);
6601
6602                 return RValue<Int>(V(result));
6603         }
6604
6605         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6606         {
6607                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6608         }
6609
6610         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6611         {
6612                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6613         }
6614
6615         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6616         {
6617                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6618         }
6619
6620         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6621         {
6622                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6623         }
6624
6625         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6626         {
6627                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6628         }
6629
6630         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6631         {
6632                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6633         }
6634
6635         RValue<Float4> Round(RValue<Float4> x)
6636         {
6637                 if(emulateIntrinsics)
6638                 {
6639                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
6640                         return (x + Float4(0x00C00000)) - Float4(0x00C00000);
6641                 }
6642                 else if(CPUID::SSE4_1)
6643                 {
6644                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6645                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6646                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6647                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6648                         round->addArg(x.value);
6649                         round->addArg(::context->getConstantInt32(0));
6650                         ::basicBlock->appendInst(round);
6651
6652                         return RValue<Float4>(V(result));
6653                 }
6654                 else
6655                 {
6656                         return Float4(RoundInt(x));
6657                 }
6658         }
6659
6660         RValue<Float4> Trunc(RValue<Float4> x)
6661         {
6662                 if(CPUID::SSE4_1)
6663                 {
6664                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6665                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6666                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6667                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6668                         round->addArg(x.value);
6669                         round->addArg(::context->getConstantInt32(3));
6670                         ::basicBlock->appendInst(round);
6671
6672                         return RValue<Float4>(V(result));
6673                 }
6674                 else
6675                 {
6676                         return Float4(Int4(x));
6677                 }
6678         }
6679
6680         RValue<Float4> Frac(RValue<Float4> x)
6681         {
6682                 Float4 frc;
6683
6684                 if(CPUID::SSE4_1)
6685                 {
6686                         frc = x - Floor(x);
6687                 }
6688                 else
6689                 {
6690                         frc = x - Float4(Int4(x));   // Signed fractional part.
6691
6692                         frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));   // Add 1.0 if negative.
6693                 }
6694
6695                 // x - floor(x) can be 1.0 for very small negative x.
6696                 // Clamp against the value just below 1.0.
6697                 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
6698         }
6699
6700         RValue<Float4> Floor(RValue<Float4> x)
6701         {
6702                 if(CPUID::SSE4_1)
6703                 {
6704                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6705                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6706                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6707                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6708                         round->addArg(x.value);
6709                         round->addArg(::context->getConstantInt32(1));
6710                         ::basicBlock->appendInst(round);
6711
6712                         return RValue<Float4>(V(result));
6713                 }
6714                 else
6715                 {
6716                         return x - Frac(x);
6717                 }
6718         }
6719
6720         RValue<Float4> Ceil(RValue<Float4> x)
6721         {
6722                 if(CPUID::SSE4_1)
6723                 {
6724                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6725                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6726                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6727                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6728                         round->addArg(x.value);
6729                         round->addArg(::context->getConstantInt32(2));
6730                         ::basicBlock->appendInst(round);
6731
6732                         return RValue<Float4>(V(result));
6733                 }
6734                 else
6735                 {
6736                         return -Floor(-x);
6737                 }
6738         }
6739
6740         Type *Float4::getType()
6741         {
6742                 return T(Ice::IceType_v4f32);
6743         }
6744
6745         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6746         {
6747                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6748         }
6749
6750         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6751         {
6752                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
6753         }
6754
6755         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6756         {
6757                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
6758         }
6759
6760         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6761         {
6762                 return lhs = lhs + offset;
6763         }
6764
6765         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6766         {
6767                 return lhs = lhs + offset;
6768         }
6769
6770         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6771         {
6772                 return lhs = lhs + offset;
6773         }
6774
6775         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6776         {
6777                 return lhs + -offset;
6778         }
6779
6780         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6781         {
6782                 return lhs + -offset;
6783         }
6784
6785         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6786         {
6787                 return lhs + -offset;
6788         }
6789
6790         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6791         {
6792                 return lhs = lhs - offset;
6793         }
6794
6795         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6796         {
6797                 return lhs = lhs - offset;
6798         }
6799
6800         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6801         {
6802                 return lhs = lhs - offset;
6803         }
6804
6805         void Return()
6806         {
6807                 Nucleus::createRetVoid();
6808                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6809                 Nucleus::createUnreachable();
6810         }
6811
6812         void Return(RValue<Int> ret)
6813         {
6814                 Nucleus::createRet(ret.value);
6815                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6816                 Nucleus::createUnreachable();
6817         }
6818
6819         void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6820         {
6821                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6822                 Nucleus::setInsertBlock(bodyBB);
6823         }
6824
6825         RValue<Long> Ticks()
6826         {
6827                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
6828         }
6829 }