src/Reactor/SubzeroReactor.cpp

   1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //    http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "Nucleus.hpp"
  16
  17 #include "Reactor.hpp"
  18 #include "Routine.hpp"
  19
  20 #include "Optimizer.hpp"
  21
  22 #include "src/IceTypes.h"
  23 #include "src/IceCfg.h"
  24 #include "src/IceELFStreamer.h"
  25 #include "src/IceGlobalContext.h"
  26 #include "src/IceCfgNode.h"
  27 #include "src/IceELFObjectWriter.h"
  28 #include "src/IceGlobalInits.h"
  29
  30 #include "llvm/Support/FileSystem.h"
  31 #include "llvm/Support/raw_os_ostream.h"
  32
  33 #if defined(_WIN32)
  34 #ifndef WIN32_LEAN_AND_MEAN
  35 #define WIN32_LEAN_AND_MEAN
  36 #endif // !WIN32_LEAN_AND_MEAN
  37 #ifndef NOMINMAX
  38 #define NOMINMAX
  39 #endif // !NOMINMAX
  40 #include <Windows.h>
  41 #else
  42 #include <sys/mman.h>
  43 #if !defined(MAP_ANONYMOUS)
  44 #define MAP_ANONYMOUS MAP_ANON
  45 #endif
  46 #endif
  47
  48 #include <mutex>
  49 #include <limits>
  50 #include <iostream>
  51 #include <cassert>
  52
  53 namespace
  54 {
  55         Ice::GlobalContext *context = nullptr;
  56         Ice::Cfg *function = nullptr;
  57         Ice::CfgNode *basicBlock = nullptr;
  58         Ice::CfgLocalAllocatorScope *allocator = nullptr;
  59         sw::Routine *routine = nullptr;
  60
  61         std::mutex codegenMutex;
  62
  63         Ice::ELFFileStreamer *elfFile = nullptr;
  64         Ice::Fdstream *out = nullptr;
  65 }
  66
  67 namespace
  68 {
  69         #if !defined(__i386__) && defined(_M_IX86)
  70                 #define __i386__ 1
  71         #endif
  72
  73         #if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
  74                 #define __x86_64__ 1
  75         #endif
  76
  77         class CPUID
  78         {
  79         public:
  80                 const static bool ARM;
  81                 const static bool SSE4_1;
  82
  83         private:
  84                 static void cpuid(int registers[4], int info)
  85                 {
  86                         #if defined(__i386__) || defined(__x86_64__)
  87                                 #if defined(_WIN32)
  88                                         __cpuid(registers, info);
  89                                 #else
  90                                         __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
  91                                 #endif
  92                         #else
  93                                 registers[0] = 0;
  94                                 registers[1] = 0;
  95                                 registers[2] = 0;
  96                                 registers[3] = 0;
  97                         #endif
  98                 }
  99
 100                 static bool detectARM()
 101                 {
 102                         #if defined(__arm__)
 103                                 return true;
 104                         #elif defined(__i386__) || defined(__x86_64__)
 105                                 return false;
 106                         #else
 107                                 #error "Unknown architecture"
 108                         #endif
 109                 }
 110
 111                 static bool detectSSE4_1()
 112                 {
 113                         #if defined(__i386__) || defined(__x86_64__)
 114                                 int registers[4];
 115                                 cpuid(registers, 1);
 116                                 return (registers[2] & 0x00080000) != 0;
 117                         #else
 118                                 return false;
 119                         #endif
 120                 }
 121         };
 122
 123         const bool CPUID::ARM = CPUID::detectARM();
 124         const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
 125         const bool emulateIntrinsics = CPUID::ARM;
 126         const bool emulateMismatchedBitCast = CPUID::ARM;
 127 }
 128
 129 namespace sw
 130 {
 131         enum EmulatedType
 132         {
 133                 EmulatedShift = 16,
 134                 EmulatedV2 = 2 << EmulatedShift,
 135                 EmulatedV4 = 4 << EmulatedShift,
 136                 EmulatedV8 = 8 << EmulatedShift,
 137                 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
 138
 139                 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
 140                 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
 141                 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
 142                 Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
 143                 Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
 144                 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
 145         };
 146
 147         class Value : public Ice::Operand {};
 148         class SwitchCases : public Ice::InstSwitch {};
 149         class BasicBlock : public Ice::CfgNode {};
 150
 151         Ice::Type T(Type *t)
 152         {
 153                 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
 154                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
 155         }
 156
 157         Type *T(Ice::Type t)
 158         {
 159                 return reinterpret_cast<Type*>(t);
 160         }
 161
 162         Type *T(EmulatedType t)
 163         {
 164                 return reinterpret_cast<Type*>(t);
 165         }
 166
 167         Value *V(Ice::Operand *v)
 168         {
 169                 return reinterpret_cast<Value*>(v);
 170         }
 171
 172         BasicBlock *B(Ice::CfgNode *b)
 173         {
 174                 return reinterpret_cast<BasicBlock*>(b);
 175         }
 176
 177         static size_t typeSize(Type *type)
 178         {
 179                 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
 180                 {
 181                         switch(reinterpret_cast<std::intptr_t>(type))
 182                         {
 183                         case Type_v2i32: return 8;
 184                         case Type_v4i16: return 8;
 185                         case Type_v2i16: return 4;
 186                         case Type_v8i8:  return 8;
 187                         case Type_v4i8:  return 4;
 188                         case Type_v2f32: return 8;
 189                         default: assert(false);
 190                         }
 191                 }
 192
 193                 return Ice::typeWidthInBytes(T(type));
 194         }
 195
 196         Optimization optimization[10] = {InstructionCombining, Disabled};
 197
 198         using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
 199         using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
 200
 201         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
 202         {
 203                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
 204         }
 205
 206         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
 207         {
 208                 return &sectionHeader(elfHeader)[index];
 209         }
 210
 211         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
 212         {
 213                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 214
 215                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 216                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 217                 uint32_t index = relocation.getSymbol();
 218                 int table = relocationTable.sh_link;
 219                 void *symbolValue = nullptr;
 220
 221                 if(index != SHN_UNDEF)
 222                 {
 223                         if(table == SHN_UNDEF) return nullptr;
 224                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 225
 226                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 227                         if(index >= symtab_entries)
 228                         {
 229                                 assert(index < symtab_entries && "Symbol Index out of range");
 230                                 return nullptr;
 231                         }
 232
 233                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 234                         Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
 235                         uint16_t section = symbol.st_shndx;
 236
 237                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 238                         {
 239                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 240                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 241                         }
 242                         else
 243                         {
 244                                 return nullptr;
 245                         }
 246                 }
 247
 248                 if(CPUID::ARM)
 249                 {
 250                         switch(relocation.getType())
 251                         {
 252                         case R_ARM_NONE:
 253                                 // No relocation
 254                                 break;
 255                         case R_ARM_MOVW_ABS_NC:
 256                                 {
 257                                         uint32_t thumb = 0;   // Calls to Thumb code not supported.
 258                                         uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
 259                                         *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
 260                                 }
 261                                 break;
 262                         case R_ARM_MOVT_ABS:
 263                                 {
 264                                         uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
 265                                         *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
 266                                 }
 267                                 break;
 268                         default:
 269                                 assert(false && "Unsupported relocation type");
 270                                 return nullptr;
 271                         }
 272                 }
 273                 else
 274                 {
 275                         switch(relocation.getType())
 276                         {
 277                         case R_386_NONE:
 278                                 // No relocation
 279                                 break;
 280                         case R_386_32:
 281                                 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
 282                                 break;
 283                 //      case R_386_PC32:
 284                 //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
 285                 //              break;
 286                         default:
 287                                 assert(false && "Unsupported relocation type");
 288                                 return nullptr;
 289                         }
 290                 }
 291
 292                 return symbolValue;
 293         }
 294
 295         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
 296         {
 297                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 298
 299                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 300                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 301                 uint32_t index = relocation.getSymbol();
 302                 int table = relocationTable.sh_link;
 303                 void *symbolValue = nullptr;
 304
 305                 if(index != SHN_UNDEF)
 306                 {
 307                         if(table == SHN_UNDEF) return nullptr;
 308                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 309
 310                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 311                         if(index >= symtab_entries)
 312                         {
 313                                 assert(index < symtab_entries && "Symbol Index out of range");
 314                                 return nullptr;
 315                         }
 316
 317                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 318                         Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
 319                         uint16_t section = symbol.st_shndx;
 320
 321                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 322                         {
 323                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 324                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 325                         }
 326                         else
 327                         {
 328                                 return nullptr;
 329                         }
 330                 }
 331
 332                 switch(relocation.getType())
 333                 {
 334                 case R_X86_64_NONE:
 335                         // No relocation
 336                         break;
 337                 case R_X86_64_64:
 338                         *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
 339                         break;
 340                 case R_X86_64_PC32:
 341                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
 342                         break;
 343                 case R_X86_64_32S:
 344                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
 345                         break;
 346                 default:
 347                         assert(false && "Unsupported relocation type");
 348                         return nullptr;
 349                 }
 350
 351                 return symbolValue;
 352         }
 353
 354         void *loadImage(uint8_t *const elfImage, size_t &codeSize)
 355         {
 356                 ElfHeader *elfHeader = (ElfHeader*)elfImage;
 357
 358                 if(!elfHeader->checkMagic())
 359                 {
 360                         return nullptr;
 361                 }
 362
 363                 // Expect ELF bitness to match platform
 364                 assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
 365                 #if defined(__i386__)
 366                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
 367                 #elif defined(__x86_64__)
 368                         assert(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
 369                 #elif defined(__arm__)
 370                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
 371                 #else
 372                         #error "Unsupported platform"
 373                 #endif
 374
 375                 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
 376                 void *entry = nullptr;
 377
 378                 for(int i = 0; i < elfHeader->e_shnum; i++)
 379                 {
 380                         if(sectionHeader[i].sh_type == SHT_PROGBITS)
 381                         {
 382                                 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
 383                                 {
 384                                         entry = elfImage + sectionHeader[i].sh_offset;
 385                                         codeSize = sectionHeader[i].sh_size;
 386                                 }
 387                         }
 388                         else if(sectionHeader[i].sh_type == SHT_REL)
 389                         {
 390                                 assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code
 391
 392                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 393                                 {
 394                                         const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
 395                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 396                                 }
 397                         }
 398                         else if(sectionHeader[i].sh_type == SHT_RELA)
 399                         {
 400                                 assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code
 401
 402                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 403                                 {
 404                                         const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
 405                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 406                                 }
 407                         }
 408                 }
 409
 410                 return entry;
 411         }
 412
 413         template<typename T>
 414         struct ExecutableAllocator
 415         {
 416                 ExecutableAllocator() {};
 417                 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
 418
 419                 using value_type = T;
 420                 using size_type = std::size_t;
 421
 422                 T *allocate(size_type n)
 423                 {
 424                         #if defined(_WIN32)
 425                                 return (T*)VirtualAlloc(NULL, sizeof(T) * n, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
 426                         #else
 427                                 return (T*)mmap(nullptr, sizeof(T) * n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 428                         #endif
 429                 }
 430
 431                 void deallocate(T *p, size_type n)
 432                 {
 433                         #if defined(_WIN32)
 434                                 VirtualFree(p, 0, MEM_RELEASE);
 435                         #else
 436                                 munmap(p, sizeof(T) * n);
 437                         #endif
 438                 }
 439         };
 440
 441         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
 442         {
 443                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
 444                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
 445
 446         public:
 447                 ELFMemoryStreamer() : Routine(), entry(nullptr)
 448                 {
 449                         position = 0;
 450                         buffer.reserve(0x1000);
 451                 }
 452
 453                 ~ELFMemoryStreamer() override
 454                 {
 455                         #if defined(_WIN32)
 456                                 if(buffer.size() != 0)
 457                                 {
 458                                         DWORD exeProtection;
 459                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
 460                                 }
 461                         #endif
 462                 }
 463
 464                 void write8(uint8_t Value) override
 465                 {
 466                         if(position == (uint64_t)buffer.size())
 467                         {
 468                                 buffer.push_back(Value);
 469                                 position++;
 470                         }
 471                         else if(position < (uint64_t)buffer.size())
 472                         {
 473                                 buffer[position] = Value;
 474                                 position++;
 475                         }
 476                         else assert(false && "UNIMPLEMENTED");
 477                 }
 478
 479                 void writeBytes(llvm::StringRef Bytes) override
 480                 {
 481                         std::size_t oldSize = buffer.size();
 482                         buffer.resize(oldSize + Bytes.size());
 483                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
 484                         position += Bytes.size();
 485                 }
 486
 487                 uint64_t tell() const override { return position; }
 488
 489                 void seek(uint64_t Off) override { position = Off; }
 490
 491                 const void *getEntry() override
 492                 {
 493                         if(!entry)
 494                         {
 495                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
 496
 497                                 size_t codeSize = 0;
 498                                 entry = loadImage(&buffer[0], codeSize);
 499
 500                                 #if defined(_WIN32)
 501                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READ, &oldProtection);
 502                                         FlushInstructionCache(GetCurrentProcess(), NULL, 0);
 503                                 #else
 504                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_EXEC);
 505                                         __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
 506                                 #endif
 507                         }
 508
 509                         return entry;
 510                 }
 511
 512         private:
 513                 void *entry;
 514                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
 515                 std::size_t position;
 516
 517                 #if defined(_WIN32)
 518                 DWORD oldProtection;
 519                 #endif
 520         };
 521
 522         Nucleus::Nucleus()
 523         {
 524                 ::codegenMutex.lock();   // Reactor is currently not thread safe
 525
 526                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
 527                 Ice::ClFlags::getParsedClFlags(Flags);
 528
 529                 #if defined(__arm__)
 530                         Flags.setTargetArch(Ice::Target_ARM32);
 531                         Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
 532                 #else   // x86
 533                         Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
 534                         Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
 535                 #endif
 536                 Flags.setOutFileType(Ice::FT_Elf);
 537                 Flags.setOptLevel(Ice::Opt_2);
 538                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
 539                 Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
 540                 Flags.setDisableHybridAssembly(true);
 541
 542                 static llvm::raw_os_ostream cout(std::cout);
 543                 static llvm::raw_os_ostream cerr(std::cerr);
 544
 545                 if(false)   // Write out to a file
 546                 {
 547                         std::error_code errorCode;
 548                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
 549                         ::elfFile = new Ice::ELFFileStreamer(*out);
 550                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
 551                 }
 552                 else
 553                 {
 554                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
 555                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
 556                         ::routine = elfMemory;
 557                 }
 558         }
 559
 560         Nucleus::~Nucleus()
 561         {
 562                 delete ::routine;
 563
 564                 delete ::allocator;
 565                 delete ::function;
 566                 delete ::context;
 567
 568                 delete ::elfFile;
 569                 delete ::out;
 570
 571                 ::codegenMutex.unlock();
 572         }
 573
 574         Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
 575         {
 576                 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
 577                 {
 578                         createRetVoid();
 579                 }
 580
 581                 std::wstring wideName(name);
 582                 std::string asciiName(wideName.begin(), wideName.end());
 583                 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));
 584
 585                 optimize();
 586
 587                 ::function->translate();
 588                 assert(!::function->hasError());
 589
 590                 auto globals = ::function->getGlobalInits();
 591
 592                 if(globals && !globals->empty())
 593                 {
 594                         ::context->getGlobals()->merge(globals.get());
 595                 }
 596
 597                 ::context->emitFileHeader();
 598                 ::function->emitIAS();
 599                 auto assembler = ::function->releaseAssembler();
 600                 auto objectWriter = ::context->getObjectWriter();
 601                 assembler->alignFunction();
 602                 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
 603                 ::context->lowerGlobals("last");
 604                 ::context->lowerConstants();
 605                 ::context->lowerJumpTables();
 606                 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
 607                 objectWriter->writeNonUserSections();
 608
 609                 Routine *handoffRoutine = ::routine;
 610                 ::routine = nullptr;
 611
 612                 return handoffRoutine;
 613         }
 614
 615         void Nucleus::optimize()
 616         {
 617                 sw::optimize(::function);
 618         }
 619
 620         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
 621         {
 622                 Ice::Type type = T(t);
 623                 int typeSize = Ice::typeWidthInBytes(type);
 624                 int totalSize = typeSize * (arraySize ? arraySize : 1);
 625
 626                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
 627                 auto address = ::function->makeVariable(T(getPointerType(t)));
 628                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
 629                 ::function->getEntryNode()->getInsts().push_front(alloca);
 630
 631                 return V(address);
 632         }
 633
 634         BasicBlock *Nucleus::createBasicBlock()
 635         {
 636                 return B(::function->makeNode());
 637         }
 638
 639         BasicBlock *Nucleus::getInsertBlock()
 640         {
 641                 return B(::basicBlock);
 642         }
 643
 644         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
 645         {
 646         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
 647                 ::basicBlock = basicBlock;
 648         }
 649
 650         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
 651         {
 652                 uint32_t sequenceNumber = 0;
 653                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
 654                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
 655
 656                 for(Type *type : Params)
 657                 {
 658                         Ice::Variable *arg = ::function->makeVariable(T(type));
 659                         ::function->addArg(arg);
 660                 }
 661
 662                 Ice::CfgNode *node = ::function->makeNode();
 663                 ::function->setEntryNode(node);
 664                 ::basicBlock = node;
 665         }
 666
 667         Value *Nucleus::getArgument(unsigned int index)
 668         {
 669                 return V(::function->getArgs()[index]);
 670         }
 671
 672         void Nucleus::createRetVoid()
 673         {
 674                 Ice::InstRet *ret = Ice::InstRet::create(::function);
 675                 ::basicBlock->appendInst(ret);
 676         }
 677
 678         void Nucleus::createRet(Value *v)
 679         {
 680                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
 681                 ::basicBlock->appendInst(ret);
 682         }
 683
 684         void Nucleus::createBr(BasicBlock *dest)
 685         {
 686                 auto br = Ice::InstBr::create(::function, dest);
 687                 ::basicBlock->appendInst(br);
 688         }
 689
 690         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
 691         {
 692                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
 693                 ::basicBlock->appendInst(br);
 694         }
 695
 696         static bool isCommutative(Ice::InstArithmetic::OpKind op)
 697         {
 698                 switch(op)
 699                 {
 700                 case Ice::InstArithmetic::Add:
 701                 case Ice::InstArithmetic::Fadd:
 702                 case Ice::InstArithmetic::Mul:
 703                 case Ice::InstArithmetic::Fmul:
 704                 case Ice::InstArithmetic::And:
 705                 case Ice::InstArithmetic::Or:
 706                 case Ice::InstArithmetic::Xor:
 707                         return true;
 708                 default:
 709                         return false;
 710                 }
 711         }
 712
 713         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
 714         {
 715                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
 716
 717                 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
 718
 719                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
 720                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
 721                 ::basicBlock->appendInst(arithmetic);
 722
 723                 return V(result);
 724         }
 725
 726         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
 727         {
 728                 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
 729         }
 730
 731         Value *Nucleus::createSub(Value *lhs, Value *rhs)
 732         {
 733                 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
 734         }
 735
 736         Value *Nucleus::createMul(Value *lhs, Value *rhs)
 737         {
 738                 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
 739         }
 740
 741         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
 742         {
 743                 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
 744         }
 745
 746         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
 747         {
 748                 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
 749         }
 750
 751         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
 752         {
 753                 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
 754         }
 755
 756         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
 757         {
 758                 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
 759         }
 760
 761         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
 762         {
 763                 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
 764         }
 765
 766         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
 767         {
 768                 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
 769         }
 770
 771         Value *Nucleus::createURem(Value *lhs, Value *rhs)
 772         {
 773                 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
 774         }
 775
 776         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
 777         {
 778                 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
 779         }
 780
 781         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
 782         {
 783                 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
 784         }
 785
 786         Value *Nucleus::createShl(Value *lhs, Value *rhs)
 787         {
 788                 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
 789         }
 790
 791         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
 792         {
 793                 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
 794         }
 795
 796         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
 797         {
 798                 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
 799         }
 800
 801         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
 802         {
 803                 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
 804         }
 805
 806         Value *Nucleus::createOr(Value *lhs, Value *rhs)
 807         {
 808                 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
 809         }
 810
 811         Value *Nucleus::createXor(Value *lhs, Value *rhs)
 812         {
 813                 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
 814         }
 815
 816         Value *Nucleus::createNeg(Value *v)
 817         {
 818                 return createSub(createNullValue(T(v->getType())), v);
 819         }
 820
 821         Value *Nucleus::createFNeg(Value *v)
 822         {
 823                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
 824                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
 825                                       createConstantVector(c, T(v->getType())) :
 826                                       V(::context->getConstantFloat(-0.0f));
 827
 828                 return createFSub(negativeZero, v);
 829         }
 830
 831         Value *Nucleus::createNot(Value *v)
 832         {
 833                 if(Ice::isScalarIntegerType(v->getType()))
 834                 {
 835                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
 836                 }
 837                 else   // Vector
 838                 {
 839                         int64_t c[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
 840                         return createXor(v, createConstantVector(c, T(v->getType())));
 841                 }
 842         }
 843
 844         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
 845         {
 846                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 847                 Ice::Variable *result = ::function->makeVariable(T(type));
 848
 849                 if(valueType & EmulatedBits)
 850                 {
 851                         if(emulateIntrinsics)
 852                         {
 853                                 if(typeSize(type) == 4)
 854                                 {
 855                                         auto pointer = RValue<Pointer<Byte>>(ptr);
 856                                         Int x = *Pointer<Int>(pointer);
 857
 858                                         Int4 vector;
 859                                         vector = Insert(vector, x, 0);
 860
 861                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
 862                                         ::basicBlock->appendInst(bitcast);
 863                                 }
 864                                 else if(typeSize(type) == 8)
 865                                 {
 866                                         auto pointer = RValue<Pointer<Byte>>(ptr);
 867                                         Int x = *Pointer<Int>(pointer);
 868                                         Int y = *Pointer<Int>(pointer + 4);
 869
 870                                         Int4 vector;
 871                                         vector = Insert(vector, x, 0);
 872                                         vector = Insert(vector, y, 1);
 873
 874                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
 875                                         ::basicBlock->appendInst(bitcast);
 876                                 }
 877                                 else assert(false);
 878                         }
 879                         else
 880                         {
 881                                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
 882                                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
 883                                 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
 884                                 load->addArg(ptr);
 885                                 load->addArg(::context->getConstantInt32(typeSize(type)));
 886                                 ::basicBlock->appendInst(load);
 887                         }
 888                 }
 889                 else
 890                 {
 891                         auto load = Ice::InstLoad::create(::function, result, ptr, align);
 892                         ::basicBlock->appendInst(load);
 893                 }
 894
 895                 return V(result);
 896         }
 897
 898         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
 899         {
 900                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 901
 902                 if(valueType & EmulatedBits)
 903                 {
 904                         if(emulateIntrinsics)
 905                         {
 906                                 if(typeSize(type) == 4)
 907                                 {
 908                                         Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
 909                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
 910                                         ::basicBlock->appendInst(bitcast);
 911
 912                                         RValue<Int4> v(V(vector));
 913
 914                                         auto pointer = RValue<Pointer<Byte>>(ptr);
 915                                         Int x = Extract(v, 0);
 916                                         *Pointer<Int>(pointer) = x;
 917                                 }
 918                                 else if(typeSize(type) == 8)
 919                                 {
 920                                         Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
 921                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
 922                                         ::basicBlock->appendInst(bitcast);
 923
 924                                         RValue<Int4> v(V(vector));
 925
 926                                         auto pointer = RValue<Pointer<Byte>>(ptr);
 927                                         Int x = Extract(v, 0);
 928                                         *Pointer<Int>(pointer) = x;
 929                                         Int y = Extract(v, 1);
 930                                         *Pointer<Int>(pointer + 4) = y;
 931                                 }
 932                                 else assert(false);
 933                         }
 934                         else
 935                         {
 936                                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
 937                                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
 938                                 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
 939                                 store->addArg(value);
 940                                 store->addArg(ptr);
 941                                 store->addArg(::context->getConstantInt32(typeSize(type)));
 942                                 ::basicBlock->appendInst(store);
 943                         }
 944                 }
 945                 else
 946                 {
 947                         assert(T(value->getType()) == type);
 948
 949                         auto store = Ice::InstStore::create(::function, value, ptr, align);
 950                         ::basicBlock->appendInst(store);
 951                 }
 952
 953                 return value;
 954         }
 955
 956         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
 957         {
 958                 assert(index->getType() == Ice::IceType_i32);
 959
 960                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
 961                 {
 962                         int32_t offset = constant->getValue() * (int)typeSize(type);
 963
 964                         if(offset == 0)
 965                         {
 966                                 return ptr;
 967                         }
 968
 969                         return createAdd(ptr, createConstantInt(offset));
 970                 }
 971
 972                 if(!Ice::isByteSizedType(T(type)))
 973                 {
 974                         index = createMul(index, createConstantInt((int)typeSize(type)));
 975                 }
 976
 977                 if(sizeof(void*) == 8)
 978                 {
 979                         if(unsignedIndex)
 980                         {
 981                                 index = createZExt(index, T(Ice::IceType_i64));
 982                         }
 983                         else
 984                         {
 985                                 index = createSExt(index, T(Ice::IceType_i64));
 986                         }
 987                 }
 988
 989                 return createAdd(ptr, index);
 990         }
 991
 992         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
 993         {
 994                 assert(false && "UNIMPLEMENTED"); return nullptr;
 995         }
 996
 997         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
 998         {
 999                 if(v->getType() == T(destType))
1000                 {
1001                         return v;
1002                 }
1003
1004                 Ice::Variable *result = ::function->makeVariable(T(destType));
1005                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1006                 ::basicBlock->appendInst(cast);
1007
1008                 return V(result);
1009         }
1010
1011         Value *Nucleus::createTrunc(Value *v, Type *destType)
1012         {
1013                 return createCast(Ice::InstCast::Trunc, v, destType);
1014         }
1015
1016         Value *Nucleus::createZExt(Value *v, Type *destType)
1017         {
1018                 return createCast(Ice::InstCast::Zext, v, destType);
1019         }
1020
1021         Value *Nucleus::createSExt(Value *v, Type *destType)
1022         {
1023                 return createCast(Ice::InstCast::Sext, v, destType);
1024         }
1025
1026         Value *Nucleus::createFPToSI(Value *v, Type *destType)
1027         {
1028                 return createCast(Ice::InstCast::Fptosi, v, destType);
1029         }
1030
1031         Value *Nucleus::createSIToFP(Value *v, Type *destType)
1032         {
1033                 return createCast(Ice::InstCast::Sitofp, v, destType);
1034         }
1035
1036         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1037         {
1038                 return createCast(Ice::InstCast::Fptrunc, v, destType);
1039         }
1040
1041         Value *Nucleus::createFPExt(Value *v, Type *destType)
1042         {
1043                 return createCast(Ice::InstCast::Fpext, v, destType);
1044         }
1045
1046         Value *Nucleus::createBitCast(Value *v, Type *destType)
1047         {
1048                 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1049                 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1050                 // emulate them by writing to the stack and reading back as the destination type.
1051                 if(emulateMismatchedBitCast)
1052                 {
1053                         if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1054                         {
1055                                 Value *address = allocateStackVariable(destType);
1056                                 createStore(v, address, T(v->getType()));
1057                                 return createLoad(address, destType);
1058                         }
1059                         else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1060                         {
1061                                 Value *address = allocateStackVariable(T(v->getType()));
1062                                 createStore(v, address, T(v->getType()));
1063                                 return createLoad(address, destType);
1064                         }
1065                 }
1066
1067                 return createCast(Ice::InstCast::Bitcast, v, destType);
1068         }
1069
1070         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1071         {
1072                 assert(lhs->getType() == rhs->getType());
1073
1074                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1075                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1076                 ::basicBlock->appendInst(cmp);
1077
1078                 return V(result);
1079         }
1080
1081         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1082         {
1083                 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1084         }
1085
1086         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1087         {
1088                 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1089         }
1090
1091         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1092         {
1093                 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1094         }
1095
1096         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1097         {
1098                 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1099         }
1100
1101         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1102         {
1103                 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1104         }
1105
1106         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1107         {
1108                 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1109         }
1110
1111         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1112         {
1113                 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1114         }
1115
1116         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1117         {
1118                 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1119         }
1120
1121         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1122         {
1123                 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1124         }
1125
1126         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1127         {
1128                 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1129         }
1130
1131         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1132         {
1133                 assert(lhs->getType() == rhs->getType());
1134                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1135
1136                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1137                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1138                 ::basicBlock->appendInst(cmp);
1139
1140                 return V(result);
1141         }
1142
1143         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1144         {
1145                 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1146         }
1147
1148         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1149         {
1150                 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1151         }
1152
1153         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1154         {
1155                 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1156         }
1157
1158         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1159         {
1160                 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1161         }
1162
1163         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1164         {
1165                 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1166         }
1167
1168         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1169         {
1170                 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1171         }
1172
1173         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1174         {
1175                 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1176         }
1177
1178         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1179         {
1180                 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1181         }
1182
1183         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1184         {
1185                 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1186         }
1187
1188         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1189         {
1190                 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1191         }
1192
1193         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1194         {
1195                 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1196         }
1197
1198         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1199         {
1200                 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1201         }
1202
1203         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1204         {
1205                 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1206         }
1207
1208         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1209         {
1210                 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1211         }
1212
1213         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1214         {
1215                 auto result = ::function->makeVariable(T(type));
1216                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1217                 ::basicBlock->appendInst(extract);
1218
1219                 return V(result);
1220         }
1221
1222         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1223         {
1224                 auto result = ::function->makeVariable(vector->getType());
1225                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1226                 ::basicBlock->appendInst(insert);
1227
1228                 return V(result);
1229         }
1230
1231         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1232         {
1233                 assert(V1->getType() == V2->getType());
1234
1235                 int size = Ice::typeNumElements(V1->getType());
1236                 auto result = ::function->makeVariable(V1->getType());
1237                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1238
1239                 for(int i = 0; i < size; i++)
1240                 {
1241                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1242                 }
1243
1244                 ::basicBlock->appendInst(shuffle);
1245
1246                 return V(result);
1247         }
1248
1249         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1250         {
1251                 assert(ifTrue->getType() == ifFalse->getType());
1252
1253                 auto result = ::function->makeVariable(ifTrue->getType());
1254                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1255                 ::basicBlock->appendInst(select);
1256
1257                 return V(result);
1258         }
1259
1260         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1261         {
1262                 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1263                 ::basicBlock->appendInst(switchInst);
1264
1265                 return reinterpret_cast<SwitchCases*>(switchInst);
1266         }
1267
1268         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1269         {
1270                 switchCases->addBranch(label, label, branch);
1271         }
1272
1273         void Nucleus::createUnreachable()
1274         {
1275                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1276                 ::basicBlock->appendInst(unreachable);
1277         }
1278
1279         static Value *createSwizzle4(Value *val, unsigned char select)
1280         {
1281                 int swizzle[4] =
1282                 {
1283                         (select >> 0) & 0x03,
1284                         (select >> 2) & 0x03,
1285                         (select >> 4) & 0x03,
1286                         (select >> 6) & 0x03,
1287                 };
1288
1289                 return Nucleus::createShuffleVector(val, val, swizzle);
1290         }
1291
1292         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1293         {
1294                 int64_t mask[4] = {0, 0, 0, 0};
1295
1296                 mask[(select >> 0) & 0x03] = -1;
1297                 mask[(select >> 2) & 0x03] = -1;
1298                 mask[(select >> 4) & 0x03] = -1;
1299                 mask[(select >> 6) & 0x03] = -1;
1300
1301                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1302                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1303
1304                 return result;
1305         }
1306
1307         Type *Nucleus::getPointerType(Type *ElementType)
1308         {
1309                 if(sizeof(void*) == 8)
1310                 {
1311                         return T(Ice::IceType_i64);
1312                 }
1313                 else
1314                 {
1315                         return T(Ice::IceType_i32);
1316                 }
1317         }
1318
1319         Value *Nucleus::createNullValue(Type *Ty)
1320         {
1321                 if(Ice::isVectorType(T(Ty)))
1322                 {
1323                         assert(Ice::typeNumElements(T(Ty)) <= 16);
1324                         int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1325                         return createConstantVector(c, Ty);
1326                 }
1327                 else
1328                 {
1329                         return V(::context->getConstantZero(T(Ty)));
1330                 }
1331         }
1332
1333         Value *Nucleus::createConstantLong(int64_t i)
1334         {
1335                 return V(::context->getConstantInt64(i));
1336         }
1337
1338         Value *Nucleus::createConstantInt(int i)
1339         {
1340                 return V(::context->getConstantInt32(i));
1341         }
1342
1343         Value *Nucleus::createConstantInt(unsigned int i)
1344         {
1345                 return V(::context->getConstantInt32(i));
1346         }
1347
1348         Value *Nucleus::createConstantBool(bool b)
1349         {
1350                 return V(::context->getConstantInt1(b));
1351         }
1352
1353         Value *Nucleus::createConstantByte(signed char i)
1354         {
1355                 return V(::context->getConstantInt8(i));
1356         }
1357
1358         Value *Nucleus::createConstantByte(unsigned char i)
1359         {
1360                 return V(::context->getConstantInt8(i));
1361         }
1362
1363         Value *Nucleus::createConstantShort(short i)
1364         {
1365                 return V(::context->getConstantInt16(i));
1366         }
1367
1368         Value *Nucleus::createConstantShort(unsigned short i)
1369         {
1370                 return V(::context->getConstantInt16(i));
1371         }
1372
1373         Value *Nucleus::createConstantFloat(float x)
1374         {
1375                 return V(::context->getConstantFloat(x));
1376         }
1377
1378         Value *Nucleus::createNullPointer(Type *Ty)
1379         {
1380                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1381         }
1382
1383         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1384         {
1385                 const int vectorSize = 16;
1386                 assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
1387                 const int alignment = vectorSize;
1388                 auto globalPool = ::function->getGlobalPool();
1389
1390                 const int64_t *i = constants;
1391                 const double *f = reinterpret_cast<const double*>(constants);
1392                 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1393
1394                 switch((int)reinterpret_cast<intptr_t>(type))
1395                 {
1396                 case Ice::IceType_v4i32:
1397                 case Ice::IceType_v4i1:
1398                         {
1399                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1400                                 static_assert(sizeof(initializer) == vectorSize, "!");
1401                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1402                         }
1403                         break;
1404                 case Ice::IceType_v4f32:
1405                         {
1406                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1407                                 static_assert(sizeof(initializer) == vectorSize, "!");
1408                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1409                         }
1410                         break;
1411                 case Ice::IceType_v8i16:
1412                 case Ice::IceType_v8i1:
1413                         {
1414                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1415                                 static_assert(sizeof(initializer) == vectorSize, "!");
1416                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1417                         }
1418                         break;
1419                 case Ice::IceType_v16i8:
1420                 case Ice::IceType_v16i1:
1421                         {
1422                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1423                                 static_assert(sizeof(initializer) == vectorSize, "!");
1424                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1425                         }
1426                         break;
1427                 case Type_v2i32:
1428                         {
1429                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1430                                 static_assert(sizeof(initializer) == vectorSize, "!");
1431                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1432                         }
1433                         break;
1434                 case Type_v2f32:
1435                         {
1436                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1437                                 static_assert(sizeof(initializer) == vectorSize, "!");
1438                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1439                         }
1440                         break;
1441                 case Type_v4i16:
1442                         {
1443                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1444                                 static_assert(sizeof(initializer) == vectorSize, "!");
1445                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1446                         }
1447                         break;
1448                 case Type_v8i8:
1449                         {
1450                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1451                                 static_assert(sizeof(initializer) == vectorSize, "!");
1452                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1453                         }
1454                         break;
1455                 case Type_v4i8:
1456                         {
1457                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1458                                 static_assert(sizeof(initializer) == vectorSize, "!");
1459                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1460                         }
1461                         break;
1462                 default:
1463                         assert(false && "Unknown constant vector type" && type);
1464                 }
1465
1466                 auto name = Ice::GlobalString::createWithoutString(::context);
1467                 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1468                 variableDeclaration->setName(name);
1469                 variableDeclaration->setAlignment(alignment);
1470                 variableDeclaration->setIsConstant(true);
1471                 variableDeclaration->addInitializer(dataInitializer);
1472
1473                 ::function->addGlobal(variableDeclaration);
1474
1475                 constexpr int32_t offset = 0;
1476                 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1477
1478                 Ice::Variable *result = ::function->makeVariable(T(type));
1479                 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1480                 ::basicBlock->appendInst(load);
1481
1482                 return V(result);
1483         }
1484
1485         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1486         {
1487                 return createConstantVector((const int64_t*)constants, type);
1488         }
1489
1490         Type *Void::getType()
1491         {
1492                 return T(Ice::IceType_void);
1493         }
1494
1495         Bool::Bool(Argument<Bool> argument)
1496         {
1497                 storeValue(argument.value);
1498         }
1499
1500         Bool::Bool(bool x)
1501         {
1502                 storeValue(Nucleus::createConstantBool(x));
1503         }
1504
1505         Bool::Bool(RValue<Bool> rhs)
1506         {
1507                 storeValue(rhs.value);
1508         }
1509
1510         Bool::Bool(const Bool &rhs)
1511         {
1512                 Value *value = rhs.loadValue();
1513                 storeValue(value);
1514         }
1515
1516         Bool::Bool(const Reference<Bool> &rhs)
1517         {
1518                 Value *value = rhs.loadValue();
1519                 storeValue(value);
1520         }
1521
1522         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1523         {
1524                 storeValue(rhs.value);
1525
1526                 return rhs;
1527         }
1528
1529         RValue<Bool> Bool::operator=(const Bool &rhs)
1530         {
1531                 Value *value = rhs.loadValue();
1532                 storeValue(value);
1533
1534                 return RValue<Bool>(value);
1535         }
1536
1537         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1538         {
1539                 Value *value = rhs.loadValue();
1540                 storeValue(value);
1541
1542                 return RValue<Bool>(value);
1543         }
1544
1545         RValue<Bool> operator!(RValue<Bool> val)
1546         {
1547                 return RValue<Bool>(Nucleus::createNot(val.value));
1548         }
1549
1550         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1551         {
1552                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1553         }
1554
1555         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1556         {
1557                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1558         }
1559
1560         Type *Bool::getType()
1561         {
1562                 return T(Ice::IceType_i1);
1563         }
1564
1565         Byte::Byte(Argument<Byte> argument)
1566         {
1567                 storeValue(argument.value);
1568         }
1569
1570         Byte::Byte(RValue<Int> cast)
1571         {
1572                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1573
1574                 storeValue(integer);
1575         }
1576
1577         Byte::Byte(RValue<UInt> cast)
1578         {
1579                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1580
1581                 storeValue(integer);
1582         }
1583
1584         Byte::Byte(RValue<UShort> cast)
1585         {
1586                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1587
1588                 storeValue(integer);
1589         }
1590
1591         Byte::Byte(int x)
1592         {
1593                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1594         }
1595
1596         Byte::Byte(unsigned char x)
1597         {
1598                 storeValue(Nucleus::createConstantByte(x));
1599         }
1600
1601         Byte::Byte(RValue<Byte> rhs)
1602         {
1603                 storeValue(rhs.value);
1604         }
1605
1606         Byte::Byte(const Byte &rhs)
1607         {
1608                 Value *value = rhs.loadValue();
1609                 storeValue(value);
1610         }
1611
1612         Byte::Byte(const Reference<Byte> &rhs)
1613         {
1614                 Value *value = rhs.loadValue();
1615                 storeValue(value);
1616         }
1617
1618         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1619         {
1620                 storeValue(rhs.value);
1621
1622                 return rhs;
1623         }
1624
1625         RValue<Byte> Byte::operator=(const Byte &rhs)
1626         {
1627                 Value *value = rhs.loadValue();
1628                 storeValue(value);
1629
1630                 return RValue<Byte>(value);
1631         }
1632
1633         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1634         {
1635                 Value *value = rhs.loadValue();
1636                 storeValue(value);
1637
1638                 return RValue<Byte>(value);
1639         }
1640
1641         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1642         {
1643                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1644         }
1645
1646         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1647         {
1648                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1649         }
1650
1651         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1652         {
1653                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1654         }
1655
1656         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1657         {
1658                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1659         }
1660
1661         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1662         {
1663                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1664         }
1665
1666         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1667         {
1668                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1669         }
1670
1671         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1672         {
1673                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1674         }
1675
1676         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1677         {
1678                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1679         }
1680
1681         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1682         {
1683                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1684         }
1685
1686         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1687         {
1688                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1689         }
1690
1691         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1692         {
1693                 return lhs = lhs + rhs;
1694         }
1695
1696         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1697         {
1698                 return lhs = lhs - rhs;
1699         }
1700
1701         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1702         {
1703                 return lhs = lhs * rhs;
1704         }
1705
1706         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1707         {
1708                 return lhs = lhs / rhs;
1709         }
1710
1711         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1712         {
1713                 return lhs = lhs % rhs;
1714         }
1715
1716         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1717         {
1718                 return lhs = lhs & rhs;
1719         }
1720
1721         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1722         {
1723                 return lhs = lhs | rhs;
1724         }
1725
1726         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1727         {
1728                 return lhs = lhs ^ rhs;
1729         }
1730
1731         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1732         {
1733                 return lhs = lhs << rhs;
1734         }
1735
1736         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1737         {
1738                 return lhs = lhs >> rhs;
1739         }
1740
1741         RValue<Byte> operator+(RValue<Byte> val)
1742         {
1743                 return val;
1744         }
1745
1746         RValue<Byte> operator-(RValue<Byte> val)
1747         {
1748                 return RValue<Byte>(Nucleus::createNeg(val.value));
1749         }
1750
1751         RValue<Byte> operator~(RValue<Byte> val)
1752         {
1753                 return RValue<Byte>(Nucleus::createNot(val.value));
1754         }
1755
1756         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1757         {
1758                 RValue<Byte> res = val;
1759                 val += Byte(1);
1760                 return res;
1761         }
1762
1763         const Byte &operator++(Byte &val)   // Pre-increment
1764         {
1765                 val += Byte(1);
1766                 return val;
1767         }
1768
1769         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1770         {
1771                 RValue<Byte> res = val;
1772                 val -= Byte(1);
1773                 return res;
1774         }
1775
1776         const Byte &operator--(Byte &val)   // Pre-decrement
1777         {
1778                 val -= Byte(1);
1779                 return val;
1780         }
1781
1782         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1783         {
1784                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1785         }
1786
1787         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1788         {
1789                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1790         }
1791
1792         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1793         {
1794                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1795         }
1796
1797         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1798         {
1799                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1800         }
1801
1802         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1803         {
1804                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1805         }
1806
1807         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1808         {
1809                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1810         }
1811
1812         Type *Byte::getType()
1813         {
1814                 return T(Ice::IceType_i8);
1815         }
1816
1817         SByte::SByte(Argument<SByte> argument)
1818         {
1819                 storeValue(argument.value);
1820         }
1821
1822         SByte::SByte(RValue<Int> cast)
1823         {
1824                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1825
1826                 storeValue(integer);
1827         }
1828
1829         SByte::SByte(RValue<Short> cast)
1830         {
1831                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1832
1833                 storeValue(integer);
1834         }
1835
1836         SByte::SByte(signed char x)
1837         {
1838                 storeValue(Nucleus::createConstantByte(x));
1839         }
1840
1841         SByte::SByte(RValue<SByte> rhs)
1842         {
1843                 storeValue(rhs.value);
1844         }
1845
1846         SByte::SByte(const SByte &rhs)
1847         {
1848                 Value *value = rhs.loadValue();
1849                 storeValue(value);
1850         }
1851
1852         SByte::SByte(const Reference<SByte> &rhs)
1853         {
1854                 Value *value = rhs.loadValue();
1855                 storeValue(value);
1856         }
1857
1858         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1859         {
1860                 storeValue(rhs.value);
1861
1862                 return rhs;
1863         }
1864
1865         RValue<SByte> SByte::operator=(const SByte &rhs)
1866         {
1867                 Value *value = rhs.loadValue();
1868                 storeValue(value);
1869
1870                 return RValue<SByte>(value);
1871         }
1872
1873         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1874         {
1875                 Value *value = rhs.loadValue();
1876                 storeValue(value);
1877
1878                 return RValue<SByte>(value);
1879         }
1880
1881         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1882         {
1883                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1884         }
1885
1886         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1887         {
1888                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1889         }
1890
1891         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1892         {
1893                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1894         }
1895
1896         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1897         {
1898                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1899         }
1900
1901         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1902         {
1903                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1904         }
1905
1906         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1907         {
1908                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1909         }
1910
1911         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1912         {
1913                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1914         }
1915
1916         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1917         {
1918                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1919         }
1920
1921         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1922         {
1923                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1924         }
1925
1926         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1927         {
1928                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1929         }
1930
1931         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1932         {
1933                 return lhs = lhs + rhs;
1934         }
1935
1936         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1937         {
1938                 return lhs = lhs - rhs;
1939         }
1940
1941         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1942         {
1943                 return lhs = lhs * rhs;
1944         }
1945
1946         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1947         {
1948                 return lhs = lhs / rhs;
1949         }
1950
1951         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1952         {
1953                 return lhs = lhs % rhs;
1954         }
1955
1956         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1957         {
1958                 return lhs = lhs & rhs;
1959         }
1960
1961         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1962         {
1963                 return lhs = lhs | rhs;
1964         }
1965
1966         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1967         {
1968                 return lhs = lhs ^ rhs;
1969         }
1970
1971         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1972         {
1973                 return lhs = lhs << rhs;
1974         }
1975
1976         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1977         {
1978                 return lhs = lhs >> rhs;
1979         }
1980
1981         RValue<SByte> operator+(RValue<SByte> val)
1982         {
1983                 return val;
1984         }
1985
1986         RValue<SByte> operator-(RValue<SByte> val)
1987         {
1988                 return RValue<SByte>(Nucleus::createNeg(val.value));
1989         }
1990
1991         RValue<SByte> operator~(RValue<SByte> val)
1992         {
1993                 return RValue<SByte>(Nucleus::createNot(val.value));
1994         }
1995
1996         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1997         {
1998                 RValue<SByte> res = val;
1999                 val += SByte(1);
2000                 return res;
2001         }
2002
2003         const SByte &operator++(SByte &val)   // Pre-increment
2004         {
2005                 val += SByte(1);
2006                 return val;
2007         }
2008
2009         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
2010         {
2011                 RValue<SByte> res = val;
2012                 val -= SByte(1);
2013                 return res;
2014         }
2015
2016         const SByte &operator--(SByte &val)   // Pre-decrement
2017         {
2018                 val -= SByte(1);
2019                 return val;
2020         }
2021
2022         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
2023         {
2024                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2025         }
2026
2027         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
2028         {
2029                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2030         }
2031
2032         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
2033         {
2034                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2035         }
2036
2037         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
2038         {
2039                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2040         }
2041
2042         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
2043         {
2044                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2045         }
2046
2047         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
2048         {
2049                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2050         }
2051
2052         Type *SByte::getType()
2053         {
2054                 return T(Ice::IceType_i8);
2055         }
2056
2057         Short::Short(Argument<Short> argument)
2058         {
2059                 storeValue(argument.value);
2060         }
2061
2062         Short::Short(RValue<Int> cast)
2063         {
2064                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
2065
2066                 storeValue(integer);
2067         }
2068
2069         Short::Short(short x)
2070         {
2071                 storeValue(Nucleus::createConstantShort(x));
2072         }
2073
2074         Short::Short(RValue<Short> rhs)
2075         {
2076                 storeValue(rhs.value);
2077         }
2078
2079         Short::Short(const Short &rhs)
2080         {
2081                 Value *value = rhs.loadValue();
2082                 storeValue(value);
2083         }
2084
2085         Short::Short(const Reference<Short> &rhs)
2086         {
2087                 Value *value = rhs.loadValue();
2088                 storeValue(value);
2089         }
2090
2091         RValue<Short> Short::operator=(RValue<Short> rhs)
2092         {
2093                 storeValue(rhs.value);
2094
2095                 return rhs;
2096         }
2097
2098         RValue<Short> Short::operator=(const Short &rhs)
2099         {
2100                 Value *value = rhs.loadValue();
2101                 storeValue(value);
2102
2103                 return RValue<Short>(value);
2104         }
2105
2106         RValue<Short> Short::operator=(const Reference<Short> &rhs)
2107         {
2108                 Value *value = rhs.loadValue();
2109                 storeValue(value);
2110
2111                 return RValue<Short>(value);
2112         }
2113
2114         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
2115         {
2116                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
2117         }
2118
2119         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
2120         {
2121                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
2122         }
2123
2124         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
2125         {
2126                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
2127         }
2128
2129         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
2130         {
2131                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
2132         }
2133
2134         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
2135         {
2136                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
2137         }
2138
2139         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
2140         {
2141                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
2142         }
2143
2144         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
2145         {
2146                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
2147         }
2148
2149         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
2150         {
2151                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
2152         }
2153
2154         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
2155         {
2156                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
2157         }
2158
2159         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
2160         {
2161                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
2162         }
2163
2164         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
2165         {
2166                 return lhs = lhs + rhs;
2167         }
2168
2169         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2170         {
2171                 return lhs = lhs - rhs;
2172         }
2173
2174         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2175         {
2176                 return lhs = lhs * rhs;
2177         }
2178
2179         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2180         {
2181                 return lhs = lhs / rhs;
2182         }
2183
2184         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2185         {
2186                 return lhs = lhs % rhs;
2187         }
2188
2189         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2190         {
2191                 return lhs = lhs & rhs;
2192         }
2193
2194         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2195         {
2196                 return lhs = lhs | rhs;
2197         }
2198
2199         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2200         {
2201                 return lhs = lhs ^ rhs;
2202         }
2203
2204         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2205         {
2206                 return lhs = lhs << rhs;
2207         }
2208
2209         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2210         {
2211                 return lhs = lhs >> rhs;
2212         }
2213
2214         RValue<Short> operator+(RValue<Short> val)
2215         {
2216                 return val;
2217         }
2218
2219         RValue<Short> operator-(RValue<Short> val)
2220         {
2221                 return RValue<Short>(Nucleus::createNeg(val.value));
2222         }
2223
2224         RValue<Short> operator~(RValue<Short> val)
2225         {
2226                 return RValue<Short>(Nucleus::createNot(val.value));
2227         }
2228
2229         RValue<Short> operator++(Short &val, int)   // Post-increment
2230         {
2231                 RValue<Short> res = val;
2232                 val += Short(1);
2233                 return res;
2234         }
2235
2236         const Short &operator++(Short &val)   // Pre-increment
2237         {
2238                 val += Short(1);
2239                 return val;
2240         }
2241
2242         RValue<Short> operator--(Short &val, int)   // Post-decrement
2243         {
2244                 RValue<Short> res = val;
2245                 val -= Short(1);
2246                 return res;
2247         }
2248
2249         const Short &operator--(Short &val)   // Pre-decrement
2250         {
2251                 val -= Short(1);
2252                 return val;
2253         }
2254
2255         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2256         {
2257                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2258         }
2259
2260         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2261         {
2262                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2263         }
2264
2265         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2266         {
2267                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2268         }
2269
2270         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2271         {
2272                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2273         }
2274
2275         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2276         {
2277                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2278         }
2279
2280         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2281         {
2282                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2283         }
2284
2285         Type *Short::getType()
2286         {
2287                 return T(Ice::IceType_i16);
2288         }
2289
2290         UShort::UShort(Argument<UShort> argument)
2291         {
2292                 storeValue(argument.value);
2293         }
2294
2295         UShort::UShort(RValue<UInt> cast)
2296         {
2297                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2298
2299                 storeValue(integer);
2300         }
2301
2302         UShort::UShort(RValue<Int> cast)
2303         {
2304                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2305
2306                 storeValue(integer);
2307         }
2308
2309         UShort::UShort(unsigned short x)
2310         {
2311                 storeValue(Nucleus::createConstantShort(x));
2312         }
2313
2314         UShort::UShort(RValue<UShort> rhs)
2315         {
2316                 storeValue(rhs.value);
2317         }
2318
2319         UShort::UShort(const UShort &rhs)
2320         {
2321                 Value *value = rhs.loadValue();
2322                 storeValue(value);
2323         }
2324
2325         UShort::UShort(const Reference<UShort> &rhs)
2326         {
2327                 Value *value = rhs.loadValue();
2328                 storeValue(value);
2329         }
2330
2331         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2332         {
2333                 storeValue(rhs.value);
2334
2335                 return rhs;
2336         }
2337
2338         RValue<UShort> UShort::operator=(const UShort &rhs)
2339         {
2340                 Value *value = rhs.loadValue();
2341                 storeValue(value);
2342
2343                 return RValue<UShort>(value);
2344         }
2345
2346         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2347         {
2348                 Value *value = rhs.loadValue();
2349                 storeValue(value);
2350
2351                 return RValue<UShort>(value);
2352         }
2353
2354         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2355         {
2356                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2357         }
2358
2359         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2360         {
2361                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2362         }
2363
2364         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2365         {
2366                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2367         }
2368
2369         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2370         {
2371                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2372         }
2373
2374         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2375         {
2376                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2377         }
2378
2379         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2380         {
2381                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2382         }
2383
2384         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2385         {
2386                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2387         }
2388
2389         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2390         {
2391                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2392         }
2393
2394         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2395         {
2396                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2397         }
2398
2399         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2400         {
2401                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2402         }
2403
2404         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2405         {
2406                 return lhs = lhs + rhs;
2407         }
2408
2409         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2410         {
2411                 return lhs = lhs - rhs;
2412         }
2413
2414         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2415         {
2416                 return lhs = lhs * rhs;
2417         }
2418
2419         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2420         {
2421                 return lhs = lhs / rhs;
2422         }
2423
2424         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2425         {
2426                 return lhs = lhs % rhs;
2427         }
2428
2429         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2430         {
2431                 return lhs = lhs & rhs;
2432         }
2433
2434         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2435         {
2436                 return lhs = lhs | rhs;
2437         }
2438
2439         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2440         {
2441                 return lhs = lhs ^ rhs;
2442         }
2443
2444         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2445         {
2446                 return lhs = lhs << rhs;
2447         }
2448
2449         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2450         {
2451                 return lhs = lhs >> rhs;
2452         }
2453
2454         RValue<UShort> operator+(RValue<UShort> val)
2455         {
2456                 return val;
2457         }
2458
2459         RValue<UShort> operator-(RValue<UShort> val)
2460         {
2461                 return RValue<UShort>(Nucleus::createNeg(val.value));
2462         }
2463
2464         RValue<UShort> operator~(RValue<UShort> val)
2465         {
2466                 return RValue<UShort>(Nucleus::createNot(val.value));
2467         }
2468
2469         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2470         {
2471                 RValue<UShort> res = val;
2472                 val += UShort(1);
2473                 return res;
2474         }
2475
2476         const UShort &operator++(UShort &val)   // Pre-increment
2477         {
2478                 val += UShort(1);
2479                 return val;
2480         }
2481
2482         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2483         {
2484                 RValue<UShort> res = val;
2485                 val -= UShort(1);
2486                 return res;
2487         }
2488
2489         const UShort &operator--(UShort &val)   // Pre-decrement
2490         {
2491                 val -= UShort(1);
2492                 return val;
2493         }
2494
2495         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2496         {
2497                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2498         }
2499
2500         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2501         {
2502                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2503         }
2504
2505         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2506         {
2507                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2508         }
2509
2510         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2511         {
2512                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2513         }
2514
2515         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2516         {
2517                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2518         }
2519
2520         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2521         {
2522                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2523         }
2524
2525         Type *UShort::getType()
2526         {
2527                 return T(Ice::IceType_i16);
2528         }
2529
2530         Byte4::Byte4(RValue<Byte8> cast)
2531         {
2532                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2533         }
2534
2535         Byte4::Byte4(const Reference<Byte4> &rhs)
2536         {
2537                 Value *value = rhs.loadValue();
2538                 storeValue(value);
2539         }
2540
2541         Type *Byte4::getType()
2542         {
2543                 return T(Type_v4i8);
2544         }
2545
2546         Type *SByte4::getType()
2547         {
2548                 return T(Type_v4i8);
2549         }
2550
2551         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2552         {
2553                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2554                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2555         }
2556
2557         Byte8::Byte8(RValue<Byte8> rhs)
2558         {
2559                 storeValue(rhs.value);
2560         }
2561
2562         Byte8::Byte8(const Byte8 &rhs)
2563         {
2564                 Value *value = rhs.loadValue();
2565                 storeValue(value);
2566         }
2567
2568         Byte8::Byte8(const Reference<Byte8> &rhs)
2569         {
2570                 Value *value = rhs.loadValue();
2571                 storeValue(value);
2572         }
2573
2574         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2575         {
2576                 storeValue(rhs.value);
2577
2578                 return rhs;
2579         }
2580
2581         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2582         {
2583                 Value *value = rhs.loadValue();
2584                 storeValue(value);
2585
2586                 return RValue<Byte8>(value);
2587         }
2588
2589         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2590         {
2591                 Value *value = rhs.loadValue();
2592                 storeValue(value);
2593
2594                 return RValue<Byte8>(value);
2595         }
2596
2597         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2598         {
2599                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2600         }
2601
2602         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2603         {
2604                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2605         }
2606
2607 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2608 //      {
2609 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2610 //      }
2611
2612 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2613 //      {
2614 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2615 //      }
2616
2617 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2618 //      {
2619 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2620 //      }
2621
2622         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2623         {
2624                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2625         }
2626
2627         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2628         {
2629                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2630         }
2631
2632         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2633         {
2634                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2635         }
2636
2637 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2638 //      {
2639 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2640 //      }
2641
2642 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2643 //      {
2644 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2645 //      }
2646
2647         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2648         {
2649                 return lhs = lhs + rhs;
2650         }
2651
2652         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2653         {
2654                 return lhs = lhs - rhs;
2655         }
2656
2657 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2658 //      {
2659 //              return lhs = lhs * rhs;
2660 //      }
2661
2662 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2663 //      {
2664 //              return lhs = lhs / rhs;
2665 //      }
2666
2667 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2668 //      {
2669 //              return lhs = lhs % rhs;
2670 //      }
2671
2672         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2673         {
2674                 return lhs = lhs & rhs;
2675         }
2676
2677         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2678         {
2679                 return lhs = lhs | rhs;
2680         }
2681
2682         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2683         {
2684                 return lhs = lhs ^ rhs;
2685         }
2686
2687 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2688 //      {
2689 //              return lhs = lhs << rhs;
2690 //      }
2691
2692 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2693 //      {
2694 //              return lhs = lhs >> rhs;
2695 //      }
2696
2697 //      RValue<Byte8> operator+(RValue<Byte8> val)
2698 //      {
2699 //              return val;
2700 //      }
2701
2702 //      RValue<Byte8> operator-(RValue<Byte8> val)
2703 //      {
2704 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2705 //      }
2706
2707         RValue<Byte8> operator~(RValue<Byte8> val)
2708         {
2709                 return RValue<Byte8>(Nucleus::createNot(val.value));
2710         }
2711
2712         RValue<Byte> Extract(RValue<Byte8> val, int i)
2713         {
2714                 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
2715         }
2716
2717         RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2718         {
2719                 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2720         }
2721
2722         RValue<Byte> Saturate(RValue<UShort> x)
2723         {
2724                 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), Int(x)));
2725         }
2726
2727         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2728         {
2729                 if(emulateIntrinsics)
2730                 {
2731                         Byte8 result;
2732                         result = Insert(result, Saturate(UShort(Int(Extract(x, 0))) + UShort(Int(Extract(y, 0)))), 0);
2733                         result = Insert(result, Saturate(UShort(Int(Extract(x, 1))) + UShort(Int(Extract(y, 1)))), 1);
2734                         result = Insert(result, Saturate(UShort(Int(Extract(x, 2))) + UShort(Int(Extract(y, 2)))), 2);
2735                         result = Insert(result, Saturate(UShort(Int(Extract(x, 3))) + UShort(Int(Extract(y, 3)))), 3);
2736                         result = Insert(result, Saturate(UShort(Int(Extract(x, 4))) + UShort(Int(Extract(y, 4)))), 4);
2737                         result = Insert(result, Saturate(UShort(Int(Extract(x, 5))) + UShort(Int(Extract(y, 5)))), 5);
2738                         result = Insert(result, Saturate(UShort(Int(Extract(x, 6))) + UShort(Int(Extract(y, 6)))), 6);
2739                         result = Insert(result, Saturate(UShort(Int(Extract(x, 7))) + UShort(Int(Extract(y, 7)))), 7);
2740
2741                         return result;
2742                 }
2743                 else
2744                 {
2745                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2746                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2747                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
2748                         auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2749                         paddusb->addArg(x.value);
2750                         paddusb->addArg(y.value);
2751                         ::basicBlock->appendInst(paddusb);
2752
2753                         return RValue<Byte8>(V(result));
2754                 }
2755         }
2756
2757         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2758         {
2759                 if(emulateIntrinsics)
2760                 {
2761                         Byte8 result;
2762                         result = Insert(result, Saturate(UShort(Int(Extract(x, 0))) - UShort(Int(Extract(y, 0)))), 0);
2763                         result = Insert(result, Saturate(UShort(Int(Extract(x, 1))) - UShort(Int(Extract(y, 1)))), 1);
2764                         result = Insert(result, Saturate(UShort(Int(Extract(x, 2))) - UShort(Int(Extract(y, 2)))), 2);
2765                         result = Insert(result, Saturate(UShort(Int(Extract(x, 3))) - UShort(Int(Extract(y, 3)))), 3);
2766                         result = Insert(result, Saturate(UShort(Int(Extract(x, 4))) - UShort(Int(Extract(y, 4)))), 4);
2767                         result = Insert(result, Saturate(UShort(Int(Extract(x, 5))) - UShort(Int(Extract(y, 5)))), 5);
2768                         result = Insert(result, Saturate(UShort(Int(Extract(x, 6))) - UShort(Int(Extract(y, 6)))), 6);
2769                         result = Insert(result, Saturate(UShort(Int(Extract(x, 7))) - UShort(Int(Extract(y, 7)))), 7);
2770
2771                         return result;
2772                 }
2773                 else
2774                 {
2775                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2776                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2777                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
2778                         auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2779                         psubusw->addArg(x.value);
2780                         psubusw->addArg(y.value);
2781                         ::basicBlock->appendInst(psubusw);
2782
2783                         return RValue<Byte8>(V(result));
2784                 }
2785         }
2786
2787         RValue<Short4> Unpack(RValue<Byte4> x)
2788         {
2789                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2790                 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2791         }
2792
2793         RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2794         {
2795                 return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2796         }
2797
2798         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2799         {
2800                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2801                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2802         }
2803
2804         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2805         {
2806                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2807                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2808                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2809         }
2810
2811         RValue<SByte> Extract(RValue<SByte8> val, int i)
2812         {
2813                 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2814         }
2815
2816         RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2817         {
2818                 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2819         }
2820
2821         RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2822         {
2823                 if(emulateIntrinsics)
2824                 {
2825                         SByte8 result;
2826                         result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2827                         result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2828                         result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2829                         result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2830                         result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2831                         result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2832                         result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2833                         result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2834
2835                         return result;
2836                 }
2837                 else
2838                 {
2839                         #if defined(__i386__) || defined(__x86_64__)
2840                                 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2841                                 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00);
2842                                 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
2843
2844                                 return As<SByte8>(hi | lo);
2845                         #else
2846                                 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2847                         #endif
2848                 }
2849         }
2850
2851         RValue<Int> SignMask(RValue<Byte8> x)
2852         {
2853                 if(emulateIntrinsics)
2854                 {
2855                         Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2856                         return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
2857                 }
2858                 else
2859                 {
2860                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2861                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2862                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
2863                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2864                         movmsk->addArg(x.value);
2865                         ::basicBlock->appendInst(movmsk);
2866
2867                         return RValue<Int>(V(result)) & 0xFF;
2868                 }
2869         }
2870
2871 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2872 //      {
2873 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2874 //      }
2875
2876         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2877         {
2878                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2879         }
2880
2881         Type *Byte8::getType()
2882         {
2883                 return T(Type_v8i8);
2884         }
2885
2886         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2887         {
2888                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
2889                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2890
2891                 storeValue(Nucleus::createBitCast(vector, getType()));
2892         }
2893
2894         SByte8::SByte8(RValue<SByte8> rhs)
2895         {
2896                 storeValue(rhs.value);
2897         }
2898
2899         SByte8::SByte8(const SByte8 &rhs)
2900         {
2901                 Value *value = rhs.loadValue();
2902                 storeValue(value);
2903         }
2904
2905         SByte8::SByte8(const Reference<SByte8> &rhs)
2906         {
2907                 Value *value = rhs.loadValue();
2908                 storeValue(value);
2909         }
2910
2911         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2912         {
2913                 storeValue(rhs.value);
2914
2915                 return rhs;
2916         }
2917
2918         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2919         {
2920                 Value *value = rhs.loadValue();
2921                 storeValue(value);
2922
2923                 return RValue<SByte8>(value);
2924         }
2925
2926         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2927         {
2928                 Value *value = rhs.loadValue();
2929                 storeValue(value);
2930
2931                 return RValue<SByte8>(value);
2932         }
2933
2934         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2935         {
2936                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2937         }
2938
2939         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2940         {
2941                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2942         }
2943
2944 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2945 //      {
2946 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2947 //      }
2948
2949 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2950 //      {
2951 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2952 //      }
2953
2954 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2955 //      {
2956 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2957 //      }
2958
2959         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2960         {
2961                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2962         }
2963
2964         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2965         {
2966                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2967         }
2968
2969         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2970         {
2971                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2972         }
2973
2974 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2975 //      {
2976 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2977 //      }
2978
2979 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2980 //      {
2981 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2982 //      }
2983
2984         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2985         {
2986                 return lhs = lhs + rhs;
2987         }
2988
2989         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2990         {
2991                 return lhs = lhs - rhs;
2992         }
2993
2994 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2995 //      {
2996 //              return lhs = lhs * rhs;
2997 //      }
2998
2999 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
3000 //      {
3001 //              return lhs = lhs / rhs;
3002 //      }
3003
3004 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
3005 //      {
3006 //              return lhs = lhs % rhs;
3007 //      }
3008
3009         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
3010         {
3011                 return lhs = lhs & rhs;
3012         }
3013
3014         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
3015         {
3016                 return lhs = lhs | rhs;
3017         }
3018
3019         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
3020         {
3021                 return lhs = lhs ^ rhs;
3022         }
3023
3024 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
3025 //      {
3026 //              return lhs = lhs << rhs;
3027 //      }
3028
3029 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
3030 //      {
3031 //              return lhs = lhs >> rhs;
3032 //      }
3033
3034 //      RValue<SByte8> operator+(RValue<SByte8> val)
3035 //      {
3036 //              return val;
3037 //      }
3038
3039 //      RValue<SByte8> operator-(RValue<SByte8> val)
3040 //      {
3041 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
3042 //      }
3043
3044         RValue<SByte8> operator~(RValue<SByte8> val)
3045         {
3046                 return RValue<SByte8>(Nucleus::createNot(val.value));
3047         }
3048
3049         RValue<SByte> Saturate(RValue<Short> x)
3050         {
3051                 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
3052         }
3053
3054         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
3055         {
3056                 if(emulateIntrinsics)
3057                 {
3058                         SByte8 result;
3059                         result = Insert(result, Saturate(Short(Int(Extract(x, 0))) + Short(Int(Extract(y, 0)))), 0);
3060                         result = Insert(result, Saturate(Short(Int(Extract(x, 1))) + Short(Int(Extract(y, 1)))), 1);
3061                         result = Insert(result, Saturate(Short(Int(Extract(x, 2))) + Short(Int(Extract(y, 2)))), 2);
3062                         result = Insert(result, Saturate(Short(Int(Extract(x, 3))) + Short(Int(Extract(y, 3)))), 3);
3063                         result = Insert(result, Saturate(Short(Int(Extract(x, 4))) + Short(Int(Extract(y, 4)))), 4);
3064                         result = Insert(result, Saturate(Short(Int(Extract(x, 5))) + Short(Int(Extract(y, 5)))), 5);
3065                         result = Insert(result, Saturate(Short(Int(Extract(x, 6))) + Short(Int(Extract(y, 6)))), 6);
3066                         result = Insert(result, Saturate(Short(Int(Extract(x, 7))) + Short(Int(Extract(y, 7)))), 7);
3067
3068                         return result;
3069                 }
3070                 else
3071                 {
3072                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3073                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3074                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3075                         auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3076                         paddsb->addArg(x.value);
3077                         paddsb->addArg(y.value);
3078                         ::basicBlock->appendInst(paddsb);
3079
3080                         return RValue<SByte8>(V(result));
3081                 }
3082         }
3083
3084         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
3085         {
3086                 if(emulateIntrinsics)
3087                 {
3088                         SByte8 result;
3089                         result = Insert(result, Saturate(Short(Int(Extract(x, 0))) - Short(Int(Extract(y, 0)))), 0);
3090                         result = Insert(result, Saturate(Short(Int(Extract(x, 1))) - Short(Int(Extract(y, 1)))), 1);
3091                         result = Insert(result, Saturate(Short(Int(Extract(x, 2))) - Short(Int(Extract(y, 2)))), 2);
3092                         result = Insert(result, Saturate(Short(Int(Extract(x, 3))) - Short(Int(Extract(y, 3)))), 3);
3093                         result = Insert(result, Saturate(Short(Int(Extract(x, 4))) - Short(Int(Extract(y, 4)))), 4);
3094                         result = Insert(result, Saturate(Short(Int(Extract(x, 5))) - Short(Int(Extract(y, 5)))), 5);
3095                         result = Insert(result, Saturate(Short(Int(Extract(x, 6))) - Short(Int(Extract(y, 6)))), 6);
3096                         result = Insert(result, Saturate(Short(Int(Extract(x, 7))) - Short(Int(Extract(y, 7)))), 7);
3097
3098                         return result;
3099                 }
3100                 else
3101                 {
3102                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3103                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3104                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3105                         auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3106                         psubsb->addArg(x.value);
3107                         psubsb->addArg(y.value);
3108                         ::basicBlock->appendInst(psubsb);
3109
3110                         return RValue<SByte8>(V(result));
3111                 }
3112         }
3113
3114         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
3115         {
3116                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
3117                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3118         }
3119
3120         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
3121         {
3122                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
3123                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3124                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
3125         }
3126
3127         RValue<Int> SignMask(RValue<SByte8> x)
3128         {
3129                 if(emulateIntrinsics)
3130                 {
3131                         SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
3132                         return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
3133                 }
3134                 else
3135                 {
3136                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3137                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3138                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3139                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3140                         movmsk->addArg(x.value);
3141                         ::basicBlock->appendInst(movmsk);
3142
3143                         return RValue<Int>(V(result)) & 0xFF;
3144                 }
3145         }
3146
3147         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
3148         {
3149                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3150         }
3151
3152         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
3153         {
3154                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
3155         }
3156
3157         Type *SByte8::getType()
3158         {
3159                 return T(Type_v8i8);
3160         }
3161
3162         Byte16::Byte16(RValue<Byte16> rhs)
3163         {
3164                 storeValue(rhs.value);
3165         }
3166
3167         Byte16::Byte16(const Byte16 &rhs)
3168         {
3169                 Value *value = rhs.loadValue();
3170                 storeValue(value);
3171         }
3172
3173         Byte16::Byte16(const Reference<Byte16> &rhs)
3174         {
3175                 Value *value = rhs.loadValue();
3176                 storeValue(value);
3177         }
3178
3179         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
3180         {
3181                 storeValue(rhs.value);
3182
3183                 return rhs;
3184         }
3185
3186         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
3187         {
3188                 Value *value = rhs.loadValue();
3189                 storeValue(value);
3190
3191                 return RValue<Byte16>(value);
3192         }
3193
3194         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
3195         {
3196                 Value *value = rhs.loadValue();
3197                 storeValue(value);
3198
3199                 return RValue<Byte16>(value);
3200         }
3201
3202         Type *Byte16::getType()
3203         {
3204                 return T(Ice::IceType_v16i8);
3205         }
3206
3207         Type *SByte16::getType()
3208         {
3209                 return T(Ice::IceType_v16i8);
3210         }
3211
3212         Short2::Short2(RValue<Short4> cast)
3213         {
3214                 storeValue(Nucleus::createBitCast(cast.value, getType()));
3215         }
3216
3217         Type *Short2::getType()
3218         {
3219                 return T(Type_v2i16);
3220         }
3221
3222         UShort2::UShort2(RValue<UShort4> cast)
3223         {
3224                 storeValue(Nucleus::createBitCast(cast.value, getType()));
3225         }
3226
3227         Type *UShort2::getType()
3228         {
3229                 return T(Type_v2i16);
3230         }
3231
3232         Short4::Short4(RValue<Int> cast)
3233         {
3234                 Value *vector = loadValue();
3235                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
3236                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
3237                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
3238
3239                 storeValue(swizzle);
3240         }
3241
3242         Short4::Short4(RValue<Int4> cast)
3243         {
3244                 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
3245                 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
3246                 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
3247
3248                 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
3249                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
3250
3251                 storeValue(short4);
3252         }
3253
3254 //      Short4::Short4(RValue<Float> cast)
3255 //      {
3256 //      }
3257
3258         Short4::Short4(RValue<Float4> cast)
3259         {
3260                 assert(false && "UNIMPLEMENTED");
3261         }
3262
3263         Short4::Short4(short xyzw)
3264         {
3265                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3266                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3267         }
3268
3269         Short4::Short4(short x, short y, short z, short w)
3270         {
3271                 int64_t constantVector[4] = {x, y, z, w};
3272                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3273         }
3274
3275         Short4::Short4(RValue<Short4> rhs)
3276         {
3277                 storeValue(rhs.value);
3278         }
3279
3280         Short4::Short4(const Short4 &rhs)
3281         {
3282                 Value *value = rhs.loadValue();
3283                 storeValue(value);
3284         }
3285
3286         Short4::Short4(const Reference<Short4> &rhs)
3287         {
3288                 Value *value = rhs.loadValue();
3289                 storeValue(value);
3290         }
3291
3292         Short4::Short4(RValue<UShort4> rhs)
3293         {
3294                 storeValue(rhs.value);
3295         }
3296
3297         Short4::Short4(const UShort4 &rhs)
3298         {
3299                 storeValue(rhs.loadValue());
3300         }
3301
3302         Short4::Short4(const Reference<UShort4> &rhs)
3303         {
3304                 storeValue(rhs.loadValue());
3305         }
3306
3307         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
3308         {
3309                 storeValue(rhs.value);
3310
3311                 return rhs;
3312         }
3313
3314         RValue<Short4> Short4::operator=(const Short4 &rhs)
3315         {
3316                 Value *value = rhs.loadValue();
3317                 storeValue(value);
3318
3319                 return RValue<Short4>(value);
3320         }
3321
3322         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3323         {
3324                 Value *value = rhs.loadValue();
3325                 storeValue(value);
3326
3327                 return RValue<Short4>(value);
3328         }
3329
3330         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3331         {
3332                 storeValue(rhs.value);
3333
3334                 return RValue<Short4>(rhs);
3335         }
3336
3337         RValue<Short4> Short4::operator=(const UShort4 &rhs)
3338         {
3339                 Value *value = rhs.loadValue();
3340                 storeValue(value);
3341
3342                 return RValue<Short4>(value);
3343         }
3344
3345         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3346         {
3347                 Value *value = rhs.loadValue();
3348                 storeValue(value);
3349
3350                 return RValue<Short4>(value);
3351         }
3352
3353         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3354         {
3355                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3356         }
3357
3358         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3359         {
3360                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3361         }
3362
3363         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3364         {
3365                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3366         }
3367
3368 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3369 //      {
3370 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3371 //      }
3372
3373 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3374 //      {
3375 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3376 //      }
3377
3378         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3379         {
3380                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3381         }
3382
3383         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3384         {
3385                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3386         }
3387
3388         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3389         {
3390                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3391         }
3392
3393         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3394         {
3395                 if(emulateIntrinsics)
3396                 {
3397                         Short4 result;
3398                         result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3399                         result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3400                         result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3401                         result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3402
3403                         return result;
3404                 }
3405                 else
3406                 {
3407                         return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3408                 }
3409         }
3410
3411         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3412         {
3413                 if(emulateIntrinsics)
3414                 {
3415                         Short4 result;
3416                         result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3417                         result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3418                         result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3419                         result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3420
3421                         return result;
3422                 }
3423                 else
3424                 {
3425                         return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3426                 }
3427         }
3428
3429         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3430         {
3431                 return lhs = lhs + rhs;
3432         }
3433
3434         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3435         {
3436                 return lhs = lhs - rhs;
3437         }
3438
3439         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3440         {
3441                 return lhs = lhs * rhs;
3442         }
3443
3444 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3445 //      {
3446 //              return lhs = lhs / rhs;
3447 //      }
3448
3449 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3450 //      {
3451 //              return lhs = lhs % rhs;
3452 //      }
3453
3454         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3455         {
3456                 return lhs = lhs & rhs;
3457         }
3458
3459         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3460         {
3461                 return lhs = lhs | rhs;
3462         }
3463
3464         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3465         {
3466                 return lhs = lhs ^ rhs;
3467         }
3468
3469         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3470         {
3471                 return lhs = lhs << rhs;
3472         }
3473
3474         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3475         {
3476                 return lhs = lhs >> rhs;
3477         }
3478
3479 //      RValue<Short4> operator+(RValue<Short4> val)
3480 //      {
3481 //              return val;
3482 //      }
3483
3484         RValue<Short4> operator-(RValue<Short4> val)
3485         {
3486                 return RValue<Short4>(Nucleus::createNeg(val.value));
3487         }
3488
3489         RValue<Short4> operator~(RValue<Short4> val)
3490         {
3491                 return RValue<Short4>(Nucleus::createNot(val.value));
3492         }
3493
3494         RValue<Short4> RoundShort4(RValue<Float4> cast)
3495         {
3496                 RValue<Int4> int4 = RoundInt(cast);
3497                 return As<Short4>(Pack(int4, int4));
3498         }
3499
3500         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3501         {
3502                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3503                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3504                 ::basicBlock->appendInst(cmp);
3505
3506                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3507                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3508                 ::basicBlock->appendInst(select);
3509
3510                 return RValue<Short4>(V(result));
3511         }
3512
3513         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3514         {
3515                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3516                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3517                 ::basicBlock->appendInst(cmp);
3518
3519                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3520                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3521                 ::basicBlock->appendInst(select);
3522
3523                 return RValue<Short4>(V(result));
3524         }
3525
3526         RValue<Short> Saturate(RValue<Int> x)
3527         {
3528                 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
3529         }
3530
3531         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3532         {
3533                 if(emulateIntrinsics)
3534                 {
3535                         Short4 result;
3536                         result = Insert(result, Saturate(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
3537                         result = Insert(result, Saturate(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
3538                         result = Insert(result, Saturate(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
3539                         result = Insert(result, Saturate(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
3540
3541                         return result;
3542                 }
3543                 else
3544                 {
3545                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3546                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3547                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3548                         auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3549                         paddsw->addArg(x.value);
3550                         paddsw->addArg(y.value);
3551                         ::basicBlock->appendInst(paddsw);
3552
3553                         return RValue<Short4>(V(result));
3554                 }
3555         }
3556
3557         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3558         {
3559                 if(emulateIntrinsics)
3560                 {
3561                         Short4 result;
3562                         result = Insert(result, Saturate(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
3563                         result = Insert(result, Saturate(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
3564                         result = Insert(result, Saturate(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
3565                         result = Insert(result, Saturate(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
3566
3567                         return result;
3568                 }
3569                 else
3570                 {
3571                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3572                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3573                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3574                         auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3575                         psubsw->addArg(x.value);
3576                         psubsw->addArg(y.value);
3577                         ::basicBlock->appendInst(psubsw);
3578
3579                         return RValue<Short4>(V(result));
3580                 }
3581         }
3582
3583         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3584         {
3585                 if(emulateIntrinsics)
3586                 {
3587                         Short4 result;
3588                         result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
3589                         result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
3590                         result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
3591                         result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
3592
3593                         return result;
3594                 }
3595                 else
3596                 {
3597                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3598                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3599                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3600                         auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3601                         pmulhw->addArg(x.value);
3602                         pmulhw->addArg(y.value);
3603                         ::basicBlock->appendInst(pmulhw);
3604
3605                         return RValue<Short4>(V(result));
3606                 }
3607         }
3608
3609         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3610         {
3611                 if(emulateIntrinsics)
3612                 {
3613                         Int2 result;
3614                         result = Insert(result, Int(Extract(x, 0)) * Int(Extract(y, 0)) + Int(Extract(x, 1)) * Int(Extract(y, 1)), 0);
3615                         result = Insert(result, Int(Extract(x, 2)) * Int(Extract(y, 2)) + Int(Extract(x, 3)) * Int(Extract(y, 3)), 1);
3616
3617                         return result;
3618                 }
3619                 else
3620                 {
3621                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3622                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3623                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3624                         auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3625                         pmaddwd->addArg(x.value);
3626                         pmaddwd->addArg(y.value);
3627                         ::basicBlock->appendInst(pmaddwd);
3628
3629                         return As<Int2>(V(result));
3630                 }
3631         }
3632
3633         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3634         {
3635                 if(emulateIntrinsics)
3636                 {
3637                         SByte8 result;
3638                         result = Insert(result, Saturate(Extract(x, 0)), 0);
3639                         result = Insert(result, Saturate(Extract(x, 1)), 1);
3640                         result = Insert(result, Saturate(Extract(x, 2)), 2);
3641                         result = Insert(result, Saturate(Extract(x, 3)), 3);
3642                         result = Insert(result, Saturate(Extract(y, 0)), 4);
3643                         result = Insert(result, Saturate(Extract(y, 1)), 5);
3644                         result = Insert(result, Saturate(Extract(y, 2)), 6);
3645                         result = Insert(result, Saturate(Extract(y, 3)), 7);
3646
3647                         return result;
3648                 }
3649                 else
3650                 {
3651                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3652                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3653                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3654                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3655                         pack->addArg(x.value);
3656                         pack->addArg(y.value);
3657                         ::basicBlock->appendInst(pack);
3658
3659                         return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
3660                 }
3661         }
3662
3663         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3664         {
3665                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3666                 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3667         }
3668
3669         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3670         {
3671                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3672                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3673                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3674         }
3675
3676         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3677         {
3678                 // Real type is v8i16
3679                 int shuffle[8] =
3680                 {
3681                         (select >> 0) & 0x03,
3682                         (select >> 2) & 0x03,
3683                         (select >> 4) & 0x03,
3684                         (select >> 6) & 0x03,
3685                         (select >> 0) & 0x03,
3686                         (select >> 2) & 0x03,
3687                         (select >> 4) & 0x03,
3688                         (select >> 6) & 0x03,
3689                 };
3690
3691                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3692         }
3693
3694         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3695         {
3696                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3697         }
3698
3699         RValue<Short> Extract(RValue<Short4> val, int i)
3700         {
3701                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3702         }
3703
3704         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3705         {
3706                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3707         }
3708
3709         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3710         {
3711                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3712         }
3713
3714         Type *Short4::getType()
3715         {
3716                 return T(Type_v4i16);
3717         }
3718
3719         UShort4::UShort4(RValue<Int4> cast)
3720         {
3721                 *this = Short4(cast);
3722         }
3723
3724         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3725         {
3726                 if(saturate)
3727                 {
3728                         if(CPUID::SSE4_1)
3729                         {
3730                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3731                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3732                         }
3733                         else
3734                         {
3735                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3736                         }
3737                 }
3738                 else
3739                 {
3740                         *this = Short4(Int4(cast));
3741                 }
3742         }
3743
3744         UShort4::UShort4(unsigned short xyzw)
3745         {
3746                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3747                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3748         }
3749
3750         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3751         {
3752                 int64_t constantVector[4] = {x, y, z, w};
3753                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3754         }
3755
3756         UShort4::UShort4(RValue<UShort4> rhs)
3757         {
3758                 storeValue(rhs.value);
3759         }
3760
3761         UShort4::UShort4(const UShort4 &rhs)
3762         {
3763                 Value *value = rhs.loadValue();
3764                 storeValue(value);
3765         }
3766
3767         UShort4::UShort4(const Reference<UShort4> &rhs)
3768         {
3769                 Value *value = rhs.loadValue();
3770                 storeValue(value);
3771         }
3772
3773         UShort4::UShort4(RValue<Short4> rhs)
3774         {
3775                 storeValue(rhs.value);
3776         }
3777
3778         UShort4::UShort4(const Short4 &rhs)
3779         {
3780                 Value *value = rhs.loadValue();
3781                 storeValue(value);
3782         }
3783
3784         UShort4::UShort4(const Reference<Short4> &rhs)
3785         {
3786                 Value *value = rhs.loadValue();
3787                 storeValue(value);
3788         }
3789
3790         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3791         {
3792                 storeValue(rhs.value);
3793
3794                 return rhs;
3795         }
3796
3797         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3798         {
3799                 Value *value = rhs.loadValue();
3800                 storeValue(value);
3801
3802                 return RValue<UShort4>(value);
3803         }
3804
3805         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3806         {
3807                 Value *value = rhs.loadValue();
3808                 storeValue(value);
3809
3810                 return RValue<UShort4>(value);
3811         }
3812
3813         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3814         {
3815                 storeValue(rhs.value);
3816
3817                 return RValue<UShort4>(rhs);
3818         }
3819
3820         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3821         {
3822                 Value *value = rhs.loadValue();
3823                 storeValue(value);
3824
3825                 return RValue<UShort4>(value);
3826         }
3827
3828         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3829         {
3830                 Value *value = rhs.loadValue();
3831                 storeValue(value);
3832
3833                 return RValue<UShort4>(value);
3834         }
3835
3836         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3837         {
3838                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3839         }
3840
3841         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3842         {
3843                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3844         }
3845
3846         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3847         {
3848                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3849         }
3850
3851         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3852         {
3853                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3854         }
3855
3856         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3857         {
3858                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3859         }
3860
3861         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3862         {
3863                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3864         }
3865
3866         RValue<UShort> Extract(RValue<UShort4> val, int i)
3867         {
3868                 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3869         }
3870
3871         RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
3872         {
3873                 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
3874         }
3875
3876         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3877         {
3878                 if(emulateIntrinsics)
3879                 {
3880                         UShort4 result;
3881                         result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3882                         result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3883                         result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3884                         result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3885
3886                         return result;
3887                 }
3888                 else
3889                 {
3890                         return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3891                 }
3892         }
3893
3894         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3895         {
3896                 if(emulateIntrinsics)
3897                 {
3898                         UShort4 result;
3899                         result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3900                         result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3901                         result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3902                         result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3903
3904                         return result;
3905                 }
3906                 else
3907                 {
3908                         return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3909                 }
3910         }
3911
3912         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3913         {
3914                 return lhs = lhs << rhs;
3915         }
3916
3917         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3918         {
3919                 return lhs = lhs >> rhs;
3920         }
3921
3922         RValue<UShort4> operator~(RValue<UShort4> val)
3923         {
3924                 return RValue<UShort4>(Nucleus::createNot(val.value));
3925         }
3926
3927         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3928         {
3929                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3930                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3931                 ::basicBlock->appendInst(cmp);
3932
3933                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3934                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3935                 ::basicBlock->appendInst(select);
3936
3937                 return RValue<UShort4>(V(result));
3938         }
3939
3940         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3941         {
3942                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3943                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3944                 ::basicBlock->appendInst(cmp);
3945
3946                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3947                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3948                 ::basicBlock->appendInst(select);
3949
3950                 return RValue<UShort4>(V(result));
3951         }
3952
3953         RValue<UShort> SaturateUShort(RValue<Int> x)
3954         {
3955                 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
3956         }
3957
3958         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3959         {
3960                 if(emulateIntrinsics)
3961                 {
3962                         UShort4 result;
3963                         result = Insert(result, SaturateUShort(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
3964                         result = Insert(result, SaturateUShort(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
3965                         result = Insert(result, SaturateUShort(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
3966                         result = Insert(result, SaturateUShort(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
3967
3968                         return result;
3969                 }
3970                 else
3971                 {
3972                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3973                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3974                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3975                         auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3976                         paddusw->addArg(x.value);
3977                         paddusw->addArg(y.value);
3978                         ::basicBlock->appendInst(paddusw);
3979
3980                         return RValue<UShort4>(V(result));
3981                 }
3982         }
3983
3984         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3985         {
3986                 if(emulateIntrinsics)
3987                 {
3988                         UShort4 result;
3989                         result = Insert(result, SaturateUShort(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
3990                         result = Insert(result, SaturateUShort(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
3991                         result = Insert(result, SaturateUShort(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
3992                         result = Insert(result, SaturateUShort(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
3993
3994                         return result;
3995                 }
3996                 else
3997                 {
3998                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3999                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4000                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4001                         auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4002                         psubusw->addArg(x.value);
4003                         psubusw->addArg(y.value);
4004                         ::basicBlock->appendInst(psubusw);
4005
4006                         return RValue<UShort4>(V(result));
4007                 }
4008         }
4009
4010         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
4011         {
4012                 if(emulateIntrinsics)
4013                 {
4014                         UShort4 result;
4015                         result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
4016                         result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
4017                         result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
4018                         result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
4019
4020                         return result;
4021                 }
4022                 else
4023                 {
4024                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
4025                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4026                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4027                         auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4028                         pmulhuw->addArg(x.value);
4029                         pmulhuw->addArg(y.value);
4030                         ::basicBlock->appendInst(pmulhuw);
4031
4032                         return RValue<UShort4>(V(result));
4033                 }
4034         }
4035
4036         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
4037         {
4038                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
4039         }
4040
4041         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
4042         {
4043                 if(emulateIntrinsics)
4044                 {
4045                         Byte8 result;
4046                         result = Insert(result, Saturate(Extract(x, 0)), 0);
4047                         result = Insert(result, Saturate(Extract(x, 1)), 1);
4048                         result = Insert(result, Saturate(Extract(x, 2)), 2);
4049                         result = Insert(result, Saturate(Extract(x, 3)), 3);
4050                         result = Insert(result, Saturate(Extract(y, 0)), 4);
4051                         result = Insert(result, Saturate(Extract(y, 1)), 5);
4052                         result = Insert(result, Saturate(Extract(y, 2)), 6);
4053                         result = Insert(result, Saturate(Extract(y, 3)), 7);
4054
4055                         return result;
4056                 }
4057                 else
4058                 {
4059                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
4060                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4061                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4062                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4063                         pack->addArg(x.value);
4064                         pack->addArg(y.value);
4065                         ::basicBlock->appendInst(pack);
4066
4067                         return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
4068                 }
4069         }
4070
4071         Type *UShort4::getType()
4072         {
4073                 return T(Type_v4i16);
4074         }
4075
4076         Short8::Short8(short c)
4077         {
4078                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
4079                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4080         }
4081
4082         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
4083         {
4084                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
4085                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4086         }
4087
4088         Short8::Short8(RValue<Short8> rhs)
4089         {
4090                 storeValue(rhs.value);
4091         }
4092
4093         Short8::Short8(const Reference<Short8> &rhs)
4094         {
4095                 Value *value = rhs.loadValue();
4096                 storeValue(value);
4097         }
4098
4099         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
4100         {
4101                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
4102                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4103
4104                 storeValue(packed);
4105         }
4106
4107         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
4108         {
4109                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
4110         }
4111
4112         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
4113         {
4114                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
4115         }
4116
4117         RValue<Short> Extract(RValue<Short8> val, int i)
4118         {
4119                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
4120         }
4121
4122         RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
4123         {
4124                 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
4125         }
4126
4127         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
4128         {
4129                 if(emulateIntrinsics)
4130                 {
4131                         Short8 result;
4132                         result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
4133                         result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
4134                         result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
4135                         result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
4136                         result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
4137                         result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
4138                         result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
4139                         result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
4140
4141                         return result;
4142                 }
4143                 else
4144                 {
4145                         return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4146                 }
4147         }
4148
4149         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
4150         {
4151                 if(emulateIntrinsics)
4152                 {
4153                         Short8 result;
4154                         result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
4155                         result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
4156                         result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
4157                         result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
4158                         result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
4159                         result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
4160                         result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
4161                         result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
4162
4163                         return result;
4164                 }
4165                 else
4166                 {
4167                         return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4168                 }
4169         }
4170
4171         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
4172         {
4173                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
4174         }
4175
4176         RValue<Int4> Abs(RValue<Int4> x)
4177         {
4178                 auto negative = x >> 31;
4179                 return (x ^ negative) - negative;
4180         }
4181
4182         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
4183         {
4184                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
4185         }
4186
4187         Type *Short8::getType()
4188         {
4189                 return T(Ice::IceType_v8i16);
4190         }
4191
4192         UShort8::UShort8(unsigned short c)
4193         {
4194                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
4195                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4196         }
4197
4198         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
4199         {
4200                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
4201                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4202         }
4203
4204         UShort8::UShort8(RValue<UShort8> rhs)
4205         {
4206                 storeValue(rhs.value);
4207         }
4208
4209         UShort8::UShort8(const Reference<UShort8> &rhs)
4210         {
4211                 Value *value = rhs.loadValue();
4212                 storeValue(value);
4213         }
4214
4215         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
4216         {
4217                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
4218                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4219
4220                 storeValue(packed);
4221         }
4222
4223         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
4224         {
4225                 storeValue(rhs.value);
4226
4227                 return rhs;
4228         }
4229
4230         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
4231         {
4232                 Value *value = rhs.loadValue();
4233                 storeValue(value);
4234
4235                 return RValue<UShort8>(value);
4236         }
4237
4238         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
4239         {
4240                 Value *value = rhs.loadValue();
4241                 storeValue(value);
4242
4243                 return RValue<UShort8>(value);
4244         }
4245
4246         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
4247         {
4248                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
4249         }
4250
4251         RValue<UShort> Extract(RValue<UShort8> val, int i)
4252         {
4253                 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
4254         }
4255
4256         RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
4257         {
4258                 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
4259         }
4260
4261         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
4262         {
4263                 if(emulateIntrinsics)
4264                 {
4265                         UShort8 result;
4266                         result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
4267                         result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
4268                         result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
4269                         result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
4270                         result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
4271                         result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
4272                         result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
4273                         result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
4274
4275                         return result;
4276                 }
4277                 else
4278                 {
4279                         return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4280                 }
4281         }
4282
4283         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
4284         {
4285                 if(emulateIntrinsics)
4286                 {
4287                         UShort8 result;
4288                         result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
4289                         result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
4290                         result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
4291                         result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
4292                         result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
4293                         result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
4294                         result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
4295                         result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
4296
4297                         return result;
4298                 }
4299                 else
4300                 {
4301                         return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4302                 }
4303         }
4304
4305         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
4306         {
4307                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
4308         }
4309
4310         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
4311         {
4312                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
4313         }
4314
4315         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
4316         {
4317                 return lhs = lhs + rhs;
4318         }
4319
4320         RValue<UShort8> operator~(RValue<UShort8> val)
4321         {
4322                 return RValue<UShort8>(Nucleus::createNot(val.value));
4323         }
4324
4325         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
4326         {
4327                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4328         }
4329
4330         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
4331         {
4332                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4333         }
4334
4335         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
4336 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
4337 //      {
4338 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4339 //      }
4340
4341         Type *UShort8::getType()
4342         {
4343                 return T(Ice::IceType_v8i16);
4344         }
4345
4346         Int::Int(Argument<Int> argument)
4347         {
4348                 storeValue(argument.value);
4349         }
4350
4351         Int::Int(RValue<Byte> cast)
4352         {
4353                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4354
4355                 storeValue(integer);
4356         }
4357
4358         Int::Int(RValue<SByte> cast)
4359         {
4360                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4361
4362                 storeValue(integer);
4363         }
4364
4365         Int::Int(RValue<Short> cast)
4366         {
4367                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4368
4369                 storeValue(integer);
4370         }
4371
4372         Int::Int(RValue<UShort> cast)
4373         {
4374                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4375
4376                 storeValue(integer);
4377         }
4378
4379         Int::Int(RValue<Int2> cast)
4380         {
4381                 *this = Extract(cast, 0);
4382         }
4383
4384         Int::Int(RValue<Long> cast)
4385         {
4386                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
4387
4388                 storeValue(integer);
4389         }
4390
4391         Int::Int(RValue<Float> cast)
4392         {
4393                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
4394
4395                 storeValue(integer);
4396         }
4397
4398         Int::Int(int x)
4399         {
4400                 storeValue(Nucleus::createConstantInt(x));
4401         }
4402
4403         Int::Int(RValue<Int> rhs)
4404         {
4405                 storeValue(rhs.value);
4406         }
4407
4408         Int::Int(RValue<UInt> rhs)
4409         {
4410                 storeValue(rhs.value);
4411         }
4412
4413         Int::Int(const Int &rhs)
4414         {
4415                 Value *value = rhs.loadValue();
4416                 storeValue(value);
4417         }
4418
4419         Int::Int(const Reference<Int> &rhs)
4420         {
4421                 Value *value = rhs.loadValue();
4422                 storeValue(value);
4423         }
4424
4425         Int::Int(const UInt &rhs)
4426         {
4427                 Value *value = rhs.loadValue();
4428                 storeValue(value);
4429         }
4430
4431         Int::Int(const Reference<UInt> &rhs)
4432         {
4433                 Value *value = rhs.loadValue();
4434                 storeValue(value);
4435         }
4436
4437         RValue<Int> Int::operator=(int rhs)
4438         {
4439                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
4440         }
4441
4442         RValue<Int> Int::operator=(RValue<Int> rhs)
4443         {
4444                 storeValue(rhs.value);
4445
4446                 return rhs;
4447         }
4448
4449         RValue<Int> Int::operator=(RValue<UInt> rhs)
4450         {
4451                 storeValue(rhs.value);
4452
4453                 return RValue<Int>(rhs);
4454         }
4455
4456         RValue<Int> Int::operator=(const Int &rhs)
4457         {
4458                 Value *value = rhs.loadValue();
4459                 storeValue(value);
4460
4461                 return RValue<Int>(value);
4462         }
4463
4464         RValue<Int> Int::operator=(const Reference<Int> &rhs)
4465         {
4466                 Value *value = rhs.loadValue();
4467                 storeValue(value);
4468
4469                 return RValue<Int>(value);
4470         }
4471
4472         RValue<Int> Int::operator=(const UInt &rhs)
4473         {
4474                 Value *value = rhs.loadValue();
4475                 storeValue(value);
4476
4477                 return RValue<Int>(value);
4478         }
4479
4480         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
4481         {
4482                 Value *value = rhs.loadValue();
4483                 storeValue(value);
4484
4485                 return RValue<Int>(value);
4486         }
4487
4488         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
4489         {
4490                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
4491         }
4492
4493         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
4494         {
4495                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
4496         }
4497
4498         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
4499         {
4500                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
4501         }
4502
4503         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
4504         {
4505                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
4506         }
4507
4508         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
4509         {
4510                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
4511         }
4512
4513         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
4514         {
4515                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
4516         }
4517
4518         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
4519         {
4520                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
4521         }
4522
4523         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
4524         {
4525                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
4526         }
4527
4528         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
4529         {
4530                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
4531         }
4532
4533         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
4534         {
4535                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
4536         }
4537
4538         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
4539         {
4540                 return lhs = lhs + rhs;
4541         }
4542
4543         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
4544         {
4545                 return lhs = lhs - rhs;
4546         }
4547
4548         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
4549         {
4550                 return lhs = lhs * rhs;
4551         }
4552
4553         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
4554         {
4555                 return lhs = lhs / rhs;
4556         }
4557
4558         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
4559         {
4560                 return lhs = lhs % rhs;
4561         }
4562
4563         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
4564         {
4565                 return lhs = lhs & rhs;
4566         }
4567
4568         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
4569         {
4570                 return lhs = lhs | rhs;
4571         }
4572
4573         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
4574         {
4575                 return lhs = lhs ^ rhs;
4576         }
4577
4578         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
4579         {
4580                 return lhs = lhs << rhs;
4581         }
4582
4583         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
4584         {
4585                 return lhs = lhs >> rhs;
4586         }
4587
4588         RValue<Int> operator+(RValue<Int> val)
4589         {
4590                 return val;
4591         }
4592
4593         RValue<Int> operator-(RValue<Int> val)
4594         {
4595                 return RValue<Int>(Nucleus::createNeg(val.value));
4596         }
4597
4598         RValue<Int> operator~(RValue<Int> val)
4599         {
4600                 return RValue<Int>(Nucleus::createNot(val.value));
4601         }
4602
4603         RValue<Int> operator++(Int &val, int)   // Post-increment
4604         {
4605                 RValue<Int> res = val;
4606                 val += 1;
4607                 return res;
4608         }
4609
4610         const Int &operator++(Int &val)   // Pre-increment
4611         {
4612                 val += 1;
4613                 return val;
4614         }
4615
4616         RValue<Int> operator--(Int &val, int)   // Post-decrement
4617         {
4618                 RValue<Int> res = val;
4619                 val -= 1;
4620                 return res;
4621         }
4622
4623         const Int &operator--(Int &val)   // Pre-decrement
4624         {
4625                 val -= 1;
4626                 return val;
4627         }
4628
4629         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4630         {
4631                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4632         }
4633
4634         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4635         {
4636                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4637         }
4638
4639         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4640         {
4641                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4642         }
4643
4644         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4645         {
4646                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4647         }
4648
4649         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4650         {
4651                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4652         }
4653
4654         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4655         {
4656                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4657         }
4658
4659         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4660         {
4661                 return IfThenElse(x > y, x, y);
4662         }
4663
4664         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4665         {
4666                 return IfThenElse(x < y, x, y);
4667         }
4668
4669         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4670         {
4671                 return Min(Max(x, min), max);
4672         }
4673
4674         RValue<Int> RoundInt(RValue<Float> cast)
4675         {
4676                 if(emulateIntrinsics)
4677                 {
4678                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4679                         return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
4680                 }
4681                 else
4682                 {
4683                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4684                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4685                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4686                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4687                         nearbyint->addArg(cast.value);
4688                         ::basicBlock->appendInst(nearbyint);
4689
4690                         return RValue<Int>(V(result));
4691                 }
4692         }
4693
4694         Type *Int::getType()
4695         {
4696                 return T(Ice::IceType_i32);
4697         }
4698
4699         Long::Long(RValue<Int> cast)
4700         {
4701                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4702
4703                 storeValue(integer);
4704         }
4705
4706         Long::Long(RValue<UInt> cast)
4707         {
4708                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4709
4710                 storeValue(integer);
4711         }
4712
4713         Long::Long(RValue<Long> rhs)
4714         {
4715                 storeValue(rhs.value);
4716         }
4717
4718         RValue<Long> Long::operator=(int64_t rhs)
4719         {
4720                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4721         }
4722
4723         RValue<Long> Long::operator=(RValue<Long> rhs)
4724         {
4725                 storeValue(rhs.value);
4726
4727                 return rhs;
4728         }
4729
4730         RValue<Long> Long::operator=(const Long &rhs)
4731         {
4732                 Value *value = rhs.loadValue();
4733                 storeValue(value);
4734
4735                 return RValue<Long>(value);
4736         }
4737
4738         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4739         {
4740                 Value *value = rhs.loadValue();
4741                 storeValue(value);
4742
4743                 return RValue<Long>(value);
4744         }
4745
4746         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4747         {
4748                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4749         }
4750
4751         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4752         {
4753                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4754         }
4755
4756         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4757         {
4758                 return lhs = lhs + rhs;
4759         }
4760
4761         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4762         {
4763                 return lhs = lhs - rhs;
4764         }
4765
4766         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4767         {
4768                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4769         }
4770
4771         Type *Long::getType()
4772         {
4773                 return T(Ice::IceType_i64);
4774         }
4775
4776         UInt::UInt(Argument<UInt> argument)
4777         {
4778                 storeValue(argument.value);
4779         }
4780
4781         UInt::UInt(RValue<UShort> cast)
4782         {
4783                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4784
4785                 storeValue(integer);
4786         }
4787
4788         UInt::UInt(RValue<Long> cast)
4789         {
4790                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4791
4792                 storeValue(integer);
4793         }
4794
4795         UInt::UInt(RValue<Float> cast)
4796         {
4797                 // Smallest positive value representable in UInt, but not in Int
4798                 const unsigned int ustart = 0x80000000u;
4799                 const float ustartf = float(ustart);
4800
4801                 // If the value is negative, store 0, otherwise store the result of the conversion
4802                 storeValue((~(As<Int>(cast) >> 31) &
4803                 // Check if the value can be represented as an Int
4804                         IfThenElse(cast >= ustartf,
4805                 // If the value is too large, subtract ustart and re-add it after conversion.
4806                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4807                 // Otherwise, just convert normally
4808                                 Int(cast))).value);
4809         }
4810
4811         UInt::UInt(int x)
4812         {
4813                 storeValue(Nucleus::createConstantInt(x));
4814         }
4815
4816         UInt::UInt(unsigned int x)
4817         {
4818                 storeValue(Nucleus::createConstantInt(x));
4819         }
4820
4821         UInt::UInt(RValue<UInt> rhs)
4822         {
4823                 storeValue(rhs.value);
4824         }
4825
4826         UInt::UInt(RValue<Int> rhs)
4827         {
4828                 storeValue(rhs.value);
4829         }
4830
4831         UInt::UInt(const UInt &rhs)
4832         {
4833                 Value *value = rhs.loadValue();
4834                 storeValue(value);
4835         }
4836
4837         UInt::UInt(const Reference<UInt> &rhs)
4838         {
4839                 Value *value = rhs.loadValue();
4840                 storeValue(value);
4841         }
4842
4843         UInt::UInt(const Int &rhs)
4844         {
4845                 Value *value = rhs.loadValue();
4846                 storeValue(value);
4847         }
4848
4849         UInt::UInt(const Reference<Int> &rhs)
4850         {
4851                 Value *value = rhs.loadValue();
4852                 storeValue(value);
4853         }
4854
4855         RValue<UInt> UInt::operator=(unsigned int rhs)
4856         {
4857                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4858         }
4859
4860         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4861         {
4862                 storeValue(rhs.value);
4863
4864                 return rhs;
4865         }
4866
4867         RValue<UInt> UInt::operator=(RValue<Int> rhs)
4868         {
4869                 storeValue(rhs.value);
4870
4871                 return RValue<UInt>(rhs);
4872         }
4873
4874         RValue<UInt> UInt::operator=(const UInt &rhs)
4875         {
4876                 Value *value = rhs.loadValue();
4877                 storeValue(value);
4878
4879                 return RValue<UInt>(value);
4880         }
4881
4882         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4883         {
4884                 Value *value = rhs.loadValue();
4885                 storeValue(value);
4886
4887                 return RValue<UInt>(value);
4888         }
4889
4890         RValue<UInt> UInt::operator=(const Int &rhs)
4891         {
4892                 Value *value = rhs.loadValue();
4893                 storeValue(value);
4894
4895                 return RValue<UInt>(value);
4896         }
4897
4898         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4899         {
4900                 Value *value = rhs.loadValue();
4901                 storeValue(value);
4902
4903                 return RValue<UInt>(value);
4904         }
4905
4906         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4907         {
4908                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4909         }
4910
4911         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4912         {
4913                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4914         }
4915
4916         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4917         {
4918                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4919         }
4920
4921         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4922         {
4923                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4924         }
4925
4926         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4927         {
4928                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4929         }
4930
4931         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4932         {
4933                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4934         }
4935
4936         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4937         {
4938                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4939         }
4940
4941         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4942         {
4943                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4944         }
4945
4946         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4947         {
4948                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4949         }
4950
4951         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4952         {
4953                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4954         }
4955
4956         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4957         {
4958                 return lhs = lhs + rhs;
4959         }
4960
4961         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4962         {
4963                 return lhs = lhs - rhs;
4964         }
4965
4966         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4967         {
4968                 return lhs = lhs * rhs;
4969         }
4970
4971         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4972         {
4973                 return lhs = lhs / rhs;
4974         }
4975
4976         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4977         {
4978                 return lhs = lhs % rhs;
4979         }
4980
4981         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4982         {
4983                 return lhs = lhs & rhs;
4984         }
4985
4986         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4987         {
4988                 return lhs = lhs | rhs;
4989         }
4990
4991         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4992         {
4993                 return lhs = lhs ^ rhs;
4994         }
4995
4996         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4997         {
4998                 return lhs = lhs << rhs;
4999         }
5000
5001         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
5002         {
5003                 return lhs = lhs >> rhs;
5004         }
5005
5006         RValue<UInt> operator+(RValue<UInt> val)
5007         {
5008                 return val;
5009         }
5010
5011         RValue<UInt> operator-(RValue<UInt> val)
5012         {
5013                 return RValue<UInt>(Nucleus::createNeg(val.value));
5014         }
5015
5016         RValue<UInt> operator~(RValue<UInt> val)
5017         {
5018                 return RValue<UInt>(Nucleus::createNot(val.value));
5019         }
5020
5021         RValue<UInt> operator++(UInt &val, int)   // Post-increment
5022         {
5023                 RValue<UInt> res = val;
5024                 val += 1;
5025                 return res;
5026         }
5027
5028         const UInt &operator++(UInt &val)   // Pre-increment
5029         {
5030                 val += 1;
5031                 return val;
5032         }
5033
5034         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
5035         {
5036                 RValue<UInt> res = val;
5037                 val -= 1;
5038                 return res;
5039         }
5040
5041         const UInt &operator--(UInt &val)   // Pre-decrement
5042         {
5043                 val -= 1;
5044                 return val;
5045         }
5046
5047         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
5048         {
5049                 return IfThenElse(x > y, x, y);
5050         }
5051
5052         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
5053         {
5054                 return IfThenElse(x < y, x, y);
5055         }
5056
5057         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
5058         {
5059                 return Min(Max(x, min), max);
5060         }
5061
5062         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
5063         {
5064                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
5065         }
5066
5067         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
5068         {
5069                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
5070         }
5071
5072         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
5073         {
5074                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
5075         }
5076
5077         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
5078         {
5079                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
5080         }
5081
5082         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
5083         {
5084                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
5085         }
5086
5087         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
5088         {
5089                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
5090         }
5091
5092 //      RValue<UInt> RoundUInt(RValue<Float> cast)
5093 //      {
5094 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
5095 //      }
5096
5097         Type *UInt::getType()
5098         {
5099                 return T(Ice::IceType_i32);
5100         }
5101
5102 //      Int2::Int2(RValue<Int> cast)
5103 //      {
5104 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
5105 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
5106 //
5107 //              Constant *shuffle[2];
5108 //              shuffle[0] = Nucleus::createConstantInt(0);
5109 //              shuffle[1] = Nucleus::createConstantInt(0);
5110 //
5111 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
5112 //
5113 //              storeValue(replicate);
5114 //      }
5115
5116         Int2::Int2(RValue<Int4> cast)
5117         {
5118                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5119         }
5120
5121         Int2::Int2(int x, int y)
5122         {
5123                 int64_t constantVector[2] = {x, y};
5124                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5125         }
5126
5127         Int2::Int2(RValue<Int2> rhs)
5128         {
5129                 storeValue(rhs.value);
5130         }
5131
5132         Int2::Int2(const Int2 &rhs)
5133         {
5134                 Value *value = rhs.loadValue();
5135                 storeValue(value);
5136         }
5137
5138         Int2::Int2(const Reference<Int2> &rhs)
5139         {
5140                 Value *value = rhs.loadValue();
5141                 storeValue(value);
5142         }
5143
5144         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
5145         {
5146                 int shuffle[4] = {0, 4, 1, 5};
5147                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
5148
5149                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
5150         }
5151
5152         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
5153         {
5154                 storeValue(rhs.value);
5155
5156                 return rhs;
5157         }
5158
5159         RValue<Int2> Int2::operator=(const Int2 &rhs)
5160         {
5161                 Value *value = rhs.loadValue();
5162                 storeValue(value);
5163
5164                 return RValue<Int2>(value);
5165         }
5166
5167         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
5168         {
5169                 Value *value = rhs.loadValue();
5170                 storeValue(value);
5171
5172                 return RValue<Int2>(value);
5173         }
5174
5175         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
5176         {
5177                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
5178         }
5179
5180         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
5181         {
5182                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
5183         }
5184
5185 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
5186 //      {
5187 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
5188 //      }
5189
5190 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
5191 //      {
5192 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
5193 //      }
5194
5195 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
5196 //      {
5197 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
5198 //      }
5199
5200         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
5201         {
5202                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
5203         }
5204
5205         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
5206         {
5207                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
5208         }
5209
5210         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
5211         {
5212                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
5213         }
5214
5215         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
5216         {
5217                 if(emulateIntrinsics)
5218                 {
5219                         Int2 result;
5220                         result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
5221                         result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
5222
5223                         return result;
5224                 }
5225                 else
5226                 {
5227                         return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5228                 }
5229         }
5230
5231         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
5232         {
5233                 if(emulateIntrinsics)
5234                 {
5235                         Int2 result;
5236                         result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
5237                         result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
5238
5239                         return result;
5240                 }
5241                 else
5242                 {
5243                         return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5244                 }
5245         }
5246
5247         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
5248         {
5249                 return lhs = lhs + rhs;
5250         }
5251
5252         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
5253         {
5254                 return lhs = lhs - rhs;
5255         }
5256
5257 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
5258 //      {
5259 //              return lhs = lhs * rhs;
5260 //      }
5261
5262 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
5263 //      {
5264 //              return lhs = lhs / rhs;
5265 //      }
5266
5267 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
5268 //      {
5269 //              return lhs = lhs % rhs;
5270 //      }
5271
5272         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
5273         {
5274                 return lhs = lhs & rhs;
5275         }
5276
5277         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
5278         {
5279                 return lhs = lhs | rhs;
5280         }
5281
5282         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
5283         {
5284                 return lhs = lhs ^ rhs;
5285         }
5286
5287         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
5288         {
5289                 return lhs = lhs << rhs;
5290         }
5291
5292         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
5293         {
5294                 return lhs = lhs >> rhs;
5295         }
5296
5297 //      RValue<Int2> operator+(RValue<Int2> val)
5298 //      {
5299 //              return val;
5300 //      }
5301
5302 //      RValue<Int2> operator-(RValue<Int2> val)
5303 //      {
5304 //              return RValue<Int2>(Nucleus::createNeg(val.value));
5305 //      }
5306
5307         RValue<Int2> operator~(RValue<Int2> val)
5308         {
5309                 return RValue<Int2>(Nucleus::createNot(val.value));
5310         }
5311
5312         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
5313         {
5314                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5315                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5316         }
5317
5318         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
5319         {
5320                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5321                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5322                 return As<Short4>(Swizzle(lowHigh, 0xEE));
5323         }
5324
5325         RValue<Int> Extract(RValue<Int2> val, int i)
5326         {
5327                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
5328         }
5329
5330         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
5331         {
5332                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
5333         }
5334
5335         Type *Int2::getType()
5336         {
5337                 return T(Type_v2i32);
5338         }
5339
5340         UInt2::UInt2(unsigned int x, unsigned int y)
5341         {
5342                 int64_t constantVector[2] = {x, y};
5343                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5344         }
5345
5346         UInt2::UInt2(RValue<UInt2> rhs)
5347         {
5348                 storeValue(rhs.value);
5349         }
5350
5351         UInt2::UInt2(const UInt2 &rhs)
5352         {
5353                 Value *value = rhs.loadValue();
5354                 storeValue(value);
5355         }
5356
5357         UInt2::UInt2(const Reference<UInt2> &rhs)
5358         {
5359                 Value *value = rhs.loadValue();
5360                 storeValue(value);
5361         }
5362
5363         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
5364         {
5365                 storeValue(rhs.value);
5366
5367                 return rhs;
5368         }
5369
5370         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
5371         {
5372                 Value *value = rhs.loadValue();
5373                 storeValue(value);
5374
5375                 return RValue<UInt2>(value);
5376         }
5377
5378         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
5379         {
5380                 Value *value = rhs.loadValue();
5381                 storeValue(value);
5382
5383                 return RValue<UInt2>(value);
5384         }
5385
5386         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5387         {
5388                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5389         }
5390
5391         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5392         {
5393                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5394         }
5395
5396 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5397 //      {
5398 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5399 //      }
5400
5401 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5402 //      {
5403 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5404 //      }
5405
5406 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5407 //      {
5408 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5409 //      }
5410
5411         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5412         {
5413                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5414         }
5415
5416         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5417         {
5418                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5419         }
5420
5421         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5422         {
5423                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5424         }
5425
5426         RValue<UInt> Extract(RValue<UInt2> val, int i)
5427         {
5428                 return RValue<UInt>(Nucleus::createExtractElement(val.value, UInt::getType(), i));
5429         }
5430
5431         RValue<UInt2> Insert(RValue<UInt2> val, RValue<UInt> element, int i)
5432         {
5433                 return RValue<UInt2>(Nucleus::createInsertElement(val.value, element.value, i));
5434         }
5435
5436         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5437         {
5438                 if(emulateIntrinsics)
5439                 {
5440                         UInt2 result;
5441                         result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
5442                         result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
5443
5444                         return result;
5445                 }
5446                 else
5447                 {
5448                         return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5449                 }
5450         }
5451
5452         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5453         {
5454                 if(emulateIntrinsics)
5455                 {
5456                         UInt2 result;
5457                         result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
5458                         result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
5459
5460                         return result;
5461                 }
5462                 else
5463                 {
5464                         return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5465                 }
5466         }
5467
5468         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
5469         {
5470                 return lhs = lhs + rhs;
5471         }
5472
5473         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
5474         {
5475                 return lhs = lhs - rhs;
5476         }
5477
5478 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
5479 //      {
5480 //              return lhs = lhs * rhs;
5481 //      }
5482
5483 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
5484 //      {
5485 //              return lhs = lhs / rhs;
5486 //      }
5487
5488 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
5489 //      {
5490 //              return lhs = lhs % rhs;
5491 //      }
5492
5493         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
5494         {
5495                 return lhs = lhs & rhs;
5496         }
5497
5498         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
5499         {
5500                 return lhs = lhs | rhs;
5501         }
5502
5503         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
5504         {
5505                 return lhs = lhs ^ rhs;
5506         }
5507
5508         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
5509         {
5510                 return lhs = lhs << rhs;
5511         }
5512
5513         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
5514         {
5515                 return lhs = lhs >> rhs;
5516         }
5517
5518 //      RValue<UInt2> operator+(RValue<UInt2> val)
5519 //      {
5520 //              return val;
5521 //      }
5522
5523 //      RValue<UInt2> operator-(RValue<UInt2> val)
5524 //      {
5525 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
5526 //      }
5527
5528         RValue<UInt2> operator~(RValue<UInt2> val)
5529         {
5530                 return RValue<UInt2>(Nucleus::createNot(val.value));
5531         }
5532
5533         Type *UInt2::getType()
5534         {
5535                 return T(Type_v2i32);
5536         }
5537
5538         Int4::Int4(RValue<Byte4> cast)
5539         {
5540                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5541                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
5542
5543                 Value *e;
5544                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
5545                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5546                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
5547
5548                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5549                 Value *d = Nucleus::createBitCast(c, Short8::getType());
5550                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
5551
5552                 Value *f = Nucleus::createBitCast(e, Int4::getType());
5553                 storeValue(f);
5554         }
5555
5556         Int4::Int4(RValue<SByte4> cast)
5557         {
5558                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5559                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
5560
5561                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
5562                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5563                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
5564
5565                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5566                 Value *d = Nucleus::createBitCast(c, Short8::getType());
5567                 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
5568
5569                 *this = As<Int4>(e) >> 24;
5570         }
5571
5572         Int4::Int4(RValue<Float4> cast)
5573         {
5574                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5575
5576                 storeValue(xyzw);
5577         }
5578
5579         Int4::Int4(RValue<Short4> cast)
5580         {
5581                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5582                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
5583
5584                 *this = As<Int4>(c) >> 16;
5585         }
5586
5587         Int4::Int4(RValue<UShort4> cast)
5588         {
5589                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5590                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
5591                 Value *d = Nucleus::createBitCast(c, Int4::getType());
5592                 storeValue(d);
5593         }
5594
5595         Int4::Int4(int xyzw)
5596         {
5597                 constant(xyzw, xyzw, xyzw, xyzw);
5598         }
5599
5600         Int4::Int4(int x, int yzw)
5601         {
5602                 constant(x, yzw, yzw, yzw);
5603         }
5604
5605         Int4::Int4(int x, int y, int zw)
5606         {
5607                 constant(x, y, zw, zw);
5608         }
5609
5610         Int4::Int4(int x, int y, int z, int w)
5611         {
5612                 constant(x, y, z, w);
5613         }
5614
5615         void Int4::constant(int x, int y, int z, int w)
5616         {
5617                 int64_t constantVector[4] = {x, y, z, w};
5618                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5619         }
5620
5621         Int4::Int4(RValue<Int4> rhs)
5622         {
5623                 storeValue(rhs.value);
5624         }
5625
5626         Int4::Int4(const Int4 &rhs)
5627         {
5628                 Value *value = rhs.loadValue();
5629                 storeValue(value);
5630         }
5631
5632         Int4::Int4(const Reference<Int4> &rhs)
5633         {
5634                 Value *value = rhs.loadValue();
5635                 storeValue(value);
5636         }
5637
5638         Int4::Int4(RValue<UInt4> rhs)
5639         {
5640                 storeValue(rhs.value);
5641         }
5642
5643         Int4::Int4(const UInt4 &rhs)
5644         {
5645                 Value *value = rhs.loadValue();
5646                 storeValue(value);
5647         }
5648
5649         Int4::Int4(const Reference<UInt4> &rhs)
5650         {
5651                 Value *value = rhs.loadValue();
5652                 storeValue(value);
5653         }
5654
5655         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5656         {
5657                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5658                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5659
5660                 storeValue(packed);
5661         }
5662
5663         Int4::Int4(RValue<Int> rhs)
5664         {
5665                 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
5666
5667                 int swizzle[4] = {0, 0, 0, 0};
5668                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
5669
5670                 storeValue(replicate);
5671         }
5672
5673         Int4::Int4(const Int &rhs)
5674         {
5675                 *this = RValue<Int>(rhs.loadValue());
5676         }
5677
5678         Int4::Int4(const Reference<Int> &rhs)
5679         {
5680                 *this = RValue<Int>(rhs.loadValue());
5681         }
5682
5683         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5684         {
5685                 storeValue(rhs.value);
5686
5687                 return rhs;
5688         }
5689
5690         RValue<Int4> Int4::operator=(const Int4 &rhs)
5691         {
5692                 Value *value = rhs.loadValue();
5693                 storeValue(value);
5694
5695                 return RValue<Int4>(value);
5696         }
5697
5698         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5699         {
5700                 Value *value = rhs.loadValue();
5701                 storeValue(value);
5702
5703                 return RValue<Int4>(value);
5704         }
5705
5706         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5707         {
5708                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5709         }
5710
5711         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5712         {
5713                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5714         }
5715
5716         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5717         {
5718                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5719         }
5720
5721         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5722         {
5723                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5724         }
5725
5726         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5727         {
5728                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5729         }
5730
5731         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5732         {
5733                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5734         }
5735
5736         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5737         {
5738                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5739         }
5740
5741         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5742         {
5743                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5744         }
5745
5746         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5747         {
5748                 if(emulateIntrinsics)
5749                 {
5750                         Int4 result;
5751                         result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
5752                         result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
5753                         result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
5754                         result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
5755
5756                         return result;
5757                 }
5758                 else
5759                 {
5760                         return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5761                 }
5762         }
5763
5764         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5765         {
5766                 if(emulateIntrinsics)
5767                 {
5768                         Int4 result;
5769                         result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
5770                         result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
5771                         result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
5772                         result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
5773
5774                         return result;
5775                 }
5776                 else
5777                 {
5778                         return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5779                 }
5780         }
5781
5782         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5783         {
5784                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5785         }
5786
5787         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5788         {
5789                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5790         }
5791
5792         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5793         {
5794                 return lhs = lhs + rhs;
5795         }
5796
5797         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5798         {
5799                 return lhs = lhs - rhs;
5800         }
5801
5802         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5803         {
5804                 return lhs = lhs * rhs;
5805         }
5806
5807 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5808 //      {
5809 //              return lhs = lhs / rhs;
5810 //      }
5811
5812 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5813 //      {
5814 //              return lhs = lhs % rhs;
5815 //      }
5816
5817         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5818         {
5819                 return lhs = lhs & rhs;
5820         }
5821
5822         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5823         {
5824                 return lhs = lhs | rhs;
5825         }
5826
5827         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5828         {
5829                 return lhs = lhs ^ rhs;
5830         }
5831
5832         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5833         {
5834                 return lhs = lhs << rhs;
5835         }
5836
5837         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5838         {
5839                 return lhs = lhs >> rhs;
5840         }
5841
5842         RValue<Int4> operator+(RValue<Int4> val)
5843         {
5844                 return val;
5845         }
5846
5847         RValue<Int4> operator-(RValue<Int4> val)
5848         {
5849                 return RValue<Int4>(Nucleus::createNeg(val.value));
5850         }
5851
5852         RValue<Int4> operator~(RValue<Int4> val)
5853         {
5854                 return RValue<Int4>(Nucleus::createNot(val.value));
5855         }
5856
5857         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5858         {
5859                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5860         }
5861
5862         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5863         {
5864                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5865         }
5866
5867         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5868         {
5869                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5870         }
5871
5872         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5873         {
5874                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5875         }
5876
5877         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5878         {
5879                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5880         }
5881
5882         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5883         {
5884                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5885         }
5886
5887         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5888         {
5889                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5890                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5891                 ::basicBlock->appendInst(cmp);
5892
5893                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5894                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5895                 ::basicBlock->appendInst(select);
5896
5897                 return RValue<Int4>(V(result));
5898         }
5899
5900         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5901         {
5902                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5903                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5904                 ::basicBlock->appendInst(cmp);
5905
5906                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5907                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5908                 ::basicBlock->appendInst(select);
5909
5910                 return RValue<Int4>(V(result));
5911         }
5912
5913         RValue<Int4> RoundInt(RValue<Float4> cast)
5914         {
5915                 if(emulateIntrinsics)
5916                 {
5917                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
5918                         return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
5919                 }
5920                 else
5921                 {
5922                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5923                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5924                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5925                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5926                         nearbyint->addArg(cast.value);
5927                         ::basicBlock->appendInst(nearbyint);
5928
5929                         return RValue<Int4>(V(result));
5930                 }
5931         }
5932
5933         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5934         {
5935                 if(emulateIntrinsics)
5936                 {
5937                         Short8 result;
5938                         result = Insert(result, Saturate(Extract(x, 0)), 0);
5939                         result = Insert(result, Saturate(Extract(x, 1)), 1);
5940                         result = Insert(result, Saturate(Extract(x, 2)), 2);
5941                         result = Insert(result, Saturate(Extract(x, 3)), 3);
5942                         result = Insert(result, Saturate(Extract(y, 0)), 4);
5943                         result = Insert(result, Saturate(Extract(y, 1)), 5);
5944                         result = Insert(result, Saturate(Extract(y, 2)), 6);
5945                         result = Insert(result, Saturate(Extract(y, 3)), 7);
5946
5947                         return result;
5948                 }
5949                 else
5950                 {
5951                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5952                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5953                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5954                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5955                         pack->addArg(x.value);
5956                         pack->addArg(y.value);
5957                         ::basicBlock->appendInst(pack);
5958
5959                         return RValue<Short8>(V(result));
5960                 }
5961         }
5962
5963         RValue<Int> Extract(RValue<Int4> x, int i)
5964         {
5965                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5966         }
5967
5968         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5969         {
5970                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5971         }
5972
5973         RValue<Int> SignMask(RValue<Int4> x)
5974         {
5975                 if(emulateIntrinsics)
5976                 {
5977                         Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
5978                         return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
5979                 }
5980                 else
5981                 {
5982                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5983                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5984                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5985                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5986                         movmsk->addArg(x.value);
5987                         ::basicBlock->appendInst(movmsk);
5988
5989                         return RValue<Int>(V(result));
5990                 }
5991         }
5992
5993         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5994         {
5995                 return RValue<Int4>(createSwizzle4(x.value, select));
5996         }
5997
5998         Type *Int4::getType()
5999         {
6000                 return T(Ice::IceType_v4i32);
6001         }
6002
6003         UInt4::UInt4(RValue<Float4> cast)
6004         {
6005                 // Smallest positive value representable in UInt, but not in Int
6006                 const unsigned int ustart = 0x80000000u;
6007                 const float ustartf = float(ustart);
6008
6009                 // Check if the value can be represented as an Int
6010                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
6011                 // If the value is too large, subtract ustart and re-add it after conversion.
6012                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
6013                 // Otherwise, just convert normally
6014                           (~uiValue & Int4(cast));
6015                 // If the value is negative, store 0, otherwise store the result of the conversion
6016                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
6017         }
6018
6019         UInt4::UInt4(int xyzw)
6020         {
6021                 constant(xyzw, xyzw, xyzw, xyzw);
6022         }
6023
6024         UInt4::UInt4(int x, int yzw)
6025         {
6026                 constant(x, yzw, yzw, yzw);
6027         }
6028
6029         UInt4::UInt4(int x, int y, int zw)
6030         {
6031                 constant(x, y, zw, zw);
6032         }
6033
6034         UInt4::UInt4(int x, int y, int z, int w)
6035         {
6036                 constant(x, y, z, w);
6037         }
6038
6039         void UInt4::constant(int x, int y, int z, int w)
6040         {
6041                 int64_t constantVector[4] = {x, y, z, w};
6042                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
6043         }
6044
6045         UInt4::UInt4(RValue<UInt4> rhs)
6046         {
6047                 storeValue(rhs.value);
6048         }
6049
6050         UInt4::UInt4(const UInt4 &rhs)
6051         {
6052                 Value *value = rhs.loadValue();
6053                 storeValue(value);
6054         }
6055
6056         UInt4::UInt4(const Reference<UInt4> &rhs)
6057         {
6058                 Value *value = rhs.loadValue();
6059                 storeValue(value);
6060         }
6061
6062         UInt4::UInt4(RValue<Int4> rhs)
6063         {
6064                 storeValue(rhs.value);
6065         }
6066
6067         UInt4::UInt4(const Int4 &rhs)
6068         {
6069                 Value *value = rhs.loadValue();
6070                 storeValue(value);
6071         }
6072
6073         UInt4::UInt4(const Reference<Int4> &rhs)
6074         {
6075                 Value *value = rhs.loadValue();
6076                 storeValue(value);
6077         }
6078
6079         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
6080         {
6081                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
6082                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
6083
6084                 storeValue(packed);
6085         }
6086
6087         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
6088         {
6089                 storeValue(rhs.value);
6090
6091                 return rhs;
6092         }
6093
6094         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
6095         {
6096                 Value *value = rhs.loadValue();
6097                 storeValue(value);
6098
6099                 return RValue<UInt4>(value);
6100         }
6101
6102         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
6103         {
6104                 Value *value = rhs.loadValue();
6105                 storeValue(value);
6106
6107                 return RValue<UInt4>(value);
6108         }
6109
6110         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
6111         {
6112                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
6113         }
6114
6115         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
6116         {
6117                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
6118         }
6119
6120         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
6121         {
6122                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
6123         }
6124
6125         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
6126         {
6127                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
6128         }
6129
6130         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
6131         {
6132                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
6133         }
6134
6135         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
6136         {
6137                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
6138         }
6139
6140         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
6141         {
6142                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
6143         }
6144
6145         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
6146         {
6147                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
6148         }
6149
6150         RValue<UInt> Extract(RValue<UInt4> x, int i)
6151         {
6152                 return RValue<UInt>(Nucleus::createExtractElement(x.value, UInt::getType(), i));
6153         }
6154
6155         RValue<UInt4> Insert(RValue<UInt4> x, RValue<UInt> element, int i)
6156         {
6157                 return RValue<UInt4>(Nucleus::createInsertElement(x.value, element.value, i));
6158         }
6159
6160         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
6161         {
6162                 if(emulateIntrinsics)
6163                 {
6164                         UInt4 result;
6165                         result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
6166                         result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
6167                         result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
6168                         result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
6169
6170                         return result;
6171                 }
6172                 else
6173                 {
6174                         return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
6175                 }
6176         }
6177
6178         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
6179         {
6180                 if(emulateIntrinsics)
6181                 {
6182                         UInt4 result;
6183                         result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
6184                         result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
6185                         result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
6186                         result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
6187
6188                         return result;
6189                 }
6190                 else
6191                 {
6192                         return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
6193                 }
6194         }
6195
6196         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
6197         {
6198                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
6199         }
6200
6201         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
6202         {
6203                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
6204         }
6205
6206         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
6207         {
6208                 return lhs = lhs + rhs;
6209         }
6210
6211         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
6212         {
6213                 return lhs = lhs - rhs;
6214         }
6215
6216         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
6217         {
6218                 return lhs = lhs * rhs;
6219         }
6220
6221 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
6222 //      {
6223 //              return lhs = lhs / rhs;
6224 //      }
6225
6226 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
6227 //      {
6228 //              return lhs = lhs % rhs;
6229 //      }
6230
6231         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
6232         {
6233                 return lhs = lhs & rhs;
6234         }
6235
6236         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
6237         {
6238                 return lhs = lhs | rhs;
6239         }
6240
6241         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
6242         {
6243                 return lhs = lhs ^ rhs;
6244         }
6245
6246         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
6247         {
6248                 return lhs = lhs << rhs;
6249         }
6250
6251         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
6252         {
6253                 return lhs = lhs >> rhs;
6254         }
6255
6256         RValue<UInt4> operator+(RValue<UInt4> val)
6257         {
6258                 return val;
6259         }
6260
6261         RValue<UInt4> operator-(RValue<UInt4> val)
6262         {
6263                 return RValue<UInt4>(Nucleus::createNeg(val.value));
6264         }
6265
6266         RValue<UInt4> operator~(RValue<UInt4> val)
6267         {
6268                 return RValue<UInt4>(Nucleus::createNot(val.value));
6269         }
6270
6271         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
6272         {
6273                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
6274         }
6275
6276         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
6277         {
6278                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
6279         }
6280
6281         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
6282         {
6283                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
6284         }
6285
6286         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
6287         {
6288                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
6289         }
6290
6291         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
6292         {
6293                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
6294         }
6295
6296         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
6297         {
6298                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
6299         }
6300
6301         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
6302         {
6303                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6304                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
6305                 ::basicBlock->appendInst(cmp);
6306
6307                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
6308                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6309                 ::basicBlock->appendInst(select);
6310
6311                 return RValue<UInt4>(V(result));
6312         }
6313
6314         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
6315         {
6316                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6317                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
6318                 ::basicBlock->appendInst(cmp);
6319
6320                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
6321                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6322                 ::basicBlock->appendInst(select);
6323
6324                 return RValue<UInt4>(V(result));
6325         }
6326
6327         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
6328         {
6329                 if(CPUID::SSE4_1)
6330                 {
6331                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
6332                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6333                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6334                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6335                         pack->addArg(x.value);
6336                         pack->addArg(y.value);
6337                         ::basicBlock->appendInst(pack);
6338
6339                         return RValue<UShort8>(V(result));
6340                 }
6341                 else
6342                 {
6343                         RValue<Int4> sx = As<Int4>(x);
6344                         RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
6345
6346                         RValue<Int4> sy = As<Int4>(y);
6347                         RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
6348
6349                         return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
6350                 }
6351         }
6352
6353         Type *UInt4::getType()
6354         {
6355                 return T(Ice::IceType_v4i32);
6356         }
6357
6358         Float::Float(RValue<Int> cast)
6359         {
6360                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
6361
6362                 storeValue(integer);
6363         }
6364
6365         Float::Float(RValue<UInt> cast)
6366         {
6367                 RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
6368                                        As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
6369
6370                 storeValue(result.value);
6371         }
6372
6373         Float::Float(float x)
6374         {
6375                 storeValue(Nucleus::createConstantFloat(x));
6376         }
6377
6378         Float::Float(RValue<Float> rhs)
6379         {
6380                 storeValue(rhs.value);
6381         }
6382
6383         Float::Float(const Float &rhs)
6384         {
6385                 Value *value = rhs.loadValue();
6386                 storeValue(value);
6387         }
6388
6389         Float::Float(const Reference<Float> &rhs)
6390         {
6391                 Value *value = rhs.loadValue();
6392                 storeValue(value);
6393         }
6394
6395         RValue<Float> Float::operator=(RValue<Float> rhs)
6396         {
6397                 storeValue(rhs.value);
6398
6399                 return rhs;
6400         }
6401
6402         RValue<Float> Float::operator=(const Float &rhs)
6403         {
6404                 Value *value = rhs.loadValue();
6405                 storeValue(value);
6406
6407                 return RValue<Float>(value);
6408         }
6409
6410         RValue<Float> Float::operator=(const Reference<Float> &rhs)
6411         {
6412                 Value *value = rhs.loadValue();
6413                 storeValue(value);
6414
6415                 return RValue<Float>(value);
6416         }
6417
6418         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6419         {
6420                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6421         }
6422
6423         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6424         {
6425                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6426         }
6427
6428         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6429         {
6430                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6431         }
6432
6433         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6434         {
6435                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
6436         }
6437
6438         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
6439         {
6440                 return lhs = lhs + rhs;
6441         }
6442
6443         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
6444         {
6445                 return lhs = lhs - rhs;
6446         }
6447
6448         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
6449         {
6450                 return lhs = lhs * rhs;
6451         }
6452
6453         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
6454         {
6455                 return lhs = lhs / rhs;
6456         }
6457
6458         RValue<Float> operator+(RValue<Float> val)
6459         {
6460                 return val;
6461         }
6462
6463         RValue<Float> operator-(RValue<Float> val)
6464         {
6465                 return RValue<Float>(Nucleus::createFNeg(val.value));
6466         }
6467
6468         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6469         {
6470                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6471         }
6472
6473         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6474         {
6475                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6476         }
6477
6478         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6479         {
6480                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6481         }
6482
6483         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6484         {
6485                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6486         }
6487
6488         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6489         {
6490                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6491         }
6492
6493         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6494         {
6495                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6496         }
6497
6498         RValue<Float> Abs(RValue<Float> x)
6499         {
6500                 return IfThenElse(x > 0.0f, x, -x);
6501         }
6502
6503         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6504         {
6505                 return IfThenElse(x > y, x, y);
6506         }
6507
6508         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6509         {
6510                 return IfThenElse(x < y, x, y);
6511         }
6512
6513         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6514         {
6515                 return 1.0f / x;
6516         }
6517
6518         RValue<Float> RcpSqrt_pp(RValue<Float> x)
6519         {
6520                 return Rcp_pp(Sqrt(x));
6521         }
6522
6523         RValue<Float> Sqrt(RValue<Float> x)
6524         {
6525                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
6526                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6527                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6528                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6529                 sqrt->addArg(x.value);
6530                 ::basicBlock->appendInst(sqrt);
6531
6532                 return RValue<Float>(V(result));
6533         }
6534
6535         RValue<Float> Round(RValue<Float> x)
6536         {
6537                 return Float4(Round(Float4(x))).x;
6538         }
6539
6540         RValue<Float> Trunc(RValue<Float> x)
6541         {
6542                 return Float4(Trunc(Float4(x))).x;
6543         }
6544
6545         RValue<Float> Frac(RValue<Float> x)
6546         {
6547                 return Float4(Frac(Float4(x))).x;
6548         }
6549
6550         RValue<Float> Floor(RValue<Float> x)
6551         {
6552                 return Float4(Floor(Float4(x))).x;
6553         }
6554
6555         RValue<Float> Ceil(RValue<Float> x)
6556         {
6557                 return Float4(Ceil(Float4(x))).x;
6558         }
6559
6560         Type *Float::getType()
6561         {
6562                 return T(Ice::IceType_f32);
6563         }
6564
6565         Float2::Float2(RValue<Float4> cast)
6566         {
6567                 storeValue(Nucleus::createBitCast(cast.value, getType()));
6568         }
6569
6570         Type *Float2::getType()
6571         {
6572                 return T(Type_v2f32);
6573         }
6574
6575         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
6576         {
6577                 Value *a = Int4(cast).loadValue();
6578                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6579
6580                 storeValue(xyzw);
6581         }
6582
6583         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
6584         {
6585                 Value *a = Int4(cast).loadValue();
6586                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6587
6588                 storeValue(xyzw);
6589         }
6590
6591         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
6592         {
6593                 Int4 c(cast);
6594                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6595         }
6596
6597         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
6598         {
6599                 Int4 c(cast);
6600                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6601         }
6602
6603         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
6604         {
6605                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
6606
6607                 storeValue(xyzw);
6608         }
6609
6610         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
6611         {
6612                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
6613                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
6614
6615                 storeValue(result.value);
6616         }
6617
6618         Float4::Float4() : FloatXYZW(this)
6619         {
6620         }
6621
6622         Float4::Float4(float xyzw) : FloatXYZW(this)
6623         {
6624                 constant(xyzw, xyzw, xyzw, xyzw);
6625         }
6626
6627         Float4::Float4(float x, float yzw) : FloatXYZW(this)
6628         {
6629                 constant(x, yzw, yzw, yzw);
6630         }
6631
6632         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
6633         {
6634                 constant(x, y, zw, zw);
6635         }
6636
6637         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
6638         {
6639                 constant(x, y, z, w);
6640         }
6641
6642         void Float4::constant(float x, float y, float z, float w)
6643         {
6644                 double constantVector[4] = {x, y, z, w};
6645                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
6646         }
6647
6648         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
6649         {
6650                 storeValue(rhs.value);
6651         }
6652
6653         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
6654         {
6655                 Value *value = rhs.loadValue();
6656                 storeValue(value);
6657         }
6658
6659         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
6660         {
6661                 Value *value = rhs.loadValue();
6662                 storeValue(value);
6663         }
6664
6665         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
6666         {
6667                 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
6668
6669                 int swizzle[4] = {0, 0, 0, 0};
6670                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
6671
6672                 storeValue(replicate);
6673         }
6674
6675         Float4::Float4(const Float &rhs) : FloatXYZW(this)
6676         {
6677                 *this = RValue<Float>(rhs.loadValue());
6678         }
6679
6680         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
6681         {
6682                 *this = RValue<Float>(rhs.loadValue());
6683         }
6684
6685         RValue<Float4> Float4::operator=(float x)
6686         {
6687                 return *this = Float4(x, x, x, x);
6688         }
6689
6690         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6691         {
6692                 storeValue(rhs.value);
6693
6694                 return rhs;
6695         }
6696
6697         RValue<Float4> Float4::operator=(const Float4 &rhs)
6698         {
6699                 Value *value = rhs.loadValue();
6700                 storeValue(value);
6701
6702                 return RValue<Float4>(value);
6703         }
6704
6705         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6706         {
6707                 Value *value = rhs.loadValue();
6708                 storeValue(value);
6709
6710                 return RValue<Float4>(value);
6711         }
6712
6713         RValue<Float4> Float4::operator=(RValue<Float> rhs)
6714         {
6715                 return *this = Float4(rhs);
6716         }
6717
6718         RValue<Float4> Float4::operator=(const Float &rhs)
6719         {
6720                 return *this = Float4(rhs);
6721         }
6722
6723         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6724         {
6725                 return *this = Float4(rhs);
6726         }
6727
6728         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6729         {
6730                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6731         }
6732
6733         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6734         {
6735                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6736         }
6737
6738         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6739         {
6740                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6741         }
6742
6743         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6744         {
6745                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6746         }
6747
6748         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6749         {
6750                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6751         }
6752
6753         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6754         {
6755                 return lhs = lhs + rhs;
6756         }
6757
6758         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6759         {
6760                 return lhs = lhs - rhs;
6761         }
6762
6763         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6764         {
6765                 return lhs = lhs * rhs;
6766         }
6767
6768         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6769         {
6770                 return lhs = lhs / rhs;
6771         }
6772
6773         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6774         {
6775                 return lhs = lhs % rhs;
6776         }
6777
6778         RValue<Float4> operator+(RValue<Float4> val)
6779         {
6780                 return val;
6781         }
6782
6783         RValue<Float4> operator-(RValue<Float4> val)
6784         {
6785                 return RValue<Float4>(Nucleus::createFNeg(val.value));
6786         }
6787
6788         RValue<Float4> Abs(RValue<Float4> x)
6789         {
6790                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6791                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6792                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6793
6794                 return As<Float4>(result);
6795         }
6796
6797         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6798         {
6799                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6800                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
6801                 ::basicBlock->appendInst(cmp);
6802
6803                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6804                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6805                 ::basicBlock->appendInst(select);
6806
6807                 return RValue<Float4>(V(result));
6808         }
6809
6810         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6811         {
6812                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6813                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
6814                 ::basicBlock->appendInst(cmp);
6815
6816                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6817                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6818                 ::basicBlock->appendInst(select);
6819
6820                 return RValue<Float4>(V(result));
6821         }
6822
6823         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6824         {
6825                 return Float4(1.0f) / x;
6826         }
6827
6828         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6829         {
6830                 return Rcp_pp(Sqrt(x));
6831         }
6832
6833         RValue<Float4> Sqrt(RValue<Float4> x)
6834         {
6835                 if(emulateIntrinsics)
6836                 {
6837                         Float4 result;
6838                         result.x = Sqrt(Float(Float4(x).x));
6839                         result.y = Sqrt(Float(Float4(x).y));
6840                         result.z = Sqrt(Float(Float4(x).z));
6841                         result.w = Sqrt(Float(Float4(x).w));
6842
6843                         return result;
6844                 }
6845                 else
6846                 {
6847                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6848                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6849                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6850                         auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6851                         sqrt->addArg(x.value);
6852                         ::basicBlock->appendInst(sqrt);
6853
6854                         return RValue<Float4>(V(result));
6855                 }
6856         }
6857
6858         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6859         {
6860                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6861         }
6862
6863         RValue<Float> Extract(RValue<Float4> x, int i)
6864         {
6865                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6866         }
6867
6868         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6869         {
6870                 return RValue<Float4>(createSwizzle4(x.value, select));
6871         }
6872
6873         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6874         {
6875                 int shuffle[4] =
6876                 {
6877                         ((imm >> 0) & 0x03) + 0,
6878                         ((imm >> 2) & 0x03) + 0,
6879                         ((imm >> 4) & 0x03) + 4,
6880                         ((imm >> 6) & 0x03) + 4,
6881                 };
6882
6883                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6884         }
6885
6886         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6887         {
6888                 int shuffle[4] = {0, 4, 1, 5};
6889                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6890         }
6891
6892         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6893         {
6894                 int shuffle[4] = {2, 6, 3, 7};
6895                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6896         }
6897
6898         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6899         {
6900                 Value *vector = lhs.loadValue();
6901                 Value *result = createMask4(vector, rhs.value, select);
6902                 lhs.storeValue(result);
6903
6904                 return RValue<Float4>(result);
6905         }
6906
6907         RValue<Int> SignMask(RValue<Float4> x)
6908         {
6909                 if(emulateIntrinsics)
6910                 {
6911                         Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
6912                         return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
6913                 }
6914                 else
6915                 {
6916                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6917                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6918                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6919                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6920                         movmsk->addArg(x.value);
6921                         ::basicBlock->appendInst(movmsk);
6922
6923                         return RValue<Int>(V(result));
6924                 }
6925         }
6926
6927         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6928         {
6929                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6930         }
6931
6932         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6933         {
6934                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6935         }
6936
6937         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6938         {
6939                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6940         }
6941
6942         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6943         {
6944                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6945         }
6946
6947         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6948         {
6949                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6950         }
6951
6952         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6953         {
6954                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6955         }
6956
6957         RValue<Float4> Round(RValue<Float4> x)
6958         {
6959                 if(emulateIntrinsics)
6960                 {
6961                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
6962                         return (x + Float4(0x00C00000)) - Float4(0x00C00000);
6963                 }
6964                 else if(CPUID::SSE4_1)
6965                 {
6966                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6967                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6968                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6969                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6970                         round->addArg(x.value);
6971                         round->addArg(::context->getConstantInt32(0));
6972                         ::basicBlock->appendInst(round);
6973
6974                         return RValue<Float4>(V(result));
6975                 }
6976                 else
6977                 {
6978                         return Float4(RoundInt(x));
6979                 }
6980         }
6981
6982         RValue<Float4> Trunc(RValue<Float4> x)
6983         {
6984                 if(CPUID::SSE4_1)
6985                 {
6986                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6987                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6988                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6989                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6990                         round->addArg(x.value);
6991                         round->addArg(::context->getConstantInt32(3));
6992                         ::basicBlock->appendInst(round);
6993
6994                         return RValue<Float4>(V(result));
6995                 }
6996                 else
6997                 {
6998                         return Float4(Int4(x));
6999                 }
7000         }
7001
7002         RValue<Float4> Frac(RValue<Float4> x)
7003         {
7004                 Float4 frc;
7005
7006                 if(CPUID::SSE4_1)
7007                 {
7008                         frc = x - Floor(x);
7009                 }
7010                 else
7011                 {
7012                         frc = x - Float4(Int4(x));   // Signed fractional part.
7013
7014                         frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));   // Add 1.0 if negative.
7015                 }
7016
7017                 // x - floor(x) can be 1.0 for very small negative x.
7018                 // Clamp against the value just below 1.0.
7019                 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
7020         }
7021
7022         RValue<Float4> Floor(RValue<Float4> x)
7023         {
7024                 if(CPUID::SSE4_1)
7025                 {
7026                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
7027                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
7028                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
7029                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
7030                         round->addArg(x.value);
7031                         round->addArg(::context->getConstantInt32(1));
7032                         ::basicBlock->appendInst(round);
7033
7034                         return RValue<Float4>(V(result));
7035                 }
7036                 else
7037                 {
7038                         return x - Frac(x);
7039                 }
7040         }
7041
7042         RValue<Float4> Ceil(RValue<Float4> x)
7043         {
7044                 if(CPUID::SSE4_1)
7045                 {
7046                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
7047                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
7048                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
7049                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
7050                         round->addArg(x.value);
7051                         round->addArg(::context->getConstantInt32(2));
7052                         ::basicBlock->appendInst(round);
7053
7054                         return RValue<Float4>(V(result));
7055                 }
7056                 else
7057                 {
7058                         return -Floor(-x);
7059                 }
7060         }
7061
7062         Type *Float4::getType()
7063         {
7064                 return T(Ice::IceType_v4f32);
7065         }
7066
7067         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
7068         {
7069                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
7070         }
7071
7072         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
7073         {
7074                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
7075         }
7076
7077         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
7078         {
7079                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
7080         }
7081
7082         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
7083         {
7084                 return lhs = lhs + offset;
7085         }
7086
7087         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
7088         {
7089                 return lhs = lhs + offset;
7090         }
7091
7092         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
7093         {
7094                 return lhs = lhs + offset;
7095         }
7096
7097         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
7098         {
7099                 return lhs + -offset;
7100         }
7101
7102         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
7103         {
7104                 return lhs + -offset;
7105         }
7106
7107         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
7108         {
7109                 return lhs + -offset;
7110         }
7111
7112         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
7113         {
7114                 return lhs = lhs - offset;
7115         }
7116
7117         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
7118         {
7119                 return lhs = lhs - offset;
7120         }
7121
7122         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
7123         {
7124                 return lhs = lhs - offset;
7125         }
7126
7127         void Return()
7128         {
7129                 Nucleus::createRetVoid();
7130                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
7131                 Nucleus::createUnreachable();
7132         }
7133
7134         void Return(RValue<Int> ret)
7135         {
7136                 Nucleus::createRet(ret.value);
7137                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
7138                 Nucleus::createUnreachable();
7139         }
7140
7141         void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
7142         {
7143                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
7144                 Nucleus::setInsertBlock(bodyBB);
7145         }
7146
7147         RValue<Long> Ticks()
7148         {
7149                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
7150         }
7151 }