src/Reactor/SubzeroReactor.cpp

   1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //    http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "Nucleus.hpp"
  16
  17 #include "Reactor.hpp"
  18 #include "Routine.hpp"
  19
  20 #include "Optimizer.hpp"
  21
  22 #include "src/IceTypes.h"
  23 #include "src/IceCfg.h"
  24 #include "src/IceELFStreamer.h"
  25 #include "src/IceGlobalContext.h"
  26 #include "src/IceCfgNode.h"
  27 #include "src/IceELFObjectWriter.h"
  28 #include "src/IceGlobalInits.h"
  29
  30 #include "llvm/Support/FileSystem.h"
  31 #include "llvm/Support/raw_os_ostream.h"
  32
  33 #if defined(_WIN32)
  34 #ifndef WIN32_LEAN_AND_MEAN
  35 #define WIN32_LEAN_AND_MEAN
  36 #endif // !WIN32_LEAN_AND_MEAN
  37 #ifndef NOMINMAX
  38 #define NOMINMAX
  39 #endif // !NOMINMAX
  40 #include <Windows.h>
  41 #else
  42 #include <sys/mman.h>
  43 #if !defined(MAP_ANONYMOUS)
  44 #define MAP_ANONYMOUS MAP_ANON
  45 #endif
  46 #endif
  47
  48 #include <mutex>
  49 #include <limits>
  50 #include <iostream>
  51 #include <cassert>
  52
  53 namespace
  54 {
  55         Ice::GlobalContext *context = nullptr;
  56         Ice::Cfg *function = nullptr;
  57         Ice::CfgNode *basicBlock = nullptr;
  58         Ice::CfgLocalAllocatorScope *allocator = nullptr;
  59         sw::Routine *routine = nullptr;
  60
  61         std::mutex codegenMutex;
  62
  63         Ice::ELFFileStreamer *elfFile = nullptr;
  64         Ice::Fdstream *out = nullptr;
  65 }
  66
  67 namespace
  68 {
  69         #if !defined(__i386__) && defined(_M_IX86)
  70                 #define __i386__ 1
  71         #endif
  72
  73         #if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
  74                 #define __x86_64__ 1
  75         #endif
  76
  77         class CPUID
  78         {
  79         public:
  80                 const static bool SSE4_1;
  81
  82         private:
  83                 static void cpuid(int registers[4], int info)
  84                 {
  85                         #if defined(__i386__) || defined(__x86_64__)
  86                                 #if defined(_WIN32)
  87                                         __cpuid(registers, info);
  88                                 #else
  89                                         __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
  90                                 #endif
  91                         #else
  92                                 registers[0] = 0;
  93                                 registers[1] = 0;
  94                                 registers[2] = 0;
  95                                 registers[3] = 0;
  96                         #endif
  97                 }
  98
  99                 static bool detectSSE4_1()
 100                 {
 101                         #if defined(__i386__) || defined(__x86_64__)
 102                                 int registers[4];
 103                                 cpuid(registers, 1);
 104                                 return (registers[2] & 0x00080000) != 0;
 105                         #else
 106                                 return false;
 107                         #endif
 108                 }
 109         };
 110
 111         const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
 112 }
 113
 114 namespace sw
 115 {
 116         enum EmulatedType
 117         {
 118                 EmulatedShift = 16,
 119                 EmulatedV2 = 2 << EmulatedShift,
 120                 EmulatedV4 = 4 << EmulatedShift,
 121                 EmulatedV8 = 8 << EmulatedShift,
 122                 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
 123
 124                 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
 125                 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
 126                 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
 127                 Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
 128                 Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
 129                 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
 130         };
 131
 132         class Value : public Ice::Operand {};
 133         class SwitchCases : public Ice::InstSwitch {};
 134         class BasicBlock : public Ice::CfgNode {};
 135
 136         Ice::Type T(Type *t)
 137         {
 138                 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
 139                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
 140         }
 141
 142         Type *T(Ice::Type t)
 143         {
 144                 return reinterpret_cast<Type*>(t);
 145         }
 146
 147         Type *T(EmulatedType t)
 148         {
 149                 return reinterpret_cast<Type*>(t);
 150         }
 151
 152         Value *V(Ice::Operand *v)
 153         {
 154                 return reinterpret_cast<Value*>(v);
 155         }
 156
 157         BasicBlock *B(Ice::CfgNode *b)
 158         {
 159                 return reinterpret_cast<BasicBlock*>(b);
 160         }
 161
 162         static size_t typeSize(Type *type)
 163         {
 164                 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
 165                 {
 166                         switch(reinterpret_cast<std::intptr_t>(type))
 167                         {
 168                         case Type_v2i32: return 8;
 169                         case Type_v4i16: return 8;
 170                         case Type_v2i16: return 4;
 171                         case Type_v8i8:  return 8;
 172                         case Type_v4i8:  return 4;
 173                         case Type_v2f32: return 8;
 174                         default: assert(false);
 175                         }
 176                 }
 177
 178                 return Ice::typeWidthInBytes(T(type));
 179         }
 180
 181         Optimization optimization[10] = {InstructionCombining, Disabled};
 182
 183         using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
 184         using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
 185
 186         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
 187         {
 188                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
 189         }
 190
 191         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
 192         {
 193                 return &sectionHeader(elfHeader)[index];
 194         }
 195
 196         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
 197         {
 198                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 199
 200                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 201                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 202                 uint32_t index = relocation.getSymbol();
 203                 int table = relocationTable.sh_link;
 204                 void *symbolValue = nullptr;
 205
 206                 if(index != SHN_UNDEF)
 207                 {
 208                         if(table == SHN_UNDEF) return nullptr;
 209                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 210
 211                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 212                         if(index >= symtab_entries)
 213                         {
 214                                 assert(index < symtab_entries && "Symbol Index out of range");
 215                                 return nullptr;
 216                         }
 217
 218                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 219                         Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
 220                         uint16_t section = symbol.st_shndx;
 221
 222                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 223                         {
 224                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 225                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 226                         }
 227                         else
 228                         {
 229                                 return nullptr;
 230                         }
 231                 }
 232
 233                 #if defined(__i386__)
 234                         switch(relocation.getType())
 235                         {
 236                         case R_386_NONE:
 237                                 // No relocation
 238                                 break;
 239                         case R_386_32:
 240                                 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
 241                                 break;
 242                 //      case R_386_PC32:
 243                 //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
 244                 //              break;
 245                         default:
 246                                 assert(false && "Unsupported relocation type");
 247                                 return nullptr;
 248                         }
 249                 #elif defined(__arm__)
 250                         switch(relocation.getType())
 251                         {
 252                         case R_ARM_NONE:
 253                                 // No relocation
 254                                 break;
 255                         case R_ARM_MOVW_ABS_NC:
 256                                 {
 257                                         uint32_t thumb = 0;   // Calls to Thumb code not supported.
 258                                         uint32_t lo = (uint32_t)symbolValue | thumb;
 259                                         *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
 260                                 }
 261                                 break;
 262                         case R_ARM_MOVT_ABS:
 263                                 {
 264                                         uint32_t hi = (uint32_t)(symbolValue) >> 16;
 265                                         *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
 266                                 }
 267                                 break;
 268                         default:
 269                                 assert(false && "Unsupported relocation type");
 270                                 return nullptr;
 271                         }
 272                 #endif
 273
 274                 return symbolValue;
 275         }
 276
 277         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
 278         {
 279                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 280
 281                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 282                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 283                 uint32_t index = relocation.getSymbol();
 284                 int table = relocationTable.sh_link;
 285                 void *symbolValue = nullptr;
 286
 287                 if(index != SHN_UNDEF)
 288                 {
 289                         if(table == SHN_UNDEF) return nullptr;
 290                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 291
 292                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 293                         if(index >= symtab_entries)
 294                         {
 295                                 assert(index < symtab_entries && "Symbol Index out of range");
 296                                 return nullptr;
 297                         }
 298
 299                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 300                         Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
 301                         uint16_t section = symbol.st_shndx;
 302
 303                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 304                         {
 305                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 306                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 307                         }
 308                         else
 309                         {
 310                                 return nullptr;
 311                         }
 312                 }
 313
 314                 #if defined(__x86_64__)
 315                         switch(relocation.getType())
 316                         {
 317                         case R_X86_64_NONE:
 318                                 // No relocation
 319                                 break;
 320                         case R_X86_64_64:
 321                                 *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
 322                                 break;
 323                         case R_X86_64_PC32:
 324                                 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
 325                                 break;
 326                         case R_X86_64_32S:
 327                                 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
 328                                 break;
 329                         default:
 330                                 assert(false && "Unsupported relocation type");
 331                                 return nullptr;
 332                         }
 333                 #endif
 334
 335                 return symbolValue;
 336         }
 337
 338         void *loadImage(uint8_t *const elfImage, size_t &codeSize)
 339         {
 340                 ElfHeader *elfHeader = (ElfHeader*)elfImage;
 341
 342                 if(!elfHeader->checkMagic())
 343                 {
 344                         return nullptr;
 345                 }
 346
 347                 // Expect ELF bitness to match platform
 348                 assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
 349                 #if defined(__i386__)
 350                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
 351                 #elif defined(__x86_64__)
 352                         assert(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
 353                 #elif defined(__arm__)
 354                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
 355                 #else
 356                         #error "Unsupported platform"
 357                 #endif
 358
 359                 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
 360                 void *entry = nullptr;
 361
 362                 for(int i = 0; i < elfHeader->e_shnum; i++)
 363                 {
 364                         if(sectionHeader[i].sh_type == SHT_PROGBITS)
 365                         {
 366                                 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
 367                                 {
 368                                         entry = elfImage + sectionHeader[i].sh_offset;
 369                                         codeSize = sectionHeader[i].sh_size;
 370                                 }
 371                         }
 372                         else if(sectionHeader[i].sh_type == SHT_REL)
 373                         {
 374                                 assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code
 375
 376                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 377                                 {
 378                                         const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
 379                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 380                                 }
 381                         }
 382                         else if(sectionHeader[i].sh_type == SHT_RELA)
 383                         {
 384                                 assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code
 385
 386                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 387                                 {
 388                                         const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
 389                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 390                                 }
 391                         }
 392                 }
 393
 394                 return entry;
 395         }
 396
 397         template<typename T>
 398         struct ExecutableAllocator
 399         {
 400                 ExecutableAllocator() {};
 401                 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
 402
 403                 using value_type = T;
 404                 using size_type = std::size_t;
 405
 406                 T *allocate(size_type n)
 407                 {
 408                         #if defined(_WIN32)
 409                                 return (T*)VirtualAlloc(NULL, sizeof(T) * n, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
 410                         #else
 411                                 return (T*)mmap(nullptr, sizeof(T) * n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 412                         #endif
 413                 }
 414
 415                 void deallocate(T *p, size_type n)
 416                 {
 417                         #if defined(_WIN32)
 418                                 VirtualFree(p, 0, MEM_RELEASE);
 419                         #else
 420                                 munmap(p, sizeof(T) * n);
 421                         #endif
 422                 }
 423         };
 424
 425         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
 426         {
 427                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
 428                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
 429
 430         public:
 431                 ELFMemoryStreamer() : Routine(), entry(nullptr)
 432                 {
 433                         position = 0;
 434                         buffer.reserve(0x1000);
 435                 }
 436
 437                 virtual ~ELFMemoryStreamer()
 438                 {
 439                         #if defined(_WIN32)
 440                                 if(buffer.size() != 0)
 441                                 {
 442                                         DWORD exeProtection;
 443                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
 444                                 }
 445                         #endif
 446                 }
 447
 448                 void write8(uint8_t Value) override
 449                 {
 450                         if(position == (uint64_t)buffer.size())
 451                         {
 452                                 buffer.push_back(Value);
 453                                 position++;
 454                         }
 455                         else if(position < (uint64_t)buffer.size())
 456                         {
 457                                 buffer[position] = Value;
 458                                 position++;
 459                         }
 460                         else assert(false && "UNIMPLEMENTED");
 461                 }
 462
 463                 void writeBytes(llvm::StringRef Bytes) override
 464                 {
 465                         std::size_t oldSize = buffer.size();
 466                         buffer.resize(oldSize + Bytes.size());
 467                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
 468                         position += Bytes.size();
 469                 }
 470
 471                 uint64_t tell() const override { return position; }
 472
 473                 void seek(uint64_t Off) override { position = Off; }
 474
 475                 const void *getEntry() override
 476                 {
 477                         if(!entry)
 478                         {
 479                                 #if defined(_WIN32)
 480                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READWRITE, &oldProtection);
 481                                 #else
 482                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_WRITE | PROT_EXEC);
 483                                 #endif
 484
 485                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
 486
 487                                 size_t codeSize = 0;
 488                                 entry = loadImage(&buffer[0], codeSize);
 489
 490                                 #if defined(_WIN32)
 491                                         FlushInstructionCache(GetCurrentProcess(), NULL, 0);
 492                                 #else
 493                                         __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
 494                                 #endif
 495                         }
 496
 497                         return entry;
 498                 }
 499
 500         private:
 501                 void *entry;
 502                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
 503                 std::size_t position;
 504
 505                 #if defined(_WIN32)
 506                 DWORD oldProtection;
 507                 #endif
 508         };
 509
 510         Nucleus::Nucleus()
 511         {
 512                 ::codegenMutex.lock();   // Reactor is currently not thread safe
 513
 514                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
 515                 Ice::ClFlags::getParsedClFlags(Flags);
 516
 517                 #if defined(__arm__)
 518                         Flags.setTargetArch(Ice::Target_ARM32);
 519                         Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
 520                 #else   // x86
 521                         Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
 522                         Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
 523                 #endif
 524                 Flags.setOutFileType(Ice::FT_Elf);
 525                 Flags.setOptLevel(Ice::Opt_2);
 526                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
 527                 Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
 528                 Flags.setDisableHybridAssembly(true);
 529
 530                 static llvm::raw_os_ostream cout(std::cout);
 531                 static llvm::raw_os_ostream cerr(std::cerr);
 532
 533                 if(false)   // Write out to a file
 534                 {
 535                         std::error_code errorCode;
 536                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
 537                         ::elfFile = new Ice::ELFFileStreamer(*out);
 538                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
 539                 }
 540                 else
 541                 {
 542                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
 543                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
 544                         ::routine = elfMemory;
 545                 }
 546         }
 547
 548         Nucleus::~Nucleus()
 549         {
 550                 delete ::allocator;
 551                 delete ::function;
 552                 delete ::context;
 553
 554                 delete ::elfFile;
 555                 delete ::out;
 556
 557                 ::codegenMutex.unlock();
 558         }
 559
 560         Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
 561         {
 562                 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
 563                 {
 564                         createRetVoid();
 565                 }
 566
 567                 std::wstring wideName(name);
 568                 std::string asciiName(wideName.begin(), wideName.end());
 569                 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));
 570
 571                 optimize();
 572
 573                 ::function->translate();
 574                 assert(!::function->hasError());
 575
 576                 auto *globals = ::function->getGlobalInits().release();
 577
 578                 if(globals && !globals->empty())
 579                 {
 580                         ::context->getGlobals()->merge(globals);
 581                 }
 582
 583                 ::context->emitFileHeader();
 584                 ::function->emitIAS();
 585                 auto assembler = ::function->releaseAssembler();
 586                 auto objectWriter = ::context->getObjectWriter();
 587                 assembler->alignFunction();
 588                 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
 589                 ::context->lowerGlobals("last");
 590                 ::context->lowerConstants();
 591                 ::context->lowerJumpTables();
 592                 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
 593                 objectWriter->writeNonUserSections();
 594
 595                 return ::routine;
 596         }
 597
 598         void Nucleus::optimize()
 599         {
 600                 sw::optimize(::function);
 601         }
 602
 603         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
 604         {
 605                 Ice::Type type = T(t);
 606                 int typeSize = Ice::typeWidthInBytes(type);
 607                 int totalSize = typeSize * (arraySize ? arraySize : 1);
 608
 609                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
 610                 auto address = ::function->makeVariable(T(getPointerType(t)));
 611                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
 612                 ::function->getEntryNode()->getInsts().push_front(alloca);
 613
 614                 return V(address);
 615         }
 616
 617         BasicBlock *Nucleus::createBasicBlock()
 618         {
 619                 return B(::function->makeNode());
 620         }
 621
 622         BasicBlock *Nucleus::getInsertBlock()
 623         {
 624                 return B(::basicBlock);
 625         }
 626
 627         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
 628         {
 629         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
 630                 ::basicBlock = basicBlock;
 631         }
 632
 633         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
 634         {
 635                 uint32_t sequenceNumber = 0;
 636                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
 637                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
 638
 639                 for(Type *type : Params)
 640                 {
 641                         Ice::Variable *arg = ::function->makeVariable(T(type));
 642                         ::function->addArg(arg);
 643                 }
 644
 645                 Ice::CfgNode *node = ::function->makeNode();
 646                 ::function->setEntryNode(node);
 647                 ::basicBlock = node;
 648         }
 649
 650         Value *Nucleus::getArgument(unsigned int index)
 651         {
 652                 return V(::function->getArgs()[index]);
 653         }
 654
 655         void Nucleus::createRetVoid()
 656         {
 657                 Ice::InstRet *ret = Ice::InstRet::create(::function);
 658                 ::basicBlock->appendInst(ret);
 659         }
 660
 661         void Nucleus::createRet(Value *v)
 662         {
 663                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
 664                 ::basicBlock->appendInst(ret);
 665         }
 666
 667         void Nucleus::createBr(BasicBlock *dest)
 668         {
 669                 auto br = Ice::InstBr::create(::function, dest);
 670                 ::basicBlock->appendInst(br);
 671         }
 672
 673         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
 674         {
 675                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
 676                 ::basicBlock->appendInst(br);
 677         }
 678
 679         static bool isCommutative(Ice::InstArithmetic::OpKind op)
 680         {
 681                 switch(op)
 682                 {
 683                 case Ice::InstArithmetic::Add:
 684                 case Ice::InstArithmetic::Fadd:
 685                 case Ice::InstArithmetic::Mul:
 686                 case Ice::InstArithmetic::Fmul:
 687                 case Ice::InstArithmetic::And:
 688                 case Ice::InstArithmetic::Or:
 689                 case Ice::InstArithmetic::Xor:
 690                         return true;
 691                 default:
 692                         return false;
 693                 }
 694         }
 695
 696         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
 697         {
 698                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
 699
 700                 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
 701
 702                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
 703                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
 704                 ::basicBlock->appendInst(arithmetic);
 705
 706                 return V(result);
 707         }
 708
 709         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
 710         {
 711                 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
 712         }
 713
 714         Value *Nucleus::createSub(Value *lhs, Value *rhs)
 715         {
 716                 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
 717         }
 718
 719         Value *Nucleus::createMul(Value *lhs, Value *rhs)
 720         {
 721                 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
 722         }
 723
 724         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
 725         {
 726                 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
 727         }
 728
 729         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
 730         {
 731                 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
 732         }
 733
 734         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
 735         {
 736                 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
 737         }
 738
 739         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
 740         {
 741                 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
 742         }
 743
 744         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
 745         {
 746                 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
 747         }
 748
 749         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
 750         {
 751                 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
 752         }
 753
 754         Value *Nucleus::createURem(Value *lhs, Value *rhs)
 755         {
 756                 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
 757         }
 758
 759         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
 760         {
 761                 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
 762         }
 763
 764         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
 765         {
 766                 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
 767         }
 768
 769         Value *Nucleus::createShl(Value *lhs, Value *rhs)
 770         {
 771                 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
 772         }
 773
 774         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
 775         {
 776                 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
 777         }
 778
 779         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
 780         {
 781                 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
 782         }
 783
 784         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
 785         {
 786                 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
 787         }
 788
 789         Value *Nucleus::createOr(Value *lhs, Value *rhs)
 790         {
 791                 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
 792         }
 793
 794         Value *Nucleus::createXor(Value *lhs, Value *rhs)
 795         {
 796                 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
 797         }
 798
 799         Value *Nucleus::createNeg(Value *v)
 800         {
 801                 return createSub(createNullValue(T(v->getType())), v);
 802         }
 803
 804         Value *Nucleus::createFNeg(Value *v)
 805         {
 806                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
 807                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
 808                                       createConstantVector(c, T(v->getType())) :
 809                                       V(::context->getConstantFloat(-0.0f));
 810
 811                 return createFSub(negativeZero, v);
 812         }
 813
 814         Value *Nucleus::createNot(Value *v)
 815         {
 816                 if(Ice::isScalarIntegerType(v->getType()))
 817                 {
 818                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
 819                 }
 820                 else   // Vector
 821                 {
 822                         int64_t c[4] = {-1, -1, -1, -1};
 823                         return createXor(v, createConstantVector(c, T(v->getType())));
 824                 }
 825         }
 826
 827         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
 828         {
 829                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 830                 Ice::Variable *result = ::function->makeVariable(T(type));
 831
 832                 if(valueType & EmulatedBits)
 833                 {
 834                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
 835                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 836                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
 837                         load->addArg(ptr);
 838                         load->addArg(::context->getConstantInt32(typeSize(type)));
 839                         ::basicBlock->appendInst(load);
 840                 }
 841                 else
 842                 {
 843                         auto load = Ice::InstLoad::create(::function, result, ptr, align);
 844                         ::basicBlock->appendInst(load);
 845                 }
 846
 847                 return V(result);
 848         }
 849
 850         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
 851         {
 852                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 853
 854                 if(valueType & EmulatedBits)
 855                 {
 856                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
 857                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 858                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
 859                         store->addArg(value);
 860                         store->addArg(ptr);
 861                         store->addArg(::context->getConstantInt32(typeSize(type)));
 862                         ::basicBlock->appendInst(store);
 863                 }
 864                 else
 865                 {
 866                         assert(T(value->getType()) == type);
 867
 868                         auto store = Ice::InstStore::create(::function, value, ptr, align);
 869                         ::basicBlock->appendInst(store);
 870                 }
 871
 872                 return value;
 873         }
 874
 875         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
 876         {
 877                 assert(index->getType() == Ice::IceType_i32);
 878
 879                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
 880                 {
 881                         int32_t offset = constant->getValue() * (int)typeSize(type);
 882
 883                         if(offset == 0)
 884                         {
 885                                 return ptr;
 886                         }
 887
 888                         return createAdd(ptr, createConstantInt(offset));
 889                 }
 890
 891                 if(!Ice::isByteSizedType(T(type)))
 892                 {
 893                         index = createMul(index, createConstantInt((int)typeSize(type)));
 894                 }
 895
 896                 if(sizeof(void*) == 8)
 897                 {
 898                         if(unsignedIndex)
 899                         {
 900                                 index = createZExt(index, T(Ice::IceType_i64));
 901                         }
 902                         else
 903                         {
 904                                 index = createSExt(index, T(Ice::IceType_i64));
 905                         }
 906                 }
 907
 908                 return createAdd(ptr, index);
 909         }
 910
 911         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
 912         {
 913                 assert(false && "UNIMPLEMENTED"); return nullptr;
 914         }
 915
 916         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
 917         {
 918                 if(v->getType() == T(destType))
 919                 {
 920                         return v;
 921                 }
 922
 923                 Ice::Variable *result = ::function->makeVariable(T(destType));
 924                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
 925                 ::basicBlock->appendInst(cast);
 926
 927                 return V(result);
 928         }
 929
 930         Value *Nucleus::createTrunc(Value *v, Type *destType)
 931         {
 932                 return createCast(Ice::InstCast::Trunc, v, destType);
 933         }
 934
 935         Value *Nucleus::createZExt(Value *v, Type *destType)
 936         {
 937                 return createCast(Ice::InstCast::Zext, v, destType);
 938         }
 939
 940         Value *Nucleus::createSExt(Value *v, Type *destType)
 941         {
 942                 return createCast(Ice::InstCast::Sext, v, destType);
 943         }
 944
 945         Value *Nucleus::createFPToSI(Value *v, Type *destType)
 946         {
 947                 return createCast(Ice::InstCast::Fptosi, v, destType);
 948         }
 949
 950         Value *Nucleus::createSIToFP(Value *v, Type *destType)
 951         {
 952                 return createCast(Ice::InstCast::Sitofp, v, destType);
 953         }
 954
 955         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
 956         {
 957                 return createCast(Ice::InstCast::Fptrunc, v, destType);
 958         }
 959
 960         Value *Nucleus::createFPExt(Value *v, Type *destType)
 961         {
 962                 return createCast(Ice::InstCast::Fpext, v, destType);
 963         }
 964
 965         Value *Nucleus::createBitCast(Value *v, Type *destType)
 966         {
 967                 return createCast(Ice::InstCast::Bitcast, v, destType);
 968         }
 969
 970         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
 971         {
 972                 assert(lhs->getType() == rhs->getType());
 973
 974                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
 975                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
 976                 ::basicBlock->appendInst(cmp);
 977
 978                 return V(result);
 979         }
 980
 981         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
 982         {
 983                 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
 984         }
 985
 986         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
 987         {
 988                 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
 989         }
 990
 991         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
 992         {
 993                 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
 994         }
 995
 996         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
 997         {
 998                 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
 999         }
1000
1001         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1002         {
1003                 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1004         }
1005
1006         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1007         {
1008                 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1009         }
1010
1011         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1012         {
1013                 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1014         }
1015
1016         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1017         {
1018                 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1019         }
1020
1021         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1022         {
1023                 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1024         }
1025
1026         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1027         {
1028                 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1029         }
1030
1031         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1032         {
1033                 assert(lhs->getType() == rhs->getType());
1034                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1035
1036                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1037                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1038                 ::basicBlock->appendInst(cmp);
1039
1040                 return V(result);
1041         }
1042
1043         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1044         {
1045                 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1046         }
1047
1048         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1049         {
1050                 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1051         }
1052
1053         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1054         {
1055                 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1056         }
1057
1058         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1059         {
1060                 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1061         }
1062
1063         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1064         {
1065                 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1066         }
1067
1068         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1069         {
1070                 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1071         }
1072
1073         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1074         {
1075                 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1076         }
1077
1078         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1079         {
1080                 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1081         }
1082
1083         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1084         {
1085                 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1086         }
1087
1088         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1089         {
1090                 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1091         }
1092
1093         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1094         {
1095                 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1096         }
1097
1098         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1099         {
1100                 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1101         }
1102
1103         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1104         {
1105                 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1106         }
1107
1108         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1109         {
1110                 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1111         }
1112
1113         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1114         {
1115                 auto result = ::function->makeVariable(T(type));
1116                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1117                 ::basicBlock->appendInst(extract);
1118
1119                 return V(result);
1120         }
1121
1122         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1123         {
1124                 auto result = ::function->makeVariable(vector->getType());
1125                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1126                 ::basicBlock->appendInst(insert);
1127
1128                 return V(result);
1129         }
1130
1131         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1132         {
1133                 assert(V1->getType() == V2->getType());
1134
1135                 int size = Ice::typeNumElements(V1->getType());
1136                 auto result = ::function->makeVariable(V1->getType());
1137                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1138
1139                 for(int i = 0; i < size; i++)
1140                 {
1141                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1142                 }
1143
1144                 ::basicBlock->appendInst(shuffle);
1145
1146                 return V(result);
1147         }
1148
1149         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1150         {
1151                 assert(ifTrue->getType() == ifFalse->getType());
1152
1153                 auto result = ::function->makeVariable(ifTrue->getType());
1154                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1155                 ::basicBlock->appendInst(select);
1156
1157                 return V(result);
1158         }
1159
1160         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1161         {
1162                 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1163                 ::basicBlock->appendInst(switchInst);
1164
1165                 return reinterpret_cast<SwitchCases*>(switchInst);
1166         }
1167
1168         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1169         {
1170                 switchCases->addBranch(label, label, branch);
1171         }
1172
1173         void Nucleus::createUnreachable()
1174         {
1175                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1176                 ::basicBlock->appendInst(unreachable);
1177         }
1178
1179         static Value *createSwizzle4(Value *val, unsigned char select)
1180         {
1181                 int swizzle[4] =
1182                 {
1183                         (select >> 0) & 0x03,
1184                         (select >> 2) & 0x03,
1185                         (select >> 4) & 0x03,
1186                         (select >> 6) & 0x03,
1187                 };
1188
1189                 return Nucleus::createShuffleVector(val, val, swizzle);
1190         }
1191
1192         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1193         {
1194                 int64_t mask[4] = {0, 0, 0, 0};
1195
1196                 mask[(select >> 0) & 0x03] = -1;
1197                 mask[(select >> 2) & 0x03] = -1;
1198                 mask[(select >> 4) & 0x03] = -1;
1199                 mask[(select >> 6) & 0x03] = -1;
1200
1201                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1202                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1203
1204                 return result;
1205         }
1206
1207         Type *Nucleus::getPointerType(Type *ElementType)
1208         {
1209                 if(sizeof(void*) == 8)
1210                 {
1211                         return T(Ice::IceType_i64);
1212                 }
1213                 else
1214                 {
1215                         return T(Ice::IceType_i32);
1216                 }
1217         }
1218
1219         Value *Nucleus::createNullValue(Type *Ty)
1220         {
1221                 if(Ice::isVectorType(T(Ty)))
1222                 {
1223                         assert(Ice::typeNumElements(T(Ty)) <= 16);
1224                         int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1225                         return createConstantVector(c, Ty);
1226                 }
1227                 else
1228                 {
1229                         return V(::context->getConstantZero(T(Ty)));
1230                 }
1231         }
1232
1233         Value *Nucleus::createConstantLong(int64_t i)
1234         {
1235                 return V(::context->getConstantInt64(i));
1236         }
1237
1238         Value *Nucleus::createConstantInt(int i)
1239         {
1240                 return V(::context->getConstantInt32(i));
1241         }
1242
1243         Value *Nucleus::createConstantInt(unsigned int i)
1244         {
1245                 return V(::context->getConstantInt32(i));
1246         }
1247
1248         Value *Nucleus::createConstantBool(bool b)
1249         {
1250                 return V(::context->getConstantInt1(b));
1251         }
1252
1253         Value *Nucleus::createConstantByte(signed char i)
1254         {
1255                 return V(::context->getConstantInt8(i));
1256         }
1257
1258         Value *Nucleus::createConstantByte(unsigned char i)
1259         {
1260                 return V(::context->getConstantInt8(i));
1261         }
1262
1263         Value *Nucleus::createConstantShort(short i)
1264         {
1265                 return V(::context->getConstantInt16(i));
1266         }
1267
1268         Value *Nucleus::createConstantShort(unsigned short i)
1269         {
1270                 return V(::context->getConstantInt16(i));
1271         }
1272
1273         Value *Nucleus::createConstantFloat(float x)
1274         {
1275                 return V(::context->getConstantFloat(x));
1276         }
1277
1278         Value *Nucleus::createNullPointer(Type *Ty)
1279         {
1280                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1281         }
1282
1283         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1284         {
1285                 const int vectorSize = 16;
1286                 assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
1287                 const int alignment = vectorSize;
1288                 auto globalPool = ::function->getGlobalPool();
1289
1290                 const int64_t *i = constants;
1291                 const double *f = reinterpret_cast<const double*>(constants);
1292                 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1293
1294                 switch((int)reinterpret_cast<intptr_t>(type))
1295                 {
1296                 case Ice::IceType_v4i32:
1297                 case Ice::IceType_v4i1:
1298                         {
1299                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1300                                 static_assert(sizeof(initializer) == vectorSize, "!");
1301                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1302                         }
1303                         break;
1304                 case Ice::IceType_v4f32:
1305                         {
1306                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1307                                 static_assert(sizeof(initializer) == vectorSize, "!");
1308                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1309                         }
1310                         break;
1311                 case Ice::IceType_v8i16:
1312                 case Ice::IceType_v8i1:
1313                         {
1314                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1315                                 static_assert(sizeof(initializer) == vectorSize, "!");
1316                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1317                         }
1318                         break;
1319                 case Ice::IceType_v16i8:
1320                 case Ice::IceType_v16i1:
1321                         {
1322                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1323                                 static_assert(sizeof(initializer) == vectorSize, "!");
1324                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1325                         }
1326                         break;
1327                 case Type_v2i32:
1328                         {
1329                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1330                                 static_assert(sizeof(initializer) == vectorSize, "!");
1331                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1332                         }
1333                         break;
1334                 case Type_v2f32:
1335                         {
1336                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1337                                 static_assert(sizeof(initializer) == vectorSize, "!");
1338                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1339                         }
1340                         break;
1341                 case Type_v4i16:
1342                         {
1343                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1344                                 static_assert(sizeof(initializer) == vectorSize, "!");
1345                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1346                         }
1347                         break;
1348                 case Type_v8i8:
1349                         {
1350                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1351                                 static_assert(sizeof(initializer) == vectorSize, "!");
1352                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1353                         }
1354                         break;
1355                 case Type_v4i8:
1356                         {
1357                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1358                                 static_assert(sizeof(initializer) == vectorSize, "!");
1359                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1360                         }
1361                         break;
1362                 default:
1363                         assert(false && "Unknown constant vector type" && type);
1364                 }
1365
1366                 auto name = Ice::GlobalString::createWithoutString(::context);
1367                 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1368                 variableDeclaration->setName(name);
1369                 variableDeclaration->setAlignment(alignment);
1370                 variableDeclaration->setIsConstant(true);
1371                 variableDeclaration->addInitializer(dataInitializer);
1372
1373                 ::function->addGlobal(variableDeclaration);
1374
1375                 constexpr int32_t offset = 0;
1376                 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1377
1378                 Ice::Variable *result = ::function->makeVariable(T(type));
1379                 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1380                 ::basicBlock->appendInst(load);
1381
1382                 return V(result);
1383         }
1384
1385         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1386         {
1387                 return createConstantVector((const int64_t*)constants, type);
1388         }
1389
1390         Type *Void::getType()
1391         {
1392                 return T(Ice::IceType_void);
1393         }
1394
1395         Bool::Bool(Argument<Bool> argument)
1396         {
1397                 storeValue(argument.value);
1398         }
1399
1400         Bool::Bool(bool x)
1401         {
1402                 storeValue(Nucleus::createConstantBool(x));
1403         }
1404
1405         Bool::Bool(RValue<Bool> rhs)
1406         {
1407                 storeValue(rhs.value);
1408         }
1409
1410         Bool::Bool(const Bool &rhs)
1411         {
1412                 Value *value = rhs.loadValue();
1413                 storeValue(value);
1414         }
1415
1416         Bool::Bool(const Reference<Bool> &rhs)
1417         {
1418                 Value *value = rhs.loadValue();
1419                 storeValue(value);
1420         }
1421
1422         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1423         {
1424                 storeValue(rhs.value);
1425
1426                 return rhs;
1427         }
1428
1429         RValue<Bool> Bool::operator=(const Bool &rhs)
1430         {
1431                 Value *value = rhs.loadValue();
1432                 storeValue(value);
1433
1434                 return RValue<Bool>(value);
1435         }
1436
1437         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1438         {
1439                 Value *value = rhs.loadValue();
1440                 storeValue(value);
1441
1442                 return RValue<Bool>(value);
1443         }
1444
1445         RValue<Bool> operator!(RValue<Bool> val)
1446         {
1447                 return RValue<Bool>(Nucleus::createNot(val.value));
1448         }
1449
1450         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1451         {
1452                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1453         }
1454
1455         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1456         {
1457                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1458         }
1459
1460         Type *Bool::getType()
1461         {
1462                 return T(Ice::IceType_i1);
1463         }
1464
1465         Byte::Byte(Argument<Byte> argument)
1466         {
1467                 storeValue(argument.value);
1468         }
1469
1470         Byte::Byte(RValue<Int> cast)
1471         {
1472                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1473
1474                 storeValue(integer);
1475         }
1476
1477         Byte::Byte(RValue<UInt> cast)
1478         {
1479                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1480
1481                 storeValue(integer);
1482         }
1483
1484         Byte::Byte(RValue<UShort> cast)
1485         {
1486                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1487
1488                 storeValue(integer);
1489         }
1490
1491         Byte::Byte(int x)
1492         {
1493                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1494         }
1495
1496         Byte::Byte(unsigned char x)
1497         {
1498                 storeValue(Nucleus::createConstantByte(x));
1499         }
1500
1501         Byte::Byte(RValue<Byte> rhs)
1502         {
1503                 storeValue(rhs.value);
1504         }
1505
1506         Byte::Byte(const Byte &rhs)
1507         {
1508                 Value *value = rhs.loadValue();
1509                 storeValue(value);
1510         }
1511
1512         Byte::Byte(const Reference<Byte> &rhs)
1513         {
1514                 Value *value = rhs.loadValue();
1515                 storeValue(value);
1516         }
1517
1518         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1519         {
1520                 storeValue(rhs.value);
1521
1522                 return rhs;
1523         }
1524
1525         RValue<Byte> Byte::operator=(const Byte &rhs)
1526         {
1527                 Value *value = rhs.loadValue();
1528                 storeValue(value);
1529
1530                 return RValue<Byte>(value);
1531         }
1532
1533         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1534         {
1535                 Value *value = rhs.loadValue();
1536                 storeValue(value);
1537
1538                 return RValue<Byte>(value);
1539         }
1540
1541         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1542         {
1543                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1544         }
1545
1546         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1547         {
1548                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1549         }
1550
1551         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1552         {
1553                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1554         }
1555
1556         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1557         {
1558                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1559         }
1560
1561         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1562         {
1563                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1564         }
1565
1566         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1567         {
1568                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1569         }
1570
1571         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1572         {
1573                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1574         }
1575
1576         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1577         {
1578                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1579         }
1580
1581         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1582         {
1583                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1584         }
1585
1586         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1587         {
1588                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1589         }
1590
1591         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1592         {
1593                 return lhs = lhs + rhs;
1594         }
1595
1596         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1597         {
1598                 return lhs = lhs - rhs;
1599         }
1600
1601         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1602         {
1603                 return lhs = lhs * rhs;
1604         }
1605
1606         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1607         {
1608                 return lhs = lhs / rhs;
1609         }
1610
1611         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1612         {
1613                 return lhs = lhs % rhs;
1614         }
1615
1616         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1617         {
1618                 return lhs = lhs & rhs;
1619         }
1620
1621         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1622         {
1623                 return lhs = lhs | rhs;
1624         }
1625
1626         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1627         {
1628                 return lhs = lhs ^ rhs;
1629         }
1630
1631         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1632         {
1633                 return lhs = lhs << rhs;
1634         }
1635
1636         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1637         {
1638                 return lhs = lhs >> rhs;
1639         }
1640
1641         RValue<Byte> operator+(RValue<Byte> val)
1642         {
1643                 return val;
1644         }
1645
1646         RValue<Byte> operator-(RValue<Byte> val)
1647         {
1648                 return RValue<Byte>(Nucleus::createNeg(val.value));
1649         }
1650
1651         RValue<Byte> operator~(RValue<Byte> val)
1652         {
1653                 return RValue<Byte>(Nucleus::createNot(val.value));
1654         }
1655
1656         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1657         {
1658                 RValue<Byte> res = val;
1659                 val += Byte(1);
1660                 return res;
1661         }
1662
1663         const Byte &operator++(Byte &val)   // Pre-increment
1664         {
1665                 val += Byte(1);
1666                 return val;
1667         }
1668
1669         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1670         {
1671                 RValue<Byte> res = val;
1672                 val -= Byte(1);
1673                 return res;
1674         }
1675
1676         const Byte &operator--(Byte &val)   // Pre-decrement
1677         {
1678                 val -= Byte(1);
1679                 return val;
1680         }
1681
1682         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1683         {
1684                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1685         }
1686
1687         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1688         {
1689                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1690         }
1691
1692         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1693         {
1694                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1695         }
1696
1697         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1698         {
1699                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1700         }
1701
1702         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1703         {
1704                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1705         }
1706
1707         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1708         {
1709                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1710         }
1711
1712         Type *Byte::getType()
1713         {
1714                 return T(Ice::IceType_i8);
1715         }
1716
1717         SByte::SByte(Argument<SByte> argument)
1718         {
1719                 storeValue(argument.value);
1720         }
1721
1722         SByte::SByte(RValue<Int> cast)
1723         {
1724                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1725
1726                 storeValue(integer);
1727         }
1728
1729         SByte::SByte(RValue<Short> cast)
1730         {
1731                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1732
1733                 storeValue(integer);
1734         }
1735
1736         SByte::SByte(signed char x)
1737         {
1738                 storeValue(Nucleus::createConstantByte(x));
1739         }
1740
1741         SByte::SByte(RValue<SByte> rhs)
1742         {
1743                 storeValue(rhs.value);
1744         }
1745
1746         SByte::SByte(const SByte &rhs)
1747         {
1748                 Value *value = rhs.loadValue();
1749                 storeValue(value);
1750         }
1751
1752         SByte::SByte(const Reference<SByte> &rhs)
1753         {
1754                 Value *value = rhs.loadValue();
1755                 storeValue(value);
1756         }
1757
1758         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1759         {
1760                 storeValue(rhs.value);
1761
1762                 return rhs;
1763         }
1764
1765         RValue<SByte> SByte::operator=(const SByte &rhs)
1766         {
1767                 Value *value = rhs.loadValue();
1768                 storeValue(value);
1769
1770                 return RValue<SByte>(value);
1771         }
1772
1773         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1774         {
1775                 Value *value = rhs.loadValue();
1776                 storeValue(value);
1777
1778                 return RValue<SByte>(value);
1779         }
1780
1781         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1782         {
1783                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1784         }
1785
1786         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1787         {
1788                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1789         }
1790
1791         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1792         {
1793                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1794         }
1795
1796         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1797         {
1798                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1799         }
1800
1801         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1802         {
1803                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1804         }
1805
1806         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1807         {
1808                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1809         }
1810
1811         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1812         {
1813                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1814         }
1815
1816         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1817         {
1818                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1819         }
1820
1821         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1822         {
1823                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1824         }
1825
1826         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1827         {
1828                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1829         }
1830
1831         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1832         {
1833                 return lhs = lhs + rhs;
1834         }
1835
1836         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1837         {
1838                 return lhs = lhs - rhs;
1839         }
1840
1841         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1842         {
1843                 return lhs = lhs * rhs;
1844         }
1845
1846         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1847         {
1848                 return lhs = lhs / rhs;
1849         }
1850
1851         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1852         {
1853                 return lhs = lhs % rhs;
1854         }
1855
1856         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1857         {
1858                 return lhs = lhs & rhs;
1859         }
1860
1861         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1862         {
1863                 return lhs = lhs | rhs;
1864         }
1865
1866         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1867         {
1868                 return lhs = lhs ^ rhs;
1869         }
1870
1871         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1872         {
1873                 return lhs = lhs << rhs;
1874         }
1875
1876         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1877         {
1878                 return lhs = lhs >> rhs;
1879         }
1880
1881         RValue<SByte> operator+(RValue<SByte> val)
1882         {
1883                 return val;
1884         }
1885
1886         RValue<SByte> operator-(RValue<SByte> val)
1887         {
1888                 return RValue<SByte>(Nucleus::createNeg(val.value));
1889         }
1890
1891         RValue<SByte> operator~(RValue<SByte> val)
1892         {
1893                 return RValue<SByte>(Nucleus::createNot(val.value));
1894         }
1895
1896         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1897         {
1898                 RValue<SByte> res = val;
1899                 val += SByte(1);
1900                 return res;
1901         }
1902
1903         const SByte &operator++(SByte &val)   // Pre-increment
1904         {
1905                 val += SByte(1);
1906                 return val;
1907         }
1908
1909         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
1910         {
1911                 RValue<SByte> res = val;
1912                 val -= SByte(1);
1913                 return res;
1914         }
1915
1916         const SByte &operator--(SByte &val)   // Pre-decrement
1917         {
1918                 val -= SByte(1);
1919                 return val;
1920         }
1921
1922         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1923         {
1924                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1925         }
1926
1927         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1928         {
1929                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1930         }
1931
1932         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1933         {
1934                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1935         }
1936
1937         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1938         {
1939                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1940         }
1941
1942         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1943         {
1944                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1945         }
1946
1947         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1948         {
1949                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1950         }
1951
1952         Type *SByte::getType()
1953         {
1954                 return T(Ice::IceType_i8);
1955         }
1956
1957         Short::Short(Argument<Short> argument)
1958         {
1959                 storeValue(argument.value);
1960         }
1961
1962         Short::Short(RValue<Int> cast)
1963         {
1964                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1965
1966                 storeValue(integer);
1967         }
1968
1969         Short::Short(short x)
1970         {
1971                 storeValue(Nucleus::createConstantShort(x));
1972         }
1973
1974         Short::Short(RValue<Short> rhs)
1975         {
1976                 storeValue(rhs.value);
1977         }
1978
1979         Short::Short(const Short &rhs)
1980         {
1981                 Value *value = rhs.loadValue();
1982                 storeValue(value);
1983         }
1984
1985         Short::Short(const Reference<Short> &rhs)
1986         {
1987                 Value *value = rhs.loadValue();
1988                 storeValue(value);
1989         }
1990
1991         RValue<Short> Short::operator=(RValue<Short> rhs)
1992         {
1993                 storeValue(rhs.value);
1994
1995                 return rhs;
1996         }
1997
1998         RValue<Short> Short::operator=(const Short &rhs)
1999         {
2000                 Value *value = rhs.loadValue();
2001                 storeValue(value);
2002
2003                 return RValue<Short>(value);
2004         }
2005
2006         RValue<Short> Short::operator=(const Reference<Short> &rhs)
2007         {
2008                 Value *value = rhs.loadValue();
2009                 storeValue(value);
2010
2011                 return RValue<Short>(value);
2012         }
2013
2014         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
2015         {
2016                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
2017         }
2018
2019         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
2020         {
2021                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
2022         }
2023
2024         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
2025         {
2026                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
2027         }
2028
2029         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
2030         {
2031                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
2032         }
2033
2034         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
2035         {
2036                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
2037         }
2038
2039         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
2040         {
2041                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
2042         }
2043
2044         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
2045         {
2046                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
2047         }
2048
2049         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
2050         {
2051                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
2052         }
2053
2054         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
2055         {
2056                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
2057         }
2058
2059         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
2060         {
2061                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
2062         }
2063
2064         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
2065         {
2066                 return lhs = lhs + rhs;
2067         }
2068
2069         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2070         {
2071                 return lhs = lhs - rhs;
2072         }
2073
2074         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2075         {
2076                 return lhs = lhs * rhs;
2077         }
2078
2079         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2080         {
2081                 return lhs = lhs / rhs;
2082         }
2083
2084         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2085         {
2086                 return lhs = lhs % rhs;
2087         }
2088
2089         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2090         {
2091                 return lhs = lhs & rhs;
2092         }
2093
2094         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2095         {
2096                 return lhs = lhs | rhs;
2097         }
2098
2099         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2100         {
2101                 return lhs = lhs ^ rhs;
2102         }
2103
2104         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2105         {
2106                 return lhs = lhs << rhs;
2107         }
2108
2109         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2110         {
2111                 return lhs = lhs >> rhs;
2112         }
2113
2114         RValue<Short> operator+(RValue<Short> val)
2115         {
2116                 return val;
2117         }
2118
2119         RValue<Short> operator-(RValue<Short> val)
2120         {
2121                 return RValue<Short>(Nucleus::createNeg(val.value));
2122         }
2123
2124         RValue<Short> operator~(RValue<Short> val)
2125         {
2126                 return RValue<Short>(Nucleus::createNot(val.value));
2127         }
2128
2129         RValue<Short> operator++(Short &val, int)   // Post-increment
2130         {
2131                 RValue<Short> res = val;
2132                 val += Short(1);
2133                 return res;
2134         }
2135
2136         const Short &operator++(Short &val)   // Pre-increment
2137         {
2138                 val += Short(1);
2139                 return val;
2140         }
2141
2142         RValue<Short> operator--(Short &val, int)   // Post-decrement
2143         {
2144                 RValue<Short> res = val;
2145                 val -= Short(1);
2146                 return res;
2147         }
2148
2149         const Short &operator--(Short &val)   // Pre-decrement
2150         {
2151                 val -= Short(1);
2152                 return val;
2153         }
2154
2155         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2156         {
2157                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2158         }
2159
2160         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2161         {
2162                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2163         }
2164
2165         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2166         {
2167                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2168         }
2169
2170         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2171         {
2172                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2173         }
2174
2175         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2176         {
2177                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2178         }
2179
2180         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2181         {
2182                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2183         }
2184
2185         Type *Short::getType()
2186         {
2187                 return T(Ice::IceType_i16);
2188         }
2189
2190         UShort::UShort(Argument<UShort> argument)
2191         {
2192                 storeValue(argument.value);
2193         }
2194
2195         UShort::UShort(RValue<UInt> cast)
2196         {
2197                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2198
2199                 storeValue(integer);
2200         }
2201
2202         UShort::UShort(RValue<Int> cast)
2203         {
2204                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2205
2206                 storeValue(integer);
2207         }
2208
2209         UShort::UShort(unsigned short x)
2210         {
2211                 storeValue(Nucleus::createConstantShort(x));
2212         }
2213
2214         UShort::UShort(RValue<UShort> rhs)
2215         {
2216                 storeValue(rhs.value);
2217         }
2218
2219         UShort::UShort(const UShort &rhs)
2220         {
2221                 Value *value = rhs.loadValue();
2222                 storeValue(value);
2223         }
2224
2225         UShort::UShort(const Reference<UShort> &rhs)
2226         {
2227                 Value *value = rhs.loadValue();
2228                 storeValue(value);
2229         }
2230
2231         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2232         {
2233                 storeValue(rhs.value);
2234
2235                 return rhs;
2236         }
2237
2238         RValue<UShort> UShort::operator=(const UShort &rhs)
2239         {
2240                 Value *value = rhs.loadValue();
2241                 storeValue(value);
2242
2243                 return RValue<UShort>(value);
2244         }
2245
2246         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2247         {
2248                 Value *value = rhs.loadValue();
2249                 storeValue(value);
2250
2251                 return RValue<UShort>(value);
2252         }
2253
2254         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2255         {
2256                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2257         }
2258
2259         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2260         {
2261                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2262         }
2263
2264         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2265         {
2266                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2267         }
2268
2269         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2270         {
2271                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2272         }
2273
2274         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2275         {
2276                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2277         }
2278
2279         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2280         {
2281                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2282         }
2283
2284         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2285         {
2286                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2287         }
2288
2289         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2290         {
2291                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2292         }
2293
2294         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2295         {
2296                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2297         }
2298
2299         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2300         {
2301                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2302         }
2303
2304         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2305         {
2306                 return lhs = lhs + rhs;
2307         }
2308
2309         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2310         {
2311                 return lhs = lhs - rhs;
2312         }
2313
2314         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2315         {
2316                 return lhs = lhs * rhs;
2317         }
2318
2319         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2320         {
2321                 return lhs = lhs / rhs;
2322         }
2323
2324         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2325         {
2326                 return lhs = lhs % rhs;
2327         }
2328
2329         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2330         {
2331                 return lhs = lhs & rhs;
2332         }
2333
2334         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2335         {
2336                 return lhs = lhs | rhs;
2337         }
2338
2339         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2340         {
2341                 return lhs = lhs ^ rhs;
2342         }
2343
2344         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2345         {
2346                 return lhs = lhs << rhs;
2347         }
2348
2349         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2350         {
2351                 return lhs = lhs >> rhs;
2352         }
2353
2354         RValue<UShort> operator+(RValue<UShort> val)
2355         {
2356                 return val;
2357         }
2358
2359         RValue<UShort> operator-(RValue<UShort> val)
2360         {
2361                 return RValue<UShort>(Nucleus::createNeg(val.value));
2362         }
2363
2364         RValue<UShort> operator~(RValue<UShort> val)
2365         {
2366                 return RValue<UShort>(Nucleus::createNot(val.value));
2367         }
2368
2369         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2370         {
2371                 RValue<UShort> res = val;
2372                 val += UShort(1);
2373                 return res;
2374         }
2375
2376         const UShort &operator++(UShort &val)   // Pre-increment
2377         {
2378                 val += UShort(1);
2379                 return val;
2380         }
2381
2382         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2383         {
2384                 RValue<UShort> res = val;
2385                 val -= UShort(1);
2386                 return res;
2387         }
2388
2389         const UShort &operator--(UShort &val)   // Pre-decrement
2390         {
2391                 val -= UShort(1);
2392                 return val;
2393         }
2394
2395         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2396         {
2397                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2398         }
2399
2400         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2401         {
2402                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2403         }
2404
2405         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2406         {
2407                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2408         }
2409
2410         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2411         {
2412                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2413         }
2414
2415         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2416         {
2417                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2418         }
2419
2420         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2421         {
2422                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2423         }
2424
2425         Type *UShort::getType()
2426         {
2427                 return T(Ice::IceType_i16);
2428         }
2429
2430         Byte4::Byte4(RValue<Byte8> cast)
2431         {
2432                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2433         }
2434
2435         Byte4::Byte4(const Reference<Byte4> &rhs)
2436         {
2437                 Value *value = rhs.loadValue();
2438                 storeValue(value);
2439         }
2440
2441         Type *Byte4::getType()
2442         {
2443                 return T(Type_v4i8);
2444         }
2445
2446         Type *SByte4::getType()
2447         {
2448                 return T(Type_v4i8);
2449         }
2450
2451         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2452         {
2453                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2454                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2455         }
2456
2457         Byte8::Byte8(RValue<Byte8> rhs)
2458         {
2459                 storeValue(rhs.value);
2460         }
2461
2462         Byte8::Byte8(const Byte8 &rhs)
2463         {
2464                 Value *value = rhs.loadValue();
2465                 storeValue(value);
2466         }
2467
2468         Byte8::Byte8(const Reference<Byte8> &rhs)
2469         {
2470                 Value *value = rhs.loadValue();
2471                 storeValue(value);
2472         }
2473
2474         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2475         {
2476                 storeValue(rhs.value);
2477
2478                 return rhs;
2479         }
2480
2481         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2482         {
2483                 Value *value = rhs.loadValue();
2484                 storeValue(value);
2485
2486                 return RValue<Byte8>(value);
2487         }
2488
2489         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2490         {
2491                 Value *value = rhs.loadValue();
2492                 storeValue(value);
2493
2494                 return RValue<Byte8>(value);
2495         }
2496
2497         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2498         {
2499                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2500         }
2501
2502         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2503         {
2504                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2505         }
2506
2507 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2508 //      {
2509 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2510 //      }
2511
2512 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2513 //      {
2514 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2515 //      }
2516
2517 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2518 //      {
2519 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2520 //      }
2521
2522         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2523         {
2524                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2525         }
2526
2527         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2528         {
2529                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2530         }
2531
2532         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2533         {
2534                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2535         }
2536
2537 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2538 //      {
2539 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2540 //      }
2541
2542 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2543 //      {
2544 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2545 //      }
2546
2547         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2548         {
2549                 return lhs = lhs + rhs;
2550         }
2551
2552         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2553         {
2554                 return lhs = lhs - rhs;
2555         }
2556
2557 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2558 //      {
2559 //              return lhs = lhs * rhs;
2560 //      }
2561
2562 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2563 //      {
2564 //              return lhs = lhs / rhs;
2565 //      }
2566
2567 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2568 //      {
2569 //              return lhs = lhs % rhs;
2570 //      }
2571
2572         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2573         {
2574                 return lhs = lhs & rhs;
2575         }
2576
2577         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2578         {
2579                 return lhs = lhs | rhs;
2580         }
2581
2582         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2583         {
2584                 return lhs = lhs ^ rhs;
2585         }
2586
2587 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2588 //      {
2589 //              return lhs = lhs << rhs;
2590 //      }
2591
2592 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2593 //      {
2594 //              return lhs = lhs >> rhs;
2595 //      }
2596
2597 //      RValue<Byte8> operator+(RValue<Byte8> val)
2598 //      {
2599 //              return val;
2600 //      }
2601
2602 //      RValue<Byte8> operator-(RValue<Byte8> val)
2603 //      {
2604 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2605 //      }
2606
2607         RValue<Byte8> operator~(RValue<Byte8> val)
2608         {
2609                 return RValue<Byte8>(Nucleus::createNot(val.value));
2610         }
2611
2612         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2613         {
2614                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2615                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2616                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2617                 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2618                 paddusb->addArg(x.value);
2619                 paddusb->addArg(y.value);
2620                 ::basicBlock->appendInst(paddusb);
2621
2622                 return RValue<Byte8>(V(result));
2623         }
2624
2625         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2626         {
2627                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2628                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2629                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2630                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2631                 psubusw->addArg(x.value);
2632                 psubusw->addArg(y.value);
2633                 ::basicBlock->appendInst(psubusw);
2634
2635                 return RValue<Byte8>(V(result));
2636         }
2637
2638         RValue<Short4> Unpack(RValue<Byte4> x)
2639         {
2640                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2641                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2642         }
2643
2644         RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2645         {
2646                 return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2647         }
2648
2649         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2650         {
2651                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2652                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2653         }
2654
2655         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2656         {
2657                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2658                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2659                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2660         }
2661
2662         RValue<Int> SignMask(RValue<Byte8> x)
2663         {
2664                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2665                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2666                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2667                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2668                 movmsk->addArg(x.value);
2669                 ::basicBlock->appendInst(movmsk);
2670
2671                 return RValue<Int>(V(result));
2672         }
2673
2674 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2675 //      {
2676 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2677 //      }
2678
2679         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2680         {
2681                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2682         }
2683
2684         Type *Byte8::getType()
2685         {
2686                 return T(Type_v8i8);
2687         }
2688
2689         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2690         {
2691                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
2692                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2693
2694                 storeValue(Nucleus::createBitCast(vector, getType()));
2695         }
2696
2697         SByte8::SByte8(RValue<SByte8> rhs)
2698         {
2699                 storeValue(rhs.value);
2700         }
2701
2702         SByte8::SByte8(const SByte8 &rhs)
2703         {
2704                 Value *value = rhs.loadValue();
2705                 storeValue(value);
2706         }
2707
2708         SByte8::SByte8(const Reference<SByte8> &rhs)
2709         {
2710                 Value *value = rhs.loadValue();
2711                 storeValue(value);
2712         }
2713
2714         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2715         {
2716                 storeValue(rhs.value);
2717
2718                 return rhs;
2719         }
2720
2721         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2722         {
2723                 Value *value = rhs.loadValue();
2724                 storeValue(value);
2725
2726                 return RValue<SByte8>(value);
2727         }
2728
2729         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2730         {
2731                 Value *value = rhs.loadValue();
2732                 storeValue(value);
2733
2734                 return RValue<SByte8>(value);
2735         }
2736
2737         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2738         {
2739                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2740         }
2741
2742         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2743         {
2744                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2745         }
2746
2747 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2748 //      {
2749 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2750 //      }
2751
2752 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2753 //      {
2754 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2755 //      }
2756
2757 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2758 //      {
2759 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2760 //      }
2761
2762         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2763         {
2764                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2765         }
2766
2767         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2768         {
2769                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2770         }
2771
2772         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2773         {
2774                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2775         }
2776
2777 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2778 //      {
2779 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2780 //      }
2781
2782 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2783 //      {
2784 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2785 //      }
2786
2787         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2788         {
2789                 return lhs = lhs + rhs;
2790         }
2791
2792         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2793         {
2794                 return lhs = lhs - rhs;
2795         }
2796
2797 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2798 //      {
2799 //              return lhs = lhs * rhs;
2800 //      }
2801
2802 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2803 //      {
2804 //              return lhs = lhs / rhs;
2805 //      }
2806
2807 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2808 //      {
2809 //              return lhs = lhs % rhs;
2810 //      }
2811
2812         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2813         {
2814                 return lhs = lhs & rhs;
2815         }
2816
2817         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2818         {
2819                 return lhs = lhs | rhs;
2820         }
2821
2822         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2823         {
2824                 return lhs = lhs ^ rhs;
2825         }
2826
2827 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2828 //      {
2829 //              return lhs = lhs << rhs;
2830 //      }
2831
2832 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2833 //      {
2834 //              return lhs = lhs >> rhs;
2835 //      }
2836
2837 //      RValue<SByte8> operator+(RValue<SByte8> val)
2838 //      {
2839 //              return val;
2840 //      }
2841
2842 //      RValue<SByte8> operator-(RValue<SByte8> val)
2843 //      {
2844 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
2845 //      }
2846
2847         RValue<SByte8> operator~(RValue<SByte8> val)
2848         {
2849                 return RValue<SByte8>(Nucleus::createNot(val.value));
2850         }
2851
2852         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2853         {
2854                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2855                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2856                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2857                 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2858                 paddsb->addArg(x.value);
2859                 paddsb->addArg(y.value);
2860                 ::basicBlock->appendInst(paddsb);
2861
2862                 return RValue<SByte8>(V(result));
2863         }
2864
2865         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2866         {
2867                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2868                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2869                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2870                 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2871                 psubsb->addArg(x.value);
2872                 psubsb->addArg(y.value);
2873                 ::basicBlock->appendInst(psubsb);
2874
2875                 return RValue<SByte8>(V(result));
2876         }
2877
2878         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2879         {
2880                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2881                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2882         }
2883
2884         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2885         {
2886                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2887                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2888                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2889         }
2890
2891         RValue<Int> SignMask(RValue<SByte8> x)
2892         {
2893                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2894                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2895                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2896                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2897                 movmsk->addArg(x.value);
2898                 ::basicBlock->appendInst(movmsk);
2899
2900                 return RValue<Int>(V(result));
2901         }
2902
2903         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2904         {
2905                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2906         }
2907
2908         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2909         {
2910                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2911         }
2912
2913         Type *SByte8::getType()
2914         {
2915                 return T(Type_v8i8);
2916         }
2917
2918         Byte16::Byte16(RValue<Byte16> rhs)
2919         {
2920                 storeValue(rhs.value);
2921         }
2922
2923         Byte16::Byte16(const Byte16 &rhs)
2924         {
2925                 Value *value = rhs.loadValue();
2926                 storeValue(value);
2927         }
2928
2929         Byte16::Byte16(const Reference<Byte16> &rhs)
2930         {
2931                 Value *value = rhs.loadValue();
2932                 storeValue(value);
2933         }
2934
2935         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2936         {
2937                 storeValue(rhs.value);
2938
2939                 return rhs;
2940         }
2941
2942         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2943         {
2944                 Value *value = rhs.loadValue();
2945                 storeValue(value);
2946
2947                 return RValue<Byte16>(value);
2948         }
2949
2950         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2951         {
2952                 Value *value = rhs.loadValue();
2953                 storeValue(value);
2954
2955                 return RValue<Byte16>(value);
2956         }
2957
2958         Type *Byte16::getType()
2959         {
2960                 return T(Ice::IceType_v16i8);
2961         }
2962
2963         Type *SByte16::getType()
2964         {
2965                 return T(Ice::IceType_v16i8);
2966         }
2967
2968         Short2::Short2(RValue<Short4> cast)
2969         {
2970                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2971         }
2972
2973         Type *Short2::getType()
2974         {
2975                 return T(Type_v2i16);
2976         }
2977
2978         UShort2::UShort2(RValue<UShort4> cast)
2979         {
2980                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2981         }
2982
2983         Type *UShort2::getType()
2984         {
2985                 return T(Type_v2i16);
2986         }
2987
2988         Short4::Short4(RValue<Int> cast)
2989         {
2990                 Value *vector = loadValue();
2991                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
2992                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
2993                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
2994
2995                 storeValue(swizzle);
2996         }
2997
2998         Short4::Short4(RValue<Int4> cast)
2999         {
3000                 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
3001                 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
3002                 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
3003
3004                 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
3005                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
3006
3007                 storeValue(short4);
3008         }
3009
3010 //      Short4::Short4(RValue<Float> cast)
3011 //      {
3012 //      }
3013
3014         Short4::Short4(RValue<Float4> cast)
3015         {
3016                 assert(false && "UNIMPLEMENTED");
3017         }
3018
3019         Short4::Short4(short xyzw)
3020         {
3021                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3022                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3023         }
3024
3025         Short4::Short4(short x, short y, short z, short w)
3026         {
3027                 int64_t constantVector[4] = {x, y, z, w};
3028                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3029         }
3030
3031         Short4::Short4(RValue<Short4> rhs)
3032         {
3033                 storeValue(rhs.value);
3034         }
3035
3036         Short4::Short4(const Short4 &rhs)
3037         {
3038                 Value *value = rhs.loadValue();
3039                 storeValue(value);
3040         }
3041
3042         Short4::Short4(const Reference<Short4> &rhs)
3043         {
3044                 Value *value = rhs.loadValue();
3045                 storeValue(value);
3046         }
3047
3048         Short4::Short4(RValue<UShort4> rhs)
3049         {
3050                 storeValue(rhs.value);
3051         }
3052
3053         Short4::Short4(const UShort4 &rhs)
3054         {
3055                 storeValue(rhs.loadValue());
3056         }
3057
3058         Short4::Short4(const Reference<UShort4> &rhs)
3059         {
3060                 storeValue(rhs.loadValue());
3061         }
3062
3063         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
3064         {
3065                 storeValue(rhs.value);
3066
3067                 return rhs;
3068         }
3069
3070         RValue<Short4> Short4::operator=(const Short4 &rhs)
3071         {
3072                 Value *value = rhs.loadValue();
3073                 storeValue(value);
3074
3075                 return RValue<Short4>(value);
3076         }
3077
3078         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3079         {
3080                 Value *value = rhs.loadValue();
3081                 storeValue(value);
3082
3083                 return RValue<Short4>(value);
3084         }
3085
3086         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3087         {
3088                 storeValue(rhs.value);
3089
3090                 return RValue<Short4>(rhs);
3091         }
3092
3093         RValue<Short4> Short4::operator=(const UShort4 &rhs)
3094         {
3095                 Value *value = rhs.loadValue();
3096                 storeValue(value);
3097
3098                 return RValue<Short4>(value);
3099         }
3100
3101         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3102         {
3103                 Value *value = rhs.loadValue();
3104                 storeValue(value);
3105
3106                 return RValue<Short4>(value);
3107         }
3108
3109         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3110         {
3111                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3112         }
3113
3114         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3115         {
3116                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3117         }
3118
3119         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3120         {
3121                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3122         }
3123
3124 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3125 //      {
3126 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3127 //      }
3128
3129 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3130 //      {
3131 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3132 //      }
3133
3134         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3135         {
3136                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3137         }
3138
3139         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3140         {
3141                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3142         }
3143
3144         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3145         {
3146                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3147         }
3148
3149         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3150         {
3151                 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3152         }
3153
3154         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3155         {
3156                 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3157         }
3158
3159         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3160         {
3161                 return lhs = lhs + rhs;
3162         }
3163
3164         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3165         {
3166                 return lhs = lhs - rhs;
3167         }
3168
3169         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3170         {
3171                 return lhs = lhs * rhs;
3172         }
3173
3174 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3175 //      {
3176 //              return lhs = lhs / rhs;
3177 //      }
3178
3179 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3180 //      {
3181 //              return lhs = lhs % rhs;
3182 //      }
3183
3184         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3185         {
3186                 return lhs = lhs & rhs;
3187         }
3188
3189         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3190         {
3191                 return lhs = lhs | rhs;
3192         }
3193
3194         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3195         {
3196                 return lhs = lhs ^ rhs;
3197         }
3198
3199         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3200         {
3201                 return lhs = lhs << rhs;
3202         }
3203
3204         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3205         {
3206                 return lhs = lhs >> rhs;
3207         }
3208
3209 //      RValue<Short4> operator+(RValue<Short4> val)
3210 //      {
3211 //              return val;
3212 //      }
3213
3214         RValue<Short4> operator-(RValue<Short4> val)
3215         {
3216                 return RValue<Short4>(Nucleus::createNeg(val.value));
3217         }
3218
3219         RValue<Short4> operator~(RValue<Short4> val)
3220         {
3221                 return RValue<Short4>(Nucleus::createNot(val.value));
3222         }
3223
3224         RValue<Short4> RoundShort4(RValue<Float4> cast)
3225         {
3226                 RValue<Int4> int4 = RoundInt(cast);
3227                 return As<Short4>(Pack(int4, int4));
3228         }
3229
3230         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3231         {
3232                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3233                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3234                 ::basicBlock->appendInst(cmp);
3235
3236                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3237                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3238                 ::basicBlock->appendInst(select);
3239
3240                 return RValue<Short4>(V(result));
3241         }
3242
3243         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3244         {
3245                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3246                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3247                 ::basicBlock->appendInst(cmp);
3248
3249                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3250                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3251                 ::basicBlock->appendInst(select);
3252
3253                 return RValue<Short4>(V(result));
3254         }
3255
3256         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3257         {
3258                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3259                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3260                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3261                 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3262                 paddsw->addArg(x.value);
3263                 paddsw->addArg(y.value);
3264                 ::basicBlock->appendInst(paddsw);
3265
3266                 return RValue<Short4>(V(result));
3267         }
3268
3269         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3270         {
3271                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3272                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3273                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3274                 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3275                 psubsw->addArg(x.value);
3276                 psubsw->addArg(y.value);
3277                 ::basicBlock->appendInst(psubsw);
3278
3279                 return RValue<Short4>(V(result));
3280         }
3281
3282         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3283         {
3284                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3285                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3286                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3287                 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3288                 pmulhw->addArg(x.value);
3289                 pmulhw->addArg(y.value);
3290                 ::basicBlock->appendInst(pmulhw);
3291
3292                 return RValue<Short4>(V(result));
3293         }
3294
3295         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3296         {
3297                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3298                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3299                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3300                 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3301                 pmaddwd->addArg(x.value);
3302                 pmaddwd->addArg(y.value);
3303                 ::basicBlock->appendInst(pmaddwd);
3304
3305                 return RValue<Int2>(V(result));
3306         }
3307
3308         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3309         {
3310                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3311                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3312                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3313                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3314                 pack->addArg(x.value);
3315                 pack->addArg(y.value);
3316                 ::basicBlock->appendInst(pack);
3317
3318                 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
3319         }
3320
3321         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3322         {
3323                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3324                 return RValue<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3325         }
3326
3327         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3328         {
3329                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3330                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3331                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3332         }
3333
3334         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3335         {
3336                 // Real type is v8i16
3337                 int shuffle[8] =
3338                 {
3339                         (select >> 0) & 0x03,
3340                         (select >> 2) & 0x03,
3341                         (select >> 4) & 0x03,
3342                         (select >> 6) & 0x03,
3343                         (select >> 0) & 0x03,
3344                         (select >> 2) & 0x03,
3345                         (select >> 4) & 0x03,
3346                         (select >> 6) & 0x03,
3347                 };
3348
3349                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3350         }
3351
3352         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3353         {
3354                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3355         }
3356
3357         RValue<Short> Extract(RValue<Short4> val, int i)
3358         {
3359                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3360         }
3361
3362         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3363         {
3364                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3365         }
3366
3367         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3368         {
3369                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3370         }
3371
3372         Type *Short4::getType()
3373         {
3374                 return T(Type_v4i16);
3375         }
3376
3377         UShort4::UShort4(RValue<Int4> cast)
3378         {
3379                 *this = Short4(cast);
3380         }
3381
3382         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3383         {
3384                 if(saturate)
3385                 {
3386                         if(CPUID::SSE4_1)
3387                         {
3388                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3389                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3390                         }
3391                         else
3392                         {
3393                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3394                         }
3395                 }
3396                 else
3397                 {
3398                         *this = Short4(Int4(cast));
3399                 }
3400         }
3401
3402         UShort4::UShort4(unsigned short xyzw)
3403         {
3404                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3405                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3406         }
3407
3408         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3409         {
3410                 int64_t constantVector[4] = {x, y, z, w};
3411                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3412         }
3413
3414         UShort4::UShort4(RValue<UShort4> rhs)
3415         {
3416                 storeValue(rhs.value);
3417         }
3418
3419         UShort4::UShort4(const UShort4 &rhs)
3420         {
3421                 Value *value = rhs.loadValue();
3422                 storeValue(value);
3423         }
3424
3425         UShort4::UShort4(const Reference<UShort4> &rhs)
3426         {
3427                 Value *value = rhs.loadValue();
3428                 storeValue(value);
3429         }
3430
3431         UShort4::UShort4(RValue<Short4> rhs)
3432         {
3433                 storeValue(rhs.value);
3434         }
3435
3436         UShort4::UShort4(const Short4 &rhs)
3437         {
3438                 Value *value = rhs.loadValue();
3439                 storeValue(value);
3440         }
3441
3442         UShort4::UShort4(const Reference<Short4> &rhs)
3443         {
3444                 Value *value = rhs.loadValue();
3445                 storeValue(value);
3446         }
3447
3448         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3449         {
3450                 storeValue(rhs.value);
3451
3452                 return rhs;
3453         }
3454
3455         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3456         {
3457                 Value *value = rhs.loadValue();
3458                 storeValue(value);
3459
3460                 return RValue<UShort4>(value);
3461         }
3462
3463         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3464         {
3465                 Value *value = rhs.loadValue();
3466                 storeValue(value);
3467
3468                 return RValue<UShort4>(value);
3469         }
3470
3471         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3472         {
3473                 storeValue(rhs.value);
3474
3475                 return RValue<UShort4>(rhs);
3476         }
3477
3478         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3479         {
3480                 Value *value = rhs.loadValue();
3481                 storeValue(value);
3482
3483                 return RValue<UShort4>(value);
3484         }
3485
3486         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3487         {
3488                 Value *value = rhs.loadValue();
3489                 storeValue(value);
3490
3491                 return RValue<UShort4>(value);
3492         }
3493
3494         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3495         {
3496                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3497         }
3498
3499         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3500         {
3501                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3502         }
3503
3504         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3505         {
3506                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3507         }
3508
3509         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3510         {
3511                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3512         }
3513
3514         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3515         {
3516                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3517         }
3518
3519         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3520         {
3521                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3522         }
3523
3524         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3525         {
3526                 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3527         }
3528
3529         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3530         {
3531                 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3532         }
3533
3534         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3535         {
3536                 return lhs = lhs << rhs;
3537         }
3538
3539         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3540         {
3541                 return lhs = lhs >> rhs;
3542         }
3543
3544         RValue<UShort4> operator~(RValue<UShort4> val)
3545         {
3546                 return RValue<UShort4>(Nucleus::createNot(val.value));
3547         }
3548
3549         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3550         {
3551                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3552                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3553                 ::basicBlock->appendInst(cmp);
3554
3555                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3556                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3557                 ::basicBlock->appendInst(select);
3558
3559                 return RValue<UShort4>(V(result));
3560         }
3561
3562         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3563         {
3564                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3565                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3566                 ::basicBlock->appendInst(cmp);
3567
3568                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3569                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3570                 ::basicBlock->appendInst(select);
3571
3572                 return RValue<UShort4>(V(result));
3573         }
3574
3575         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3576         {
3577                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3578                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3579                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3580                 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3581                 paddusw->addArg(x.value);
3582                 paddusw->addArg(y.value);
3583                 ::basicBlock->appendInst(paddusw);
3584
3585                 return RValue<UShort4>(V(result));
3586         }
3587
3588         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3589         {
3590                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3591                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3592                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3593                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3594                 psubusw->addArg(x.value);
3595                 psubusw->addArg(y.value);
3596                 ::basicBlock->appendInst(psubusw);
3597
3598                 return RValue<UShort4>(V(result));
3599         }
3600
3601         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3602         {
3603                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3604                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3605                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3606                 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3607                 pmulhuw->addArg(x.value);
3608                 pmulhuw->addArg(y.value);
3609                 ::basicBlock->appendInst(pmulhuw);
3610
3611                 return RValue<UShort4>(V(result));
3612         }
3613
3614         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3615         {
3616                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
3617         }
3618
3619         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3620         {
3621                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3622                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3623                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3624                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3625                 pack->addArg(x.value);
3626                 pack->addArg(y.value);
3627                 ::basicBlock->appendInst(pack);
3628
3629                 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
3630         }
3631
3632         Type *UShort4::getType()
3633         {
3634                 return T(Type_v4i16);
3635         }
3636
3637         Short8::Short8(short c)
3638         {
3639                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3640                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3641         }
3642
3643         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3644         {
3645                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3646                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3647         }
3648
3649         Short8::Short8(RValue<Short8> rhs)
3650         {
3651                 storeValue(rhs.value);
3652         }
3653
3654         Short8::Short8(const Reference<Short8> &rhs)
3655         {
3656                 Value *value = rhs.loadValue();
3657                 storeValue(value);
3658         }
3659
3660         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3661         {
3662                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3663                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3664
3665                 storeValue(packed);
3666         }
3667
3668         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3669         {
3670                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3671         }
3672
3673         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3674         {
3675                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3676         }
3677
3678         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3679         {
3680                 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3681         }
3682
3683         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3684         {
3685                 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3686         }
3687
3688         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3689         {
3690                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
3691         }
3692
3693         RValue<Int4> Abs(RValue<Int4> x)
3694         {
3695                 auto negative = x >> 31;
3696                 return (x ^ negative) - negative;
3697         }
3698
3699         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3700         {
3701                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
3702         }
3703
3704         Type *Short8::getType()
3705         {
3706                 return T(Ice::IceType_v8i16);
3707         }
3708
3709         UShort8::UShort8(unsigned short c)
3710         {
3711                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3712                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3713         }
3714
3715         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3716         {
3717                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3718                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3719         }
3720
3721         UShort8::UShort8(RValue<UShort8> rhs)
3722         {
3723                 storeValue(rhs.value);
3724         }
3725
3726         UShort8::UShort8(const Reference<UShort8> &rhs)
3727         {
3728                 Value *value = rhs.loadValue();
3729                 storeValue(value);
3730         }
3731
3732         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3733         {
3734                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3735                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3736
3737                 storeValue(packed);
3738         }
3739
3740         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3741         {
3742                 storeValue(rhs.value);
3743
3744                 return rhs;
3745         }
3746
3747         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3748         {
3749                 Value *value = rhs.loadValue();
3750                 storeValue(value);
3751
3752                 return RValue<UShort8>(value);
3753         }
3754
3755         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3756         {
3757                 Value *value = rhs.loadValue();
3758                 storeValue(value);
3759
3760                 return RValue<UShort8>(value);
3761         }
3762
3763         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3764         {
3765                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3766         }
3767
3768         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3769         {
3770                 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3771         }
3772
3773         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3774         {
3775                 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3776         }
3777
3778         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3779         {
3780                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3781         }
3782
3783         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3784         {
3785                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3786         }
3787
3788         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3789         {
3790                 return lhs = lhs + rhs;
3791         }
3792
3793         RValue<UShort8> operator~(RValue<UShort8> val)
3794         {
3795                 return RValue<UShort8>(Nucleus::createNot(val.value));
3796         }
3797
3798         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3799         {
3800                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3801         }
3802
3803         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3804         {
3805                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3806         }
3807
3808         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3809 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3810 //      {
3811 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3812 //      }
3813
3814         Type *UShort8::getType()
3815         {
3816                 return T(Ice::IceType_v8i16);
3817         }
3818
3819         Int::Int(Argument<Int> argument)
3820         {
3821                 storeValue(argument.value);
3822         }
3823
3824         Int::Int(RValue<Byte> cast)
3825         {
3826                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3827
3828                 storeValue(integer);
3829         }
3830
3831         Int::Int(RValue<SByte> cast)
3832         {
3833                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3834
3835                 storeValue(integer);
3836         }
3837
3838         Int::Int(RValue<Short> cast)
3839         {
3840                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3841
3842                 storeValue(integer);
3843         }
3844
3845         Int::Int(RValue<UShort> cast)
3846         {
3847                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3848
3849                 storeValue(integer);
3850         }
3851
3852         Int::Int(RValue<Int2> cast)
3853         {
3854                 *this = Extract(cast, 0);
3855         }
3856
3857         Int::Int(RValue<Long> cast)
3858         {
3859                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3860
3861                 storeValue(integer);
3862         }
3863
3864         Int::Int(RValue<Float> cast)
3865         {
3866                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3867
3868                 storeValue(integer);
3869         }
3870
3871         Int::Int(int x)
3872         {
3873                 storeValue(Nucleus::createConstantInt(x));
3874         }
3875
3876         Int::Int(RValue<Int> rhs)
3877         {
3878                 storeValue(rhs.value);
3879         }
3880
3881         Int::Int(RValue<UInt> rhs)
3882         {
3883                 storeValue(rhs.value);
3884         }
3885
3886         Int::Int(const Int &rhs)
3887         {
3888                 Value *value = rhs.loadValue();
3889                 storeValue(value);
3890         }
3891
3892         Int::Int(const Reference<Int> &rhs)
3893         {
3894                 Value *value = rhs.loadValue();
3895                 storeValue(value);
3896         }
3897
3898         Int::Int(const UInt &rhs)
3899         {
3900                 Value *value = rhs.loadValue();
3901                 storeValue(value);
3902         }
3903
3904         Int::Int(const Reference<UInt> &rhs)
3905         {
3906                 Value *value = rhs.loadValue();
3907                 storeValue(value);
3908         }
3909
3910         RValue<Int> Int::operator=(int rhs)
3911         {
3912                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3913         }
3914
3915         RValue<Int> Int::operator=(RValue<Int> rhs)
3916         {
3917                 storeValue(rhs.value);
3918
3919                 return rhs;
3920         }
3921
3922         RValue<Int> Int::operator=(RValue<UInt> rhs)
3923         {
3924                 storeValue(rhs.value);
3925
3926                 return RValue<Int>(rhs);
3927         }
3928
3929         RValue<Int> Int::operator=(const Int &rhs)
3930         {
3931                 Value *value = rhs.loadValue();
3932                 storeValue(value);
3933
3934                 return RValue<Int>(value);
3935         }
3936
3937         RValue<Int> Int::operator=(const Reference<Int> &rhs)
3938         {
3939                 Value *value = rhs.loadValue();
3940                 storeValue(value);
3941
3942                 return RValue<Int>(value);
3943         }
3944
3945         RValue<Int> Int::operator=(const UInt &rhs)
3946         {
3947                 Value *value = rhs.loadValue();
3948                 storeValue(value);
3949
3950                 return RValue<Int>(value);
3951         }
3952
3953         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3954         {
3955                 Value *value = rhs.loadValue();
3956                 storeValue(value);
3957
3958                 return RValue<Int>(value);
3959         }
3960
3961         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3962         {
3963                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3964         }
3965
3966         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3967         {
3968                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3969         }
3970
3971         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3972         {
3973                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3974         }
3975
3976         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3977         {
3978                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3979         }
3980
3981         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3982         {
3983                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3984         }
3985
3986         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3987         {
3988                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3989         }
3990
3991         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3992         {
3993                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3994         }
3995
3996         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3997         {
3998                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3999         }
4000
4001         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
4002         {
4003                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
4004         }
4005
4006         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
4007         {
4008                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
4009         }
4010
4011         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
4012         {
4013                 return lhs = lhs + rhs;
4014         }
4015
4016         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
4017         {
4018                 return lhs = lhs - rhs;
4019         }
4020
4021         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
4022         {
4023                 return lhs = lhs * rhs;
4024         }
4025
4026         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
4027         {
4028                 return lhs = lhs / rhs;
4029         }
4030
4031         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
4032         {
4033                 return lhs = lhs % rhs;
4034         }
4035
4036         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
4037         {
4038                 return lhs = lhs & rhs;
4039         }
4040
4041         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
4042         {
4043                 return lhs = lhs | rhs;
4044         }
4045
4046         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
4047         {
4048                 return lhs = lhs ^ rhs;
4049         }
4050
4051         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
4052         {
4053                 return lhs = lhs << rhs;
4054         }
4055
4056         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
4057         {
4058                 return lhs = lhs >> rhs;
4059         }
4060
4061         RValue<Int> operator+(RValue<Int> val)
4062         {
4063                 return val;
4064         }
4065
4066         RValue<Int> operator-(RValue<Int> val)
4067         {
4068                 return RValue<Int>(Nucleus::createNeg(val.value));
4069         }
4070
4071         RValue<Int> operator~(RValue<Int> val)
4072         {
4073                 return RValue<Int>(Nucleus::createNot(val.value));
4074         }
4075
4076         RValue<Int> operator++(Int &val, int)   // Post-increment
4077         {
4078                 RValue<Int> res = val;
4079                 val += 1;
4080                 return res;
4081         }
4082
4083         const Int &operator++(Int &val)   // Pre-increment
4084         {
4085                 val += 1;
4086                 return val;
4087         }
4088
4089         RValue<Int> operator--(Int &val, int)   // Post-decrement
4090         {
4091                 RValue<Int> res = val;
4092                 val -= 1;
4093                 return res;
4094         }
4095
4096         const Int &operator--(Int &val)   // Pre-decrement
4097         {
4098                 val -= 1;
4099                 return val;
4100         }
4101
4102         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4103         {
4104                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4105         }
4106
4107         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4108         {
4109                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4110         }
4111
4112         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4113         {
4114                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4115         }
4116
4117         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4118         {
4119                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4120         }
4121
4122         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4123         {
4124                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4125         }
4126
4127         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4128         {
4129                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4130         }
4131
4132         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4133         {
4134                 return IfThenElse(x > y, x, y);
4135         }
4136
4137         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4138         {
4139                 return IfThenElse(x < y, x, y);
4140         }
4141
4142         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4143         {
4144                 return Min(Max(x, min), max);
4145         }
4146
4147         RValue<Int> RoundInt(RValue<Float> cast)
4148         {
4149                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4150                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4151                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4152                 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4153                 nearbyint->addArg(cast.value);
4154                 ::basicBlock->appendInst(nearbyint);
4155
4156                 return RValue<Int>(V(result));
4157         }
4158
4159         Type *Int::getType()
4160         {
4161                 return T(Ice::IceType_i32);
4162         }
4163
4164         Long::Long(RValue<Int> cast)
4165         {
4166                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4167
4168                 storeValue(integer);
4169         }
4170
4171         Long::Long(RValue<UInt> cast)
4172         {
4173                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4174
4175                 storeValue(integer);
4176         }
4177
4178         Long::Long(RValue<Long> rhs)
4179         {
4180                 storeValue(rhs.value);
4181         }
4182
4183         RValue<Long> Long::operator=(int64_t rhs)
4184         {
4185                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4186         }
4187
4188         RValue<Long> Long::operator=(RValue<Long> rhs)
4189         {
4190                 storeValue(rhs.value);
4191
4192                 return rhs;
4193         }
4194
4195         RValue<Long> Long::operator=(const Long &rhs)
4196         {
4197                 Value *value = rhs.loadValue();
4198                 storeValue(value);
4199
4200                 return RValue<Long>(value);
4201         }
4202
4203         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4204         {
4205                 Value *value = rhs.loadValue();
4206                 storeValue(value);
4207
4208                 return RValue<Long>(value);
4209         }
4210
4211         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4212         {
4213                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4214         }
4215
4216         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4217         {
4218                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4219         }
4220
4221         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4222         {
4223                 return lhs = lhs + rhs;
4224         }
4225
4226         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4227         {
4228                 return lhs = lhs - rhs;
4229         }
4230
4231         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4232         {
4233                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4234         }
4235
4236         Type *Long::getType()
4237         {
4238                 return T(Ice::IceType_i64);
4239         }
4240
4241         UInt::UInt(Argument<UInt> argument)
4242         {
4243                 storeValue(argument.value);
4244         }
4245
4246         UInt::UInt(RValue<UShort> cast)
4247         {
4248                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4249
4250                 storeValue(integer);
4251         }
4252
4253         UInt::UInt(RValue<Long> cast)
4254         {
4255                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4256
4257                 storeValue(integer);
4258         }
4259
4260         UInt::UInt(RValue<Float> cast)
4261         {
4262                 // Smallest positive value representable in UInt, but not in Int
4263                 const unsigned int ustart = 0x80000000u;
4264                 const float ustartf = float(ustart);
4265
4266                 // If the value is negative, store 0, otherwise store the result of the conversion
4267                 storeValue((~(As<Int>(cast) >> 31) &
4268                 // Check if the value can be represented as an Int
4269                         IfThenElse(cast >= ustartf,
4270                 // If the value is too large, subtract ustart and re-add it after conversion.
4271                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4272                 // Otherwise, just convert normally
4273                                 Int(cast))).value);
4274         }
4275
4276         UInt::UInt(int x)
4277         {
4278                 storeValue(Nucleus::createConstantInt(x));
4279         }
4280
4281         UInt::UInt(unsigned int x)
4282         {
4283                 storeValue(Nucleus::createConstantInt(x));
4284         }
4285
4286         UInt::UInt(RValue<UInt> rhs)
4287         {
4288                 storeValue(rhs.value);
4289         }
4290
4291         UInt::UInt(RValue<Int> rhs)
4292         {
4293                 storeValue(rhs.value);
4294         }
4295
4296         UInt::UInt(const UInt &rhs)
4297         {
4298                 Value *value = rhs.loadValue();
4299                 storeValue(value);
4300         }
4301
4302         UInt::UInt(const Reference<UInt> &rhs)
4303         {
4304                 Value *value = rhs.loadValue();
4305                 storeValue(value);
4306         }
4307
4308         UInt::UInt(const Int &rhs)
4309         {
4310                 Value *value = rhs.loadValue();
4311                 storeValue(value);
4312         }
4313
4314         UInt::UInt(const Reference<Int> &rhs)
4315         {
4316                 Value *value = rhs.loadValue();
4317                 storeValue(value);
4318         }
4319
4320         RValue<UInt> UInt::operator=(unsigned int rhs)
4321         {
4322                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4323         }
4324
4325         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4326         {
4327                 storeValue(rhs.value);
4328
4329                 return rhs;
4330         }
4331
4332         RValue<UInt> UInt::operator=(RValue<Int> rhs)
4333         {
4334                 storeValue(rhs.value);
4335
4336                 return RValue<UInt>(rhs);
4337         }
4338
4339         RValue<UInt> UInt::operator=(const UInt &rhs)
4340         {
4341                 Value *value = rhs.loadValue();
4342                 storeValue(value);
4343
4344                 return RValue<UInt>(value);
4345         }
4346
4347         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4348         {
4349                 Value *value = rhs.loadValue();
4350                 storeValue(value);
4351
4352                 return RValue<UInt>(value);
4353         }
4354
4355         RValue<UInt> UInt::operator=(const Int &rhs)
4356         {
4357                 Value *value = rhs.loadValue();
4358                 storeValue(value);
4359
4360                 return RValue<UInt>(value);
4361         }
4362
4363         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4364         {
4365                 Value *value = rhs.loadValue();
4366                 storeValue(value);
4367
4368                 return RValue<UInt>(value);
4369         }
4370
4371         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4372         {
4373                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4374         }
4375
4376         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4377         {
4378                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4379         }
4380
4381         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4382         {
4383                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4384         }
4385
4386         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4387         {
4388                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4389         }
4390
4391         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4392         {
4393                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4394         }
4395
4396         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4397         {
4398                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4399         }
4400
4401         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4402         {
4403                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4404         }
4405
4406         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4407         {
4408                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4409         }
4410
4411         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4412         {
4413                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4414         }
4415
4416         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4417         {
4418                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4419         }
4420
4421         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4422         {
4423                 return lhs = lhs + rhs;
4424         }
4425
4426         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4427         {
4428                 return lhs = lhs - rhs;
4429         }
4430
4431         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4432         {
4433                 return lhs = lhs * rhs;
4434         }
4435
4436         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4437         {
4438                 return lhs = lhs / rhs;
4439         }
4440
4441         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4442         {
4443                 return lhs = lhs % rhs;
4444         }
4445
4446         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4447         {
4448                 return lhs = lhs & rhs;
4449         }
4450
4451         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4452         {
4453                 return lhs = lhs | rhs;
4454         }
4455
4456         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4457         {
4458                 return lhs = lhs ^ rhs;
4459         }
4460
4461         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4462         {
4463                 return lhs = lhs << rhs;
4464         }
4465
4466         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4467         {
4468                 return lhs = lhs >> rhs;
4469         }
4470
4471         RValue<UInt> operator+(RValue<UInt> val)
4472         {
4473                 return val;
4474         }
4475
4476         RValue<UInt> operator-(RValue<UInt> val)
4477         {
4478                 return RValue<UInt>(Nucleus::createNeg(val.value));
4479         }
4480
4481         RValue<UInt> operator~(RValue<UInt> val)
4482         {
4483                 return RValue<UInt>(Nucleus::createNot(val.value));
4484         }
4485
4486         RValue<UInt> operator++(UInt &val, int)   // Post-increment
4487         {
4488                 RValue<UInt> res = val;
4489                 val += 1;
4490                 return res;
4491         }
4492
4493         const UInt &operator++(UInt &val)   // Pre-increment
4494         {
4495                 val += 1;
4496                 return val;
4497         }
4498
4499         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
4500         {
4501                 RValue<UInt> res = val;
4502                 val -= 1;
4503                 return res;
4504         }
4505
4506         const UInt &operator--(UInt &val)   // Pre-decrement
4507         {
4508                 val -= 1;
4509                 return val;
4510         }
4511
4512         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4513         {
4514                 return IfThenElse(x > y, x, y);
4515         }
4516
4517         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4518         {
4519                 return IfThenElse(x < y, x, y);
4520         }
4521
4522         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4523         {
4524                 return Min(Max(x, min), max);
4525         }
4526
4527         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4528         {
4529                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4530         }
4531
4532         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4533         {
4534                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4535         }
4536
4537         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4538         {
4539                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4540         }
4541
4542         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4543         {
4544                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4545         }
4546
4547         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4548         {
4549                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4550         }
4551
4552         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4553         {
4554                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4555         }
4556
4557 //      RValue<UInt> RoundUInt(RValue<Float> cast)
4558 //      {
4559 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
4560 //      }
4561
4562         Type *UInt::getType()
4563         {
4564                 return T(Ice::IceType_i32);
4565         }
4566
4567 //      Int2::Int2(RValue<Int> cast)
4568 //      {
4569 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4570 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4571 //
4572 //              Constant *shuffle[2];
4573 //              shuffle[0] = Nucleus::createConstantInt(0);
4574 //              shuffle[1] = Nucleus::createConstantInt(0);
4575 //
4576 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4577 //
4578 //              storeValue(replicate);
4579 //      }
4580
4581         Int2::Int2(RValue<Int4> cast)
4582         {
4583                 storeValue(Nucleus::createBitCast(cast.value, getType()));
4584         }
4585
4586         Int2::Int2(int x, int y)
4587         {
4588                 int64_t constantVector[2] = {x, y};
4589                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4590         }
4591
4592         Int2::Int2(RValue<Int2> rhs)
4593         {
4594                 storeValue(rhs.value);
4595         }
4596
4597         Int2::Int2(const Int2 &rhs)
4598         {
4599                 Value *value = rhs.loadValue();
4600                 storeValue(value);
4601         }
4602
4603         Int2::Int2(const Reference<Int2> &rhs)
4604         {
4605                 Value *value = rhs.loadValue();
4606                 storeValue(value);
4607         }
4608
4609         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4610         {
4611                 int shuffle[4] = {0, 4, 1, 5};
4612                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4613
4614                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4615         }
4616
4617         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4618         {
4619                 storeValue(rhs.value);
4620
4621                 return rhs;
4622         }
4623
4624         RValue<Int2> Int2::operator=(const Int2 &rhs)
4625         {
4626                 Value *value = rhs.loadValue();
4627                 storeValue(value);
4628
4629                 return RValue<Int2>(value);
4630         }
4631
4632         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4633         {
4634                 Value *value = rhs.loadValue();
4635                 storeValue(value);
4636
4637                 return RValue<Int2>(value);
4638         }
4639
4640         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4641         {
4642                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4643         }
4644
4645         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4646         {
4647                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4648         }
4649
4650 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4651 //      {
4652 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4653 //      }
4654
4655 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4656 //      {
4657 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4658 //      }
4659
4660 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4661 //      {
4662 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4663 //      }
4664
4665         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4666         {
4667                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4668         }
4669
4670         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4671         {
4672                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4673         }
4674
4675         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4676         {
4677                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4678         }
4679
4680         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4681         {
4682                 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4683         }
4684
4685         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4686         {
4687                 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4688         }
4689
4690         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4691         {
4692                 return lhs = lhs + rhs;
4693         }
4694
4695         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4696         {
4697                 return lhs = lhs - rhs;
4698         }
4699
4700 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4701 //      {
4702 //              return lhs = lhs * rhs;
4703 //      }
4704
4705 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4706 //      {
4707 //              return lhs = lhs / rhs;
4708 //      }
4709
4710 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4711 //      {
4712 //              return lhs = lhs % rhs;
4713 //      }
4714
4715         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4716         {
4717                 return lhs = lhs & rhs;
4718         }
4719
4720         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4721         {
4722                 return lhs = lhs | rhs;
4723         }
4724
4725         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4726         {
4727                 return lhs = lhs ^ rhs;
4728         }
4729
4730         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4731         {
4732                 return lhs = lhs << rhs;
4733         }
4734
4735         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4736         {
4737                 return lhs = lhs >> rhs;
4738         }
4739
4740 //      RValue<Int2> operator+(RValue<Int2> val)
4741 //      {
4742 //              return val;
4743 //      }
4744
4745 //      RValue<Int2> operator-(RValue<Int2> val)
4746 //      {
4747 //              return RValue<Int2>(Nucleus::createNeg(val.value));
4748 //      }
4749
4750         RValue<Int2> operator~(RValue<Int2> val)
4751         {
4752                 return RValue<Int2>(Nucleus::createNot(val.value));
4753         }
4754
4755         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4756         {
4757                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4758                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4759         }
4760
4761         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4762         {
4763                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4764                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4765                 return As<Short4>(Swizzle(lowHigh, 0xEE));
4766         }
4767
4768         RValue<Int> Extract(RValue<Int2> val, int i)
4769         {
4770                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4771         }
4772
4773         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4774         {
4775                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
4776         }
4777
4778         Type *Int2::getType()
4779         {
4780                 return T(Type_v2i32);
4781         }
4782
4783         UInt2::UInt2(unsigned int x, unsigned int y)
4784         {
4785                 int64_t constantVector[2] = {x, y};
4786                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4787         }
4788
4789         UInt2::UInt2(RValue<UInt2> rhs)
4790         {
4791                 storeValue(rhs.value);
4792         }
4793
4794         UInt2::UInt2(const UInt2 &rhs)
4795         {
4796                 Value *value = rhs.loadValue();
4797                 storeValue(value);
4798         }
4799
4800         UInt2::UInt2(const Reference<UInt2> &rhs)
4801         {
4802                 Value *value = rhs.loadValue();
4803                 storeValue(value);
4804         }
4805
4806         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
4807         {
4808                 storeValue(rhs.value);
4809
4810                 return rhs;
4811         }
4812
4813         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4814         {
4815                 Value *value = rhs.loadValue();
4816                 storeValue(value);
4817
4818                 return RValue<UInt2>(value);
4819         }
4820
4821         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4822         {
4823                 Value *value = rhs.loadValue();
4824                 storeValue(value);
4825
4826                 return RValue<UInt2>(value);
4827         }
4828
4829         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4830         {
4831                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
4832         }
4833
4834         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4835         {
4836                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4837         }
4838
4839 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4840 //      {
4841 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4842 //      }
4843
4844 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4845 //      {
4846 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4847 //      }
4848
4849 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4850 //      {
4851 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
4852 //      }
4853
4854         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4855         {
4856                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
4857         }
4858
4859         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4860         {
4861                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
4862         }
4863
4864         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4865         {
4866                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
4867         }
4868
4869         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4870         {
4871                 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4872         }
4873
4874         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4875         {
4876                 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4877         }
4878
4879         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4880         {
4881                 return lhs = lhs + rhs;
4882         }
4883
4884         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4885         {
4886                 return lhs = lhs - rhs;
4887         }
4888
4889 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4890 //      {
4891 //              return lhs = lhs * rhs;
4892 //      }
4893
4894 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4895 //      {
4896 //              return lhs = lhs / rhs;
4897 //      }
4898
4899 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4900 //      {
4901 //              return lhs = lhs % rhs;
4902 //      }
4903
4904         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4905         {
4906                 return lhs = lhs & rhs;
4907         }
4908
4909         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4910         {
4911                 return lhs = lhs | rhs;
4912         }
4913
4914         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4915         {
4916                 return lhs = lhs ^ rhs;
4917         }
4918
4919         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4920         {
4921                 return lhs = lhs << rhs;
4922         }
4923
4924         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4925         {
4926                 return lhs = lhs >> rhs;
4927         }
4928
4929 //      RValue<UInt2> operator+(RValue<UInt2> val)
4930 //      {
4931 //              return val;
4932 //      }
4933
4934 //      RValue<UInt2> operator-(RValue<UInt2> val)
4935 //      {
4936 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
4937 //      }
4938
4939         RValue<UInt2> operator~(RValue<UInt2> val)
4940         {
4941                 return RValue<UInt2>(Nucleus::createNot(val.value));
4942         }
4943
4944         Type *UInt2::getType()
4945         {
4946                 return T(Type_v2i32);
4947         }
4948
4949         Int4::Int4(RValue<Byte4> cast)
4950         {
4951                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4952                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4953
4954                 Value *e;
4955                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4956                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4957                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4958
4959                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4960                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4961                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4962
4963                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4964                 storeValue(f);
4965         }
4966
4967         Int4::Int4(RValue<SByte4> cast)
4968         {
4969                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4970                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4971
4972                 Value *e;
4973                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4974                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4975                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
4976
4977                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4978                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4979                 e = Nucleus::createShuffleVector(d, d, swizzle2);
4980
4981                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4982                 Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
4983                 storeValue(g);
4984         }
4985
4986         Int4::Int4(RValue<Float4> cast)
4987         {
4988                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
4989
4990                 storeValue(xyzw);
4991         }
4992
4993         Int4::Int4(RValue<Short4> cast)
4994         {
4995                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4996                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
4997                 Value *d = Nucleus::createBitCast(c, Int4::getType());
4998                 Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
4999                 storeValue(e);
5000         }
5001
5002         Int4::Int4(RValue<UShort4> cast)
5003         {
5004                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5005                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
5006                 Value *d = Nucleus::createBitCast(c, Int4::getType());
5007                 storeValue(d);
5008         }
5009
5010         Int4::Int4(int xyzw)
5011         {
5012                 constant(xyzw, xyzw, xyzw, xyzw);
5013         }
5014
5015         Int4::Int4(int x, int yzw)
5016         {
5017                 constant(x, yzw, yzw, yzw);
5018         }
5019
5020         Int4::Int4(int x, int y, int zw)
5021         {
5022                 constant(x, y, zw, zw);
5023         }
5024
5025         Int4::Int4(int x, int y, int z, int w)
5026         {
5027                 constant(x, y, z, w);
5028         }
5029
5030         void Int4::constant(int x, int y, int z, int w)
5031         {
5032                 int64_t constantVector[4] = {x, y, z, w};
5033                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5034         }
5035
5036         Int4::Int4(RValue<Int4> rhs)
5037         {
5038                 storeValue(rhs.value);
5039         }
5040
5041         Int4::Int4(const Int4 &rhs)
5042         {
5043                 Value *value = rhs.loadValue();
5044                 storeValue(value);
5045         }
5046
5047         Int4::Int4(const Reference<Int4> &rhs)
5048         {
5049                 Value *value = rhs.loadValue();
5050                 storeValue(value);
5051         }
5052
5053         Int4::Int4(RValue<UInt4> rhs)
5054         {
5055                 storeValue(rhs.value);
5056         }
5057
5058         Int4::Int4(const UInt4 &rhs)
5059         {
5060                 Value *value = rhs.loadValue();
5061                 storeValue(value);
5062         }
5063
5064         Int4::Int4(const Reference<UInt4> &rhs)
5065         {
5066                 Value *value = rhs.loadValue();
5067                 storeValue(value);
5068         }
5069
5070         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5071         {
5072                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5073                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5074
5075                 storeValue(packed);
5076         }
5077
5078         Int4::Int4(RValue<Int> rhs)
5079         {
5080                 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
5081
5082                 int swizzle[4] = {0, 0, 0, 0};
5083                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
5084
5085                 storeValue(replicate);
5086         }
5087
5088         Int4::Int4(const Int &rhs)
5089         {
5090                 *this = RValue<Int>(rhs.loadValue());
5091         }
5092
5093         Int4::Int4(const Reference<Int> &rhs)
5094         {
5095                 *this = RValue<Int>(rhs.loadValue());
5096         }
5097
5098         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5099         {
5100                 storeValue(rhs.value);
5101
5102                 return rhs;
5103         }
5104
5105         RValue<Int4> Int4::operator=(const Int4 &rhs)
5106         {
5107                 Value *value = rhs.loadValue();
5108                 storeValue(value);
5109
5110                 return RValue<Int4>(value);
5111         }
5112
5113         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5114         {
5115                 Value *value = rhs.loadValue();
5116                 storeValue(value);
5117
5118                 return RValue<Int4>(value);
5119         }
5120
5121         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5122         {
5123                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5124         }
5125
5126         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5127         {
5128                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5129         }
5130
5131         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5132         {
5133                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5134         }
5135
5136         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5137         {
5138                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5139         }
5140
5141         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5142         {
5143                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5144         }
5145
5146         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5147         {
5148                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5149         }
5150
5151         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5152         {
5153                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5154         }
5155
5156         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5157         {
5158                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5159         }
5160
5161         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5162         {
5163                 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5164         }
5165
5166         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5167         {
5168                 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5169         }
5170
5171         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5172         {
5173                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5174         }
5175
5176         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5177         {
5178                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5179         }
5180
5181         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5182         {
5183                 return lhs = lhs + rhs;
5184         }
5185
5186         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5187         {
5188                 return lhs = lhs - rhs;
5189         }
5190
5191         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5192         {
5193                 return lhs = lhs * rhs;
5194         }
5195
5196 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5197 //      {
5198 //              return lhs = lhs / rhs;
5199 //      }
5200
5201 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5202 //      {
5203 //              return lhs = lhs % rhs;
5204 //      }
5205
5206         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5207         {
5208                 return lhs = lhs & rhs;
5209         }
5210
5211         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5212         {
5213                 return lhs = lhs | rhs;
5214         }
5215
5216         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5217         {
5218                 return lhs = lhs ^ rhs;
5219         }
5220
5221         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5222         {
5223                 return lhs = lhs << rhs;
5224         }
5225
5226         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5227         {
5228                 return lhs = lhs >> rhs;
5229         }
5230
5231         RValue<Int4> operator+(RValue<Int4> val)
5232         {
5233                 return val;
5234         }
5235
5236         RValue<Int4> operator-(RValue<Int4> val)
5237         {
5238                 return RValue<Int4>(Nucleus::createNeg(val.value));
5239         }
5240
5241         RValue<Int4> operator~(RValue<Int4> val)
5242         {
5243                 return RValue<Int4>(Nucleus::createNot(val.value));
5244         }
5245
5246         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5247         {
5248                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5249         }
5250
5251         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5252         {
5253                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5254         }
5255
5256         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5257         {
5258                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5259         }
5260
5261         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5262         {
5263                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5264         }
5265
5266         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5267         {
5268                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5269         }
5270
5271         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5272         {
5273                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5274         }
5275
5276         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5277         {
5278                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5279                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5280                 ::basicBlock->appendInst(cmp);
5281
5282                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5283                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5284                 ::basicBlock->appendInst(select);
5285
5286                 return RValue<Int4>(V(result));
5287         }
5288
5289         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5290         {
5291                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5292                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5293                 ::basicBlock->appendInst(cmp);
5294
5295                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5296                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5297                 ::basicBlock->appendInst(select);
5298
5299                 return RValue<Int4>(V(result));
5300         }
5301
5302         RValue<Int4> RoundInt(RValue<Float4> cast)
5303         {
5304                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5305                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5306                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5307                 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5308                 nearbyint->addArg(cast.value);
5309                 ::basicBlock->appendInst(nearbyint);
5310
5311                 return RValue<Int4>(V(result));
5312         }
5313
5314         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5315         {
5316                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5317                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5318                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5319                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5320                 pack->addArg(x.value);
5321                 pack->addArg(y.value);
5322                 ::basicBlock->appendInst(pack);
5323
5324                 return RValue<Short8>(V(result));
5325         }
5326
5327         RValue<Int> Extract(RValue<Int4> x, int i)
5328         {
5329                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5330         }
5331
5332         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5333         {
5334                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5335         }
5336
5337         RValue<Int> SignMask(RValue<Int4> x)
5338         {
5339                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5340                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5341                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5342                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5343                 movmsk->addArg(x.value);
5344                 ::basicBlock->appendInst(movmsk);
5345
5346                 return RValue<Int>(V(result));
5347         }
5348
5349         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5350         {
5351                 return RValue<Int4>(createSwizzle4(x.value, select));
5352         }
5353
5354         Type *Int4::getType()
5355         {
5356                 return T(Ice::IceType_v4i32);
5357         }
5358
5359         UInt4::UInt4(RValue<Float4> cast)
5360         {
5361                 // Smallest positive value representable in UInt, but not in Int
5362                 const unsigned int ustart = 0x80000000u;
5363                 const float ustartf = float(ustart);
5364
5365                 // Check if the value can be represented as an Int
5366                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5367                 // If the value is too large, subtract ustart and re-add it after conversion.
5368                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5369                 // Otherwise, just convert normally
5370                           (~uiValue & Int4(cast));
5371                 // If the value is negative, store 0, otherwise store the result of the conversion
5372                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
5373         }
5374
5375         UInt4::UInt4(int xyzw)
5376         {
5377                 constant(xyzw, xyzw, xyzw, xyzw);
5378         }
5379
5380         UInt4::UInt4(int x, int yzw)
5381         {
5382                 constant(x, yzw, yzw, yzw);
5383         }
5384
5385         UInt4::UInt4(int x, int y, int zw)
5386         {
5387                 constant(x, y, zw, zw);
5388         }
5389
5390         UInt4::UInt4(int x, int y, int z, int w)
5391         {
5392                 constant(x, y, z, w);
5393         }
5394
5395         void UInt4::constant(int x, int y, int z, int w)
5396         {
5397                 int64_t constantVector[4] = {x, y, z, w};
5398                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5399         }
5400
5401         UInt4::UInt4(RValue<UInt4> rhs)
5402         {
5403                 storeValue(rhs.value);
5404         }
5405
5406         UInt4::UInt4(const UInt4 &rhs)
5407         {
5408                 Value *value = rhs.loadValue();
5409                 storeValue(value);
5410         }
5411
5412         UInt4::UInt4(const Reference<UInt4> &rhs)
5413         {
5414                 Value *value = rhs.loadValue();
5415                 storeValue(value);
5416         }
5417
5418         UInt4::UInt4(RValue<Int4> rhs)
5419         {
5420                 storeValue(rhs.value);
5421         }
5422
5423         UInt4::UInt4(const Int4 &rhs)
5424         {
5425                 Value *value = rhs.loadValue();
5426                 storeValue(value);
5427         }
5428
5429         UInt4::UInt4(const Reference<Int4> &rhs)
5430         {
5431                 Value *value = rhs.loadValue();
5432                 storeValue(value);
5433         }
5434
5435         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5436         {
5437                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5438                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5439
5440                 storeValue(packed);
5441         }
5442
5443         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5444         {
5445                 storeValue(rhs.value);
5446
5447                 return rhs;
5448         }
5449
5450         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5451         {
5452                 Value *value = rhs.loadValue();
5453                 storeValue(value);
5454
5455                 return RValue<UInt4>(value);
5456         }
5457
5458         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5459         {
5460                 Value *value = rhs.loadValue();
5461                 storeValue(value);
5462
5463                 return RValue<UInt4>(value);
5464         }
5465
5466         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5467         {
5468                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5469         }
5470
5471         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5472         {
5473                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5474         }
5475
5476         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5477         {
5478                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5479         }
5480
5481         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5482         {
5483                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5484         }
5485
5486         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5487         {
5488                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5489         }
5490
5491         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5492         {
5493                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5494         }
5495
5496         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5497         {
5498                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5499         }
5500
5501         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5502         {
5503                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5504         }
5505
5506         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5507         {
5508                 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5509         }
5510
5511         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5512         {
5513                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5514         }
5515
5516         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5517         {
5518                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5519         }
5520
5521         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5522         {
5523                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5524         }
5525
5526         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5527         {
5528                 return lhs = lhs + rhs;
5529         }
5530
5531         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5532         {
5533                 return lhs = lhs - rhs;
5534         }
5535
5536         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5537         {
5538                 return lhs = lhs * rhs;
5539         }
5540
5541 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5542 //      {
5543 //              return lhs = lhs / rhs;
5544 //      }
5545
5546 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5547 //      {
5548 //              return lhs = lhs % rhs;
5549 //      }
5550
5551         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5552         {
5553                 return lhs = lhs & rhs;
5554         }
5555
5556         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5557         {
5558                 return lhs = lhs | rhs;
5559         }
5560
5561         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5562         {
5563                 return lhs = lhs ^ rhs;
5564         }
5565
5566         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5567         {
5568                 return lhs = lhs << rhs;
5569         }
5570
5571         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5572         {
5573                 return lhs = lhs >> rhs;
5574         }
5575
5576         RValue<UInt4> operator+(RValue<UInt4> val)
5577         {
5578                 return val;
5579         }
5580
5581         RValue<UInt4> operator-(RValue<UInt4> val)
5582         {
5583                 return RValue<UInt4>(Nucleus::createNeg(val.value));
5584         }
5585
5586         RValue<UInt4> operator~(RValue<UInt4> val)
5587         {
5588                 return RValue<UInt4>(Nucleus::createNot(val.value));
5589         }
5590
5591         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5592         {
5593                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
5594         }
5595
5596         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5597         {
5598                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
5599         }
5600
5601         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5602         {
5603                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
5604         }
5605
5606         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5607         {
5608                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
5609         }
5610
5611         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5612         {
5613                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
5614         }
5615
5616         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5617         {
5618                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
5619         }
5620
5621         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5622         {
5623                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5624                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
5625                 ::basicBlock->appendInst(cmp);
5626
5627                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5628                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5629                 ::basicBlock->appendInst(select);
5630
5631                 return RValue<UInt4>(V(result));
5632         }
5633
5634         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5635         {
5636                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5637                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
5638                 ::basicBlock->appendInst(cmp);
5639
5640                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5641                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5642                 ::basicBlock->appendInst(select);
5643
5644                 return RValue<UInt4>(V(result));
5645         }
5646
5647         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5648         {
5649                 if(CPUID::SSE4_1)
5650                 {
5651                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5652                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5653                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5654                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5655                         pack->addArg(x.value);
5656                         pack->addArg(y.value);
5657                         ::basicBlock->appendInst(pack);
5658
5659                         return RValue<UShort8>(V(result));
5660                 }
5661                 else
5662                 {
5663                         RValue<Int4> sx = As<Int4>(x);
5664                         RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
5665
5666                         RValue<Int4> sy = As<Int4>(y);
5667                         RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
5668
5669                         return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
5670                 }
5671         }
5672
5673         Type *UInt4::getType()
5674         {
5675                 return T(Ice::IceType_v4i32);
5676         }
5677
5678         Float::Float(RValue<Int> cast)
5679         {
5680                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5681
5682                 storeValue(integer);
5683         }
5684
5685         Float::Float(float x)
5686         {
5687                 storeValue(Nucleus::createConstantFloat(x));
5688         }
5689
5690         Float::Float(RValue<Float> rhs)
5691         {
5692                 storeValue(rhs.value);
5693         }
5694
5695         Float::Float(const Float &rhs)
5696         {
5697                 Value *value = rhs.loadValue();
5698                 storeValue(value);
5699         }
5700
5701         Float::Float(const Reference<Float> &rhs)
5702         {
5703                 Value *value = rhs.loadValue();
5704                 storeValue(value);
5705         }
5706
5707         RValue<Float> Float::operator=(RValue<Float> rhs)
5708         {
5709                 storeValue(rhs.value);
5710
5711                 return rhs;
5712         }
5713
5714         RValue<Float> Float::operator=(const Float &rhs)
5715         {
5716                 Value *value = rhs.loadValue();
5717                 storeValue(value);
5718
5719                 return RValue<Float>(value);
5720         }
5721
5722         RValue<Float> Float::operator=(const Reference<Float> &rhs)
5723         {
5724                 Value *value = rhs.loadValue();
5725                 storeValue(value);
5726
5727                 return RValue<Float>(value);
5728         }
5729
5730         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5731         {
5732                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5733         }
5734
5735         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5736         {
5737                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5738         }
5739
5740         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5741         {
5742                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5743         }
5744
5745         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5746         {
5747                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
5748         }
5749
5750         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5751         {
5752                 return lhs = lhs + rhs;
5753         }
5754
5755         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5756         {
5757                 return lhs = lhs - rhs;
5758         }
5759
5760         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5761         {
5762                 return lhs = lhs * rhs;
5763         }
5764
5765         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5766         {
5767                 return lhs = lhs / rhs;
5768         }
5769
5770         RValue<Float> operator+(RValue<Float> val)
5771         {
5772                 return val;
5773         }
5774
5775         RValue<Float> operator-(RValue<Float> val)
5776         {
5777                 return RValue<Float>(Nucleus::createFNeg(val.value));
5778         }
5779
5780         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5781         {
5782                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5783         }
5784
5785         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5786         {
5787                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5788         }
5789
5790         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5791         {
5792                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5793         }
5794
5795         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5796         {
5797                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5798         }
5799
5800         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5801         {
5802                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5803         }
5804
5805         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5806         {
5807                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
5808         }
5809
5810         RValue<Float> Abs(RValue<Float> x)
5811         {
5812                 return IfThenElse(x > 0.0f, x, -x);
5813         }
5814
5815         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5816         {
5817                 return IfThenElse(x > y, x, y);
5818         }
5819
5820         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5821         {
5822                 return IfThenElse(x < y, x, y);
5823         }
5824
5825         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5826         {
5827                 return 1.0f / x;
5828         }
5829
5830         RValue<Float> RcpSqrt_pp(RValue<Float> x)
5831         {
5832                 return Rcp_pp(Sqrt(x));
5833         }
5834
5835         RValue<Float> Sqrt(RValue<Float> x)
5836         {
5837                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
5838                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5839                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5840                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5841                 sqrt->addArg(x.value);
5842                 ::basicBlock->appendInst(sqrt);
5843
5844                 return RValue<Float>(V(result));
5845         }
5846
5847         RValue<Float> Round(RValue<Float> x)
5848         {
5849                 return Float4(Round(Float4(x))).x;
5850         }
5851
5852         RValue<Float> Trunc(RValue<Float> x)
5853         {
5854                 return Float4(Trunc(Float4(x))).x;
5855         }
5856
5857         RValue<Float> Frac(RValue<Float> x)
5858         {
5859                 return Float4(Frac(Float4(x))).x;
5860         }
5861
5862         RValue<Float> Floor(RValue<Float> x)
5863         {
5864                 return Float4(Floor(Float4(x))).x;
5865         }
5866
5867         RValue<Float> Ceil(RValue<Float> x)
5868         {
5869                 return Float4(Ceil(Float4(x))).x;
5870         }
5871
5872         Type *Float::getType()
5873         {
5874                 return T(Ice::IceType_f32);
5875         }
5876
5877         Float2::Float2(RValue<Float4> cast)
5878         {
5879                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5880         }
5881
5882         Type *Float2::getType()
5883         {
5884                 return T(Type_v2f32);
5885         }
5886
5887         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
5888         {
5889                 Value *a = Int4(cast).loadValue();
5890                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5891
5892                 storeValue(xyzw);
5893         }
5894
5895         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
5896         {
5897                 Value *a = Int4(cast).loadValue();
5898                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5899
5900                 storeValue(xyzw);
5901         }
5902
5903         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
5904         {
5905                 Int4 c(cast);
5906                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5907         }
5908
5909         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
5910         {
5911                 Int4 c(cast);
5912                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5913         }
5914
5915         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
5916         {
5917                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
5918
5919                 storeValue(xyzw);
5920         }
5921
        // Converts unsigned integer lanes to float using only the signed
        // conversion path.
        //
        // First term: the low 31 bits of each lane (cast & 0x7FFFFFFF), converted
        // through the Int4 -> Float4 constructor.
        // Second term: the bit pattern of Float4(0x80000000u) masked per lane by
        // (As<Int4>(cast) >> 31).
        // NOTE(review): presumably the Int4 shift is arithmetic, so the mask is
        // all-ones exactly for lanes whose top bit is set and the second term adds
        // back the 2^31 that the first term dropped - confirm against Int4's
        // operator>>.
        Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
        {
                RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
                                        As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));

                storeValue(result.value);
        }
5929
5930         Float4::Float4() : FloatXYZW(this)
5931         {
5932         }
5933
5934         Float4::Float4(float xyzw) : FloatXYZW(this)
5935         {
5936                 constant(xyzw, xyzw, xyzw, xyzw);
5937         }
5938
5939         Float4::Float4(float x, float yzw) : FloatXYZW(this)
5940         {
5941                 constant(x, yzw, yzw, yzw);
5942         }
5943
5944         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
5945         {
5946                 constant(x, y, zw, zw);
5947         }
5948
5949         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
5950         {
5951                 constant(x, y, z, w);
5952         }
5953
5954         void Float4::constant(float x, float y, float z, float w)
5955         {
5956                 double constantVector[4] = {x, y, z, w};
5957                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5958         }
5959
5960         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
5961         {
5962                 storeValue(rhs.value);
5963         }
5964
5965         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
5966         {
5967                 Value *value = rhs.loadValue();
5968                 storeValue(value);
5969         }
5970
5971         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
5972         {
5973                 Value *value = rhs.loadValue();
5974                 storeValue(value);
5975         }
5976
5977         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
5978         {
5979                 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
5980
5981                 int swizzle[4] = {0, 0, 0, 0};
5982                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
5983
5984                 storeValue(replicate);
5985         }
5986
5987         Float4::Float4(const Float &rhs) : FloatXYZW(this)
5988         {
5989                 *this = RValue<Float>(rhs.loadValue());
5990         }
5991
5992         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
5993         {
5994                 *this = RValue<Float>(rhs.loadValue());
5995         }
5996
5997         RValue<Float4> Float4::operator=(float x)
5998         {
5999                 return *this = Float4(x, x, x, x);
6000         }
6001
6002         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6003         {
6004                 storeValue(rhs.value);
6005
6006                 return rhs;
6007         }
6008
6009         RValue<Float4> Float4::operator=(const Float4 &rhs)
6010         {
6011                 Value *value = rhs.loadValue();
6012                 storeValue(value);
6013
6014                 return RValue<Float4>(value);
6015         }
6016
6017         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6018         {
6019                 Value *value = rhs.loadValue();
6020                 storeValue(value);
6021
6022                 return RValue<Float4>(value);
6023         }
6024
6025         RValue<Float4> Float4::operator=(RValue<Float> rhs)
6026         {
6027                 return *this = Float4(rhs);
6028         }
6029
6030         RValue<Float4> Float4::operator=(const Float &rhs)
6031         {
6032                 return *this = Float4(rhs);
6033         }
6034
6035         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6036         {
6037                 return *this = Float4(rhs);
6038         }
6039
6040         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6041         {
6042                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6043         }
6044
6045         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6046         {
6047                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6048         }
6049
6050         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6051         {
6052                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6053         }
6054
6055         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6056         {
6057                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6058         }
6059
6060         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6061         {
6062                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6063         }
6064
6065         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6066         {
6067                 return lhs = lhs + rhs;
6068         }
6069
6070         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6071         {
6072                 return lhs = lhs - rhs;
6073         }
6074
6075         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6076         {
6077                 return lhs = lhs * rhs;
6078         }
6079
6080         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6081         {
6082                 return lhs = lhs / rhs;
6083         }
6084
6085         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6086         {
6087                 return lhs = lhs % rhs;
6088         }
6089
6090         RValue<Float4> operator+(RValue<Float4> val)
6091         {
6092                 return val;
6093         }
6094
6095         RValue<Float4> operator-(RValue<Float4> val)
6096         {
6097                 return RValue<Float4>(Nucleus::createFNeg(val.value));
6098         }
6099
6100         RValue<Float4> Abs(RValue<Float4> x)
6101         {
6102                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6103                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6104                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6105
6106                 return As<Float4>(result);
6107         }
6108
6109         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6110         {
6111                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6112                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
6113                 ::basicBlock->appendInst(cmp);
6114
6115                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6116                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6117                 ::basicBlock->appendInst(select);
6118
6119                 return RValue<Float4>(V(result));
6120         }
6121
6122         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6123         {
6124                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6125                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
6126                 ::basicBlock->appendInst(cmp);
6127
6128                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6129                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6130                 ::basicBlock->appendInst(select);
6131
6132                 return RValue<Float4>(V(result));
6133         }
6134
6135         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6136         {
6137                 return Float4(1.0f) / x;
6138         }
6139
6140         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6141         {
6142                 return Rcp_pp(Sqrt(x));
6143         }
6144
6145         RValue<Float4> Sqrt(RValue<Float4> x)
6146         {
6147                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6148                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6149                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6150                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6151                 sqrt->addArg(x.value);
6152                 ::basicBlock->appendInst(sqrt);
6153
6154                 return RValue<Float4>(V(result));
6155         }
6156
6157         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6158         {
6159                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6160         }
6161
6162         RValue<Float> Extract(RValue<Float4> x, int i)
6163         {
6164                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6165         }
6166
6167         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6168         {
6169                 return RValue<Float4>(createSwizzle4(x.value, select));
6170         }
6171
6172         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6173         {
6174                 int shuffle[4] =
6175                 {
6176                         ((imm >> 0) & 0x03) + 0,
6177                         ((imm >> 2) & 0x03) + 0,
6178                         ((imm >> 4) & 0x03) + 4,
6179                         ((imm >> 6) & 0x03) + 4,
6180                 };
6181
6182                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6183         }
6184
6185         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6186         {
6187                 int shuffle[4] = {0, 4, 1, 5};
6188                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6189         }
6190
6191         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6192         {
6193                 int shuffle[4] = {2, 6, 3, 7};
6194                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6195         }
6196
6197         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6198         {
6199                 Value *vector = lhs.loadValue();
6200                 Value *result = createMask4(vector, rhs.value, select);
6201                 lhs.storeValue(result);
6202
6203                 return RValue<Float4>(result);
6204         }
6205
6206         RValue<Int> SignMask(RValue<Float4> x)
6207         {
6208                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6209                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6210                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6211                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6212                 movmsk->addArg(x.value);
6213                 ::basicBlock->appendInst(movmsk);
6214
6215                 return RValue<Int>(V(result));
6216         }
6217
6218         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6219         {
6220                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6221         }
6222
6223         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6224         {
6225                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6226         }
6227
6228         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6229         {
6230                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6231         }
6232
6233         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6234         {
6235                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6236         }
6237
6238         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6239         {
6240                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6241         }
6242
6243         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6244         {
6245                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6246         }
6247
6248         RValue<Float4> Round(RValue<Float4> x)
6249         {
6250                 if(CPUID::SSE4_1)
6251                 {
6252                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6253                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6254                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6255                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6256                         round->addArg(x.value);
6257                         round->addArg(::context->getConstantInt32(0));
6258                         ::basicBlock->appendInst(round);
6259
6260                         return RValue<Float4>(V(result));
6261                 }
6262                 else
6263                 {
6264                         return Float4(RoundInt(x));
6265                 }
6266         }
6267
6268         RValue<Float4> Trunc(RValue<Float4> x)
6269         {
6270                 if(CPUID::SSE4_1)
6271                 {
6272                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6273                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6274                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6275                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6276                         round->addArg(x.value);
6277                         round->addArg(::context->getConstantInt32(3));
6278                         ::basicBlock->appendInst(round);
6279
6280                         return RValue<Float4>(V(result));
6281                 }
6282                 else
6283                 {
6284                         return Float4(Int4(x));
6285                 }
6286         }
6287
6288         RValue<Float4> Frac(RValue<Float4> x)
6289         {
6290                 if(CPUID::SSE4_1)
6291                 {
6292                         return x - Floor(x);
6293                 }
6294                 else
6295                 {
6296                         Float4 frc = x - Float4(Int4(x));   // Signed fractional part
6297
6298                         return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
6299                 }
6300         }
6301
6302         RValue<Float4> Floor(RValue<Float4> x)
6303         {
6304                 if(CPUID::SSE4_1)
6305                 {
6306                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6307                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6308                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6309                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6310                         round->addArg(x.value);
6311                         round->addArg(::context->getConstantInt32(1));
6312                         ::basicBlock->appendInst(round);
6313
6314                         return RValue<Float4>(V(result));
6315                 }
6316                 else
6317                 {
6318                         return x - Frac(x);
6319                 }
6320         }
6321
6322         RValue<Float4> Ceil(RValue<Float4> x)
6323         {
6324                 if(CPUID::SSE4_1)
6325                 {
6326                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6327                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6328                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6329                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6330                         round->addArg(x.value);
6331                         round->addArg(::context->getConstantInt32(2));
6332                         ::basicBlock->appendInst(round);
6333
6334                         return RValue<Float4>(V(result));
6335                 }
6336                 else
6337                 {
6338                         return -Floor(-x);
6339                 }
6340         }
6341
6342         Type *Float4::getType()
6343         {
6344                 return T(Ice::IceType_v4f32);
6345         }
6346
6347         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6348         {
6349                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6350         }
6351
6352         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6353         {
6354                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
6355         }
6356
6357         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6358         {
6359                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
6360         }
6361
6362         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6363         {
6364                 return lhs = lhs + offset;
6365         }
6366
6367         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6368         {
6369                 return lhs = lhs + offset;
6370         }
6371
6372         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6373         {
6374                 return lhs = lhs + offset;
6375         }
6376
6377         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6378         {
6379                 return lhs + -offset;
6380         }
6381
6382         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6383         {
6384                 return lhs + -offset;
6385         }
6386
6387         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6388         {
6389                 return lhs + -offset;
6390         }
6391
6392         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6393         {
6394                 return lhs = lhs - offset;
6395         }
6396
6397         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6398         {
6399                 return lhs = lhs - offset;
6400         }
6401
6402         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6403         {
6404                 return lhs = lhs - offset;
6405         }
6406
6407         void Return()
6408         {
6409                 Nucleus::createRetVoid();
6410                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6411                 Nucleus::createUnreachable();
6412         }
6413
6414         void Return(RValue<Int> ret)
6415         {
6416                 Nucleus::createRet(ret.value);
6417                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6418                 Nucleus::createUnreachable();
6419         }
6420
6421         bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6422         {
6423                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6424                 Nucleus::setInsertBlock(bodyBB);
6425
6426                 return true;
6427         }
6428
6429         RValue<Long> Ticks()
6430         {
6431                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
6432         }
6433 }