src/Reactor/SubzeroReactor.cpp

   1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //    http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "Nucleus.hpp"
  16
  17 #include "Reactor.hpp"
  18 #include "Routine.hpp"
  19
  20 #include "Optimizer.hpp"
  21
  22 #include "src/IceTypes.h"
  23 #include "src/IceCfg.h"
  24 #include "src/IceELFStreamer.h"
  25 #include "src/IceGlobalContext.h"
  26 #include "src/IceCfgNode.h"
  27 #include "src/IceELFObjectWriter.h"
  28 #include "src/IceGlobalInits.h"
  29
  30 #include "llvm/Support/FileSystem.h"
  31 #include "llvm/Support/raw_os_ostream.h"
  32
  33 #if defined(_WIN32)
  34 #define WIN32_LEAN_AND_MEAN
  35 #define NOMINMAX
  36 #include <Windows.h>
  37 #else
  38 #include <sys/mman.h>
  39 #endif
  40
  41 #include <mutex>
  42 #include <limits>
  43 #include <iostream>
  44 #include <cassert>
  45
  46 namespace
  47 {
  48         Ice::GlobalContext *context = nullptr;
  49         Ice::Cfg *function = nullptr;
  50         Ice::CfgNode *basicBlock = nullptr;
  51         Ice::CfgLocalAllocatorScope *allocator = nullptr;
  52         sw::Routine *routine = nullptr;
  53
  54         std::mutex codegenMutex;
  55
  56         Ice::ELFFileStreamer *elfFile = nullptr;
  57         Ice::Fdstream *out = nullptr;
  58 }
  59
  60 namespace sw
  61 {
  62         enum EmulatedType
  63         {
  64                 EmulatedShift = 16,
  65                 EmulatedV2 = 2 << EmulatedShift,
  66                 EmulatedV4 = 4 << EmulatedShift,
  67                 EmulatedV8 = 8 << EmulatedShift,
  68                 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
  69
  70                 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
  71                 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
  72                 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
  73                 Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
  74                 Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
  75                 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
  76         };
  77
  78         class Value : public Ice::Operand {};
  79         class SwitchCases : public Ice::InstSwitch {};
  80         class BasicBlock : public Ice::CfgNode {};
  81
  82         Ice::Type T(Type *t)
  83         {
  84                 static_assert(Ice::IceType_NUM < EmulatedBits, "Ice::Type overlaps with our emulated types!");
  85                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
  86         }
  87
  88         Type *T(Ice::Type t)
  89         {
  90                 return reinterpret_cast<Type*>(t);
  91         }
  92
  93         Type *T(EmulatedType t)
  94         {
  95                 return reinterpret_cast<Type*>(t);
  96         }
  97
  98         Value *V(Ice::Operand *v)
  99         {
 100                 return reinterpret_cast<Value*>(v);
 101         }
 102
 103         BasicBlock *B(Ice::CfgNode *b)
 104         {
 105                 return reinterpret_cast<BasicBlock*>(b);
 106         }
 107
 108         Optimization optimization[10] = {InstructionCombining, Disabled};
 109
 110         using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
 111         using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
 112
 113         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
 114         {
 115                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
 116         }
 117
 118         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
 119         {
 120                 return &sectionHeader(elfHeader)[index];
 121         }
 122
 123         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
 124         {
 125                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 126
 127                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 128                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 129                 uint32_t index = relocation.getSymbol();
 130                 int table = relocationTable.sh_link;
 131                 void *symbolValue = nullptr;
 132
 133                 if(index != SHN_UNDEF)
 134                 {
 135                         if(table == SHN_UNDEF) return nullptr;
 136                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 137
 138                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 139                         if(index >= symtab_entries)
 140                         {
 141                                 assert(index < symtab_entries && "Symbol Index out of range");
 142                                 return nullptr;
 143                         }
 144
 145                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 146                         Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
 147                         uint16_t section = symbol.st_shndx;
 148
 149                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 150                         {
 151                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 152                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 153                         }
 154                         else
 155                         {
 156                                 return nullptr;
 157                         }
 158                 }
 159
 160                 switch(relocation.getType())
 161                 {
 162                 case R_386_NONE:
 163                         // No relocation
 164                         break;
 165                 case R_386_32:
 166                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
 167                         break;
 168         //      case R_386_PC32:
 169         //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
 170         //              break;
 171                 default:
 172                         assert(false && "Unsupported relocation type");
 173                         return nullptr;
 174                 }
 175
 176                 return symbolValue;
 177         }
 178
 179         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
 180         {
 181                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 182
 183                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 184                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 185                 uint32_t index = relocation.getSymbol();
 186                 int table = relocationTable.sh_link;
 187                 void *symbolValue = nullptr;
 188
 189                 if(index != SHN_UNDEF)
 190                 {
 191                         if(table == SHN_UNDEF) return nullptr;
 192                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 193
 194                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 195                         if(index >= symtab_entries)
 196                         {
 197                                 assert(index < symtab_entries && "Symbol Index out of range");
 198                                 return nullptr;
 199                         }
 200
 201                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 202                         Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
 203                         uint16_t section = symbol.st_shndx;
 204
 205                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 206                         {
 207                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 208                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 209                         }
 210                         else
 211                         {
 212                                 return nullptr;
 213                         }
 214                 }
 215
 216                 switch(relocation.getType())
 217                 {
 218                 case R_X86_64_NONE:
 219                         // No relocation
 220                         break;
 221                 case R_X86_64_64:
 222                         *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
 223                         break;
 224                 case R_X86_64_PC32:
 225                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
 226                         break;
 227                 case R_X86_64_32S:
 228                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
 229                         break;
 230                 default:
 231                         assert(false && "Unsupported relocation type");
 232                         return nullptr;
 233                 }
 234
 235                 return symbolValue;
 236         }
 237
 238         void *loadImage(uint8_t *const elfImage)
 239         {
 240                 ElfHeader *elfHeader = (ElfHeader*)elfImage;
 241
 242                 if(!elfHeader->checkMagic())
 243                 {
 244                         return nullptr;
 245                 }
 246
 247                 // Expect ELF bitness to match platform
 248                 assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
 249                 assert(sizeof(void*) == 8 ? elfHeader->e_machine == EM_X86_64 : elfHeader->e_machine == EM_386);
 250
 251                 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
 252                 void *entry = nullptr;
 253
 254                 for(int i = 0; i < elfHeader->e_shnum; i++)
 255                 {
 256                         if(sectionHeader[i].sh_type == SHT_PROGBITS)
 257                         {
 258                                 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
 259                                 {
 260                                         entry = elfImage + sectionHeader[i].sh_offset;
 261                                 }
 262                         }
 263                         else if(sectionHeader[i].sh_type == SHT_REL)
 264                         {
 265                                 assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code
 266
 267                                 for(int index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 268                                 {
 269                                         const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
 270                                         void *symbol = relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 271                                 }
 272                         }
 273                         else if(sectionHeader[i].sh_type == SHT_RELA)
 274                         {
 275                                 assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code
 276
 277                                 for(int index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 278                                 {
 279                                         const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
 280                                         void *symbol = relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 281                                 }
 282                         }
 283                 }
 284
 285                 return entry;
 286         }
 287
 288         template<typename T>
 289         struct ExecutableAllocator
 290         {
 291                 ExecutableAllocator() {};
 292                 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
 293
 294                 using value_type = T;
 295                 using size_type = std::size_t;
 296
 297                 T *allocate(size_type n)
 298                 {
 299                         #if defined(_WIN32)
 300                                 return (T*)VirtualAlloc(NULL, sizeof(T) * n, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
 301                         #else
 302                                 return (T*)mmap(nullptr, sizeof(T) * n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 303                         #endif
 304                 }
 305
 306                 void deallocate(T *p, size_type n)
 307                 {
 308                         #if defined(_WIN32)
 309                                 VirtualFree(p, 0, MEM_RELEASE);
 310                         #else
 311                                 munmap(p, sizeof(T) * n);
 312                         #endif
 313                 }
 314         };
 315
 316         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
 317         {
 318                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
 319                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
 320
 321         public:
 322                 ELFMemoryStreamer() : Routine(), entry(nullptr)
 323                 {
 324                         position = 0;
 325                         buffer.reserve(0x1000);
 326                 }
 327
 328                 virtual ~ELFMemoryStreamer()
 329                 {
 330                         #if defined(_WIN32)
 331                                 if(buffer.size() != 0)
 332                                 {
 333                                         DWORD exeProtection;
 334                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
 335                                 }
 336                         #endif
 337                 }
 338
 339                 void write8(uint8_t Value) override
 340                 {
 341                         if(position == (uint64_t)buffer.size())
 342                         {
 343                                 buffer.push_back(Value);
 344                                 position++;
 345                         }
 346                         else if(position < (uint64_t)buffer.size())
 347                         {
 348                                 buffer[position] = Value;
 349                                 position++;
 350                         }
 351                         else assert(false && "UNIMPLEMENTED");
 352                 }
 353
 354                 void writeBytes(llvm::StringRef Bytes) override
 355                 {
 356                         std::size_t oldSize = buffer.size();
 357                         buffer.resize(oldSize + Bytes.size());
 358                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
 359                         position += Bytes.size();
 360                 }
 361
 362                 uint64_t tell() const override { return position; }
 363
 364                 void seek(uint64_t Off) override { position = Off; }
 365
 366                 const void *getEntry() override
 367                 {
 368                         if(!entry)
 369                         {
 370                                 #if defined(_WIN32)
 371                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READWRITE, &oldProtection);
 372                                 #else
 373                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_WRITE | PROT_EXEC);
 374                                 #endif
 375
 376                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
 377
 378                                 entry = loadImage(&buffer[0]);
 379                         }
 380
 381                         return entry;
 382                 }
 383
 384         private:
 385                 void *entry;
 386                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
 387                 std::size_t position;
 388
 389                 #if defined(_WIN32)
 390                 DWORD oldProtection;
 391                 #endif
 392         };
 393
 394         Nucleus::Nucleus()
 395         {
 396                 ::codegenMutex.lock();   // Reactor is currently not thread safe
 397
 398                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
 399                 Ice::ClFlags::getParsedClFlags(Flags);
 400
 401                 Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
 402                 Flags.setOutFileType(Ice::FT_Elf);
 403                 Flags.setOptLevel(Ice::Opt_2);
 404                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
 405                 Flags.setTargetInstructionSet(Ice::X86InstructionSet_SSE4_1);
 406                 Flags.setVerbose(false ? Ice::IceV_All : Ice::IceV_None);
 407
 408                 static llvm::raw_os_ostream cout(std::cout);
 409                 static llvm::raw_os_ostream cerr(std::cerr);
 410
 411                 if(false)   // Write out to a file
 412                 {
 413                         std::error_code errorCode;
 414                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
 415                         ::elfFile = new Ice::ELFFileStreamer(*out);
 416                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
 417                 }
 418                 else
 419                 {
 420                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
 421                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
 422                         ::routine = elfMemory;
 423                 }
 424         }
 425
 426         Nucleus::~Nucleus()
 427         {
 428                 delete ::allocator;
 429                 delete ::function;
 430                 delete ::context;
 431
 432                 delete ::elfFile;
 433                 delete ::out;
 434
 435                 ::codegenMutex.unlock();
 436         }
 437
 438         Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
 439         {
 440                 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
 441                 {
 442                         createRetVoid();
 443                 }
 444
 445                 std::wstring wideName(name);
 446                 std::string asciiName(wideName.begin(), wideName.end());
 447                 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));
 448
 449                 optimize();
 450
 451                 ::function->translate();
 452                 assert(!::function->hasError());
 453
 454                 auto *globals = ::function->getGlobalInits().release();
 455
 456                 if(globals && !globals->empty())
 457                 {
 458                         ::context->getGlobals()->merge(globals);
 459                 }
 460
 461                 ::context->emitFileHeader();
 462                 ::function->emitIAS();
 463                 auto assembler = ::function->releaseAssembler();
 464                 auto objectWriter = ::context->getObjectWriter();
 465                 assembler->alignFunction();
 466                 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
 467                 ::context->lowerGlobals("last");
 468                 ::context->lowerConstants();
 469                 ::context->lowerJumpTables();
 470                 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
 471                 objectWriter->writeNonUserSections();
 472
 473                 return ::routine;
 474         }
 475
 476         void Nucleus::optimize()
 477         {
 478                 sw::optimize(::function);
 479         }
 480
 481         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
 482         {
 483                 Ice::Type type = T(t);
 484                 int typeSize = Ice::typeWidthInBytes(type);
 485                 int totalSize = typeSize * (arraySize ? arraySize : 1);
 486
 487                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
 488                 auto address = ::function->makeVariable(T(getPointerType(t)));
 489                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
 490                 ::function->getEntryNode()->getInsts().push_front(alloca);
 491
 492                 return V(address);
 493         }
 494
 495         BasicBlock *Nucleus::createBasicBlock()
 496         {
 497                 return B(::function->makeNode());
 498         }
 499
 500         BasicBlock *Nucleus::getInsertBlock()
 501         {
 502                 return B(::basicBlock);
 503         }
 504
 505         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
 506         {
 507         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
 508                 ::basicBlock = basicBlock;
 509         }
 510
 511         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
 512         {
 513                 uint32_t sequenceNumber = 0;
 514                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
 515                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
 516
 517                 for(Type *type : Params)
 518                 {
 519                         Ice::Variable *arg = ::function->makeVariable(T(type));
 520                         ::function->addArg(arg);
 521                 }
 522
 523                 Ice::CfgNode *node = ::function->makeNode();
 524                 ::function->setEntryNode(node);
 525                 ::basicBlock = node;
 526         }
 527
 528         Value *Nucleus::getArgument(unsigned int index)
 529         {
 530                 return V(::function->getArgs()[index]);
 531         }
 532
 533         void Nucleus::createRetVoid()
 534         {
 535                 Ice::InstRet *ret = Ice::InstRet::create(::function);
 536                 ::basicBlock->appendInst(ret);
 537         }
 538
 539         void Nucleus::createRet(Value *v)
 540         {
 541                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
 542                 ::basicBlock->appendInst(ret);
 543         }
 544
 545         void Nucleus::createBr(BasicBlock *dest)
 546         {
 547                 auto br = Ice::InstBr::create(::function, dest);
 548                 ::basicBlock->appendInst(br);
 549         }
 550
 551         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
 552         {
 553                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
 554                 ::basicBlock->appendInst(br);
 555         }
 556
 557         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
 558         {
 559                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
 560
 561                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
 562                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, lhs, rhs);
 563                 ::basicBlock->appendInst(arithmetic);
 564
 565                 return V(result);
 566         }
 567
 568         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
 569         {
 570                 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
 571         }
 572
 573         Value *Nucleus::createSub(Value *lhs, Value *rhs)
 574         {
 575                 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
 576         }
 577
 578         Value *Nucleus::createMul(Value *lhs, Value *rhs)
 579         {
 580                 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
 581         }
 582
 583         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
 584         {
 585                 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
 586         }
 587
 588         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
 589         {
 590                 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
 591         }
 592
 593         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
 594         {
 595                 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
 596         }
 597
 598         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
 599         {
 600                 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
 601         }
 602
 603         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
 604         {
 605                 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
 606         }
 607
 608         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
 609         {
 610                 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
 611         }
 612
 613         Value *Nucleus::createURem(Value *lhs, Value *rhs)
 614         {
 615                 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
 616         }
 617
 618         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
 619         {
 620                 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
 621         }
 622
 623         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
 624         {
 625                 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
 626         }
 627
 628         Value *Nucleus::createShl(Value *lhs, Value *rhs)
 629         {
 630                 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
 631         }
 632
 633         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
 634         {
 635                 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
 636         }
 637
 638         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
 639         {
 640                 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
 641         }
 642
 643         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
 644         {
 645                 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
 646         }
 647
 648         Value *Nucleus::createOr(Value *lhs, Value *rhs)
 649         {
 650                 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
 651         }
 652
 653         Value *Nucleus::createXor(Value *lhs, Value *rhs)
 654         {
 655                 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
 656         }
 657
 658         static Ice::Variable *createAssign(Ice::Operand *constant)
 659         {
 660                 Ice::Variable *value = ::function->makeVariable(constant->getType());
 661                 auto assign = Ice::InstAssign::create(::function, value, constant);
 662                 ::basicBlock->appendInst(assign);
 663
 664                 return value;
 665         }
 666
 667         Value *Nucleus::createNeg(Value *v)
 668         {
 669                 return createSub(createNullValue(T(v->getType())), v);
 670         }
 671
 672         Value *Nucleus::createFNeg(Value *v)
 673         {
 674                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
 675                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
 676                                       createConstantVector(c, T(v->getType())) :
 677                                       V(::context->getConstantFloat(-0.0f));
 678
 679                 return createFSub(negativeZero, v);
 680         }
 681
 682         Value *Nucleus::createNot(Value *v)
 683         {
 684                 if(Ice::isScalarIntegerType(v->getType()))
 685                 {
 686                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
 687                 }
 688                 else   // Vector
 689                 {
 690                         int64_t c[4] = {-1, -1, -1, -1};
 691                         return createXor(v, createConstantVector(c, T(v->getType())));
 692                 }
 693         }
 694
 695         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
 696         {
 697                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 698                 Ice::Variable *result = ::function->makeVariable(T(type));
 699
 700                 if(valueType & EmulatedBits)
 701                 {
 702                         switch(valueType)
 703                         {
 704                         case Type_v4i8:
 705                         case Type_v2i16:
 706                                 {
 707                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
 708                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 709                                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
 710                                         load->addArg(ptr);
 711                                         load->addArg(::context->getConstantInt32(4));
 712                                         ::basicBlock->appendInst(load);
 713                                 }
 714                                 break;
 715                         case Type_v2i32:
 716                         case Type_v8i8:
 717                         case Type_v4i16:
 718                         case Type_v2f32:
 719                                 {
 720                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
 721                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 722                                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
 723                                         load->addArg(ptr);
 724                                         load->addArg(::context->getConstantInt32(8));
 725                                         ::basicBlock->appendInst(load);
 726                                 }
 727                                 break;
 728                         default: assert(false && "UNIMPLEMENTED");
 729                         }
 730                 }
 731                 else
 732                 {
 733                         auto load = Ice::InstLoad::create(::function, result, ptr, align);
 734                         ::basicBlock->appendInst(load);
 735                 }
 736
 737                 return V(result);
 738         }
 739
 740         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
 741         {
 742                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 743
 744                 if(valueType & EmulatedBits)
 745                 {
 746                         switch(valueType)
 747                         {
 748                         case Type_v4i8:
 749                         case Type_v2i16:
 750                                 {
 751                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
 752                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 753                                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
 754                                         store->addArg(value);
 755                                         store->addArg(ptr);
 756                                         store->addArg(::context->getConstantInt32(4));
 757                                         ::basicBlock->appendInst(store);
 758                                 }
 759                                 break;
 760                         case Type_v2i32:
 761                         case Type_v8i8:
 762                         case Type_v4i16:
 763                         case Type_v2f32:
 764                                 {
 765                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
 766                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 767                                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
 768                                         store->addArg(value);
 769                                         store->addArg(ptr);
 770                                         store->addArg(::context->getConstantInt32(8));
 771                                         ::basicBlock->appendInst(store);
 772                                 }
 773                                 break;
 774                         default: assert(false && "UNIMPLEMENTED");
 775                         }
 776                 }
 777                 else
 778                 {
 779                         assert(T(value->getType()) == type);
 780
 781                         auto store = Ice::InstStore::create(::function, value, ptr, align);
 782                         ::basicBlock->appendInst(store);
 783                 }
 784
 785                 return value;
 786         }
 787
 788         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
 789         {
 790                 assert(index->getType() == Ice::IceType_i32);
 791
 792                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
 793                 {
 794                         int32_t offset = constant->getValue() * (int)Ice::typeWidthInBytes(T(type));
 795
 796                         if(offset == 0)
 797                         {
 798                                 return ptr;
 799                         }
 800
 801                         return createAdd(ptr, createConstantInt(offset));
 802                 }
 803
 804                 if(!Ice::isByteSizedType(T(type)))
 805                 {
 806                         index = createMul(index, createConstantInt((int)Ice::typeWidthInBytes(T(type))));
 807                 }
 808
 809                 if(sizeof(void*) == 8)
 810                 {
 811                         index = createSExt(index, T(Ice::IceType_i64));
 812                 }
 813
 814                 return createAdd(ptr, index);
 815         }
 816
 817         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
 818         {
 819                 assert(false && "UNIMPLEMENTED"); return nullptr;
 820         }
 821
 822         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
 823         {
 824                 if(v->getType() == T(destType))
 825                 {
 826                         return v;
 827                 }
 828
 829                 Ice::Variable *result = ::function->makeVariable(T(destType));
 830                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
 831                 ::basicBlock->appendInst(cast);
 832
 833                 return V(result);
 834         }
 835
 836         Value *Nucleus::createTrunc(Value *v, Type *destType)
 837         {
 838                 return createCast(Ice::InstCast::Trunc, v, destType);
 839         }
 840
 841         Value *Nucleus::createZExt(Value *v, Type *destType)
 842         {
 843                 return createCast(Ice::InstCast::Zext, v, destType);
 844         }
 845
 846         Value *Nucleus::createSExt(Value *v, Type *destType)
 847         {
 848                 return createCast(Ice::InstCast::Sext, v, destType);
 849         }
 850
 851         Value *Nucleus::createFPToSI(Value *v, Type *destType)
 852         {
 853                 return createCast(Ice::InstCast::Fptosi, v, destType);
 854         }
 855
 856         Value *Nucleus::createSIToFP(Value *v, Type *destType)
 857         {
 858                 return createCast(Ice::InstCast::Sitofp, v, destType);
 859         }
 860
 861         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
 862         {
 863                 return createCast(Ice::InstCast::Fptrunc, v, destType);
 864         }
 865
 866         Value *Nucleus::createFPExt(Value *v, Type *destType)
 867         {
 868                 return createCast(Ice::InstCast::Fpext, v, destType);
 869         }
 870
 871         Value *Nucleus::createBitCast(Value *v, Type *destType)
 872         {
 873                 return createCast(Ice::InstCast::Bitcast, v, destType);
 874         }
 875
 876         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
 877         {
 878                 assert(lhs->getType() == rhs->getType());
 879
 880                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
 881                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
 882                 ::basicBlock->appendInst(cmp);
 883
 884                 return V(result);
 885         }
 886
 887         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
 888         {
 889                 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
 890         }
 891
 892         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
 893         {
 894                 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
 895         }
 896
 897         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
 898         {
 899                 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
 900         }
 901
 902         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
 903         {
 904                 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
 905         }
 906
 907         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
 908         {
 909                 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
 910         }
 911
 912         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
 913         {
 914                 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
 915         }
 916
 917         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
 918         {
 919                 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
 920         }
 921
 922         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
 923         {
 924                 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
 925         }
 926
 927         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
 928         {
 929                 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
 930         }
 931
 932         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
 933         {
 934                 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
 935         }
 936
 937         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
 938         {
 939                 assert(lhs->getType() == rhs->getType());
 940                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
 941
 942                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
 943                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
 944                 ::basicBlock->appendInst(cmp);
 945
 946                 return V(result);
 947         }
 948
 949         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
 950         {
 951                 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
 952         }
 953
 954         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
 955         {
 956                 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
 957         }
 958
 959         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
 960         {
 961                 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
 962         }
 963
 964         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
 965         {
 966                 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
 967         }
 968
 969         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
 970         {
 971                 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
 972         }
 973
 974         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
 975         {
 976                 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
 977         }
 978
 979         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
 980         {
 981                 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
 982         }
 983
 984         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
 985         {
 986                 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
 987         }
 988
 989         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
 990         {
 991                 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
 992         }
 993
 994         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
 995         {
 996                 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
 997         }
 998
 999         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1000         {
1001                 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1002         }
1003
1004         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1005         {
1006                 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1007         }
1008
1009         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1010         {
1011                 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1012         }
1013
1014         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1015         {
1016                 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1017         }
1018
1019         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1020         {
1021                 auto result = ::function->makeVariable(T(type));
1022                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1023                 ::basicBlock->appendInst(extract);
1024
1025                 return V(result);
1026         }
1027
1028         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1029         {
1030                 auto result = ::function->makeVariable(vector->getType());
1031                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1032                 ::basicBlock->appendInst(insert);
1033
1034                 return V(result);
1035         }
1036
1037         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1038         {
1039                 assert(V1->getType() == V2->getType());
1040
1041                 int size = Ice::typeNumElements(V1->getType());
1042                 auto result = ::function->makeVariable(V1->getType());
1043                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1044
1045                 for(int i = 0; i < size; i++)
1046                 {
1047                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1048                 }
1049
1050                 ::basicBlock->appendInst(shuffle);
1051
1052                 return V(result);
1053         }
1054
1055         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1056         {
1057                 assert(ifTrue->getType() == ifFalse->getType());
1058
1059                 auto result = ::function->makeVariable(ifTrue->getType());
1060                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1061                 ::basicBlock->appendInst(select);
1062
1063                 return V(result);
1064         }
1065
1066         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1067         {
1068                 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1069                 ::basicBlock->appendInst(switchInst);
1070
1071                 return reinterpret_cast<SwitchCases*>(switchInst);
1072         }
1073
1074         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1075         {
1076                 switchCases->addBranch(label, label, branch);
1077         }
1078
1079         void Nucleus::createUnreachable()
1080         {
1081                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1082                 ::basicBlock->appendInst(unreachable);
1083         }
1084
1085         static Value *createSwizzle4(Value *val, unsigned char select)
1086         {
1087                 int swizzle[4] =
1088                 {
1089                         (select >> 0) & 0x03,
1090                         (select >> 2) & 0x03,
1091                         (select >> 4) & 0x03,
1092                         (select >> 6) & 0x03,
1093                 };
1094
1095                 return Nucleus::createShuffleVector(val, val, swizzle);
1096         }
1097
1098         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1099         {
1100                 int64_t mask[4] = {0, 0, 0, 0};
1101
1102                 mask[(select >> 0) & 0x03] = -1;
1103                 mask[(select >> 2) & 0x03] = -1;
1104                 mask[(select >> 4) & 0x03] = -1;
1105                 mask[(select >> 6) & 0x03] = -1;
1106
1107                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1108                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1109
1110                 return result;
1111         }
1112
1113         Type *Nucleus::getPointerType(Type *ElementType)
1114         {
1115                 if(sizeof(void*) == 8)
1116                 {
1117                         return T(Ice::IceType_i64);
1118                 }
1119                 else
1120                 {
1121                         return T(Ice::IceType_i32);
1122                 }
1123         }
1124
1125         Value *Nucleus::createNullValue(Type *Ty)
1126         {
1127                 if(Ice::isVectorType(T(Ty)))
1128                 {
1129                         int64_t c[4] = {0, 0, 0, 0};
1130                         return createConstantVector(c, Ty);
1131                 }
1132                 else
1133                 {
1134                         return V(::context->getConstantZero(T(Ty)));
1135                 }
1136         }
1137
1138         Value *Nucleus::createConstantLong(int64_t i)
1139         {
1140                 return V(::context->getConstantInt64(i));
1141         }
1142
1143         Value *Nucleus::createConstantInt(int i)
1144         {
1145                 return V(::context->getConstantInt32(i));
1146         }
1147
1148         Value *Nucleus::createConstantInt(unsigned int i)
1149         {
1150                 return V(::context->getConstantInt32(i));
1151         }
1152
1153         Value *Nucleus::createConstantBool(bool b)
1154         {
1155                 return V(::context->getConstantInt1(b));
1156         }
1157
1158         Value *Nucleus::createConstantByte(signed char i)
1159         {
1160                 return V(::context->getConstantInt8(i));
1161         }
1162
1163         Value *Nucleus::createConstantByte(unsigned char i)
1164         {
1165                 return V(::context->getConstantInt8(i));
1166         }
1167
1168         Value *Nucleus::createConstantShort(short i)
1169         {
1170                 return V(::context->getConstantInt16(i));
1171         }
1172
1173         Value *Nucleus::createConstantShort(unsigned short i)
1174         {
1175                 return V(::context->getConstantInt16(i));
1176         }
1177
1178         Value *Nucleus::createConstantFloat(float x)
1179         {
1180                 return V(::context->getConstantFloat(x));
1181         }
1182
1183         Value *Nucleus::createNullPointer(Type *Ty)
1184         {
1185                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1186         }
1187
1188         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1189         {
1190                 const int vectorSize = 16;
1191                 assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
1192                 const int alignment = vectorSize;
1193                 auto globalPool = ::function->getGlobalPool();
1194
1195                 const int64_t *i = constants;
1196                 const double *f = reinterpret_cast<const double*>(constants);
1197                 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1198
1199                 switch((int)reinterpret_cast<intptr_t>(type))
1200                 {
1201                 case Ice::IceType_v4i32:
1202                 case Ice::IceType_v4i1:
1203                         {
1204                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1205                                 static_assert(sizeof(initializer) == vectorSize, "!");
1206                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1207                         }
1208                         break;
1209                 case Ice::IceType_v4f32:
1210                         {
1211                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1212                                 static_assert(sizeof(initializer) == vectorSize, "!");
1213                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1214                         }
1215                         break;
1216                 case Ice::IceType_v8i16:
1217                 case Ice::IceType_v8i1:
1218                         {
1219                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1220                                 static_assert(sizeof(initializer) == vectorSize, "!");
1221                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1222                         }
1223                         break;
1224                 case Ice::IceType_v16i8:
1225                 case Ice::IceType_v16i1:
1226                         {
1227                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1228                                 static_assert(sizeof(initializer) == vectorSize, "!");
1229                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1230                         }
1231                         break;
1232                 case Type_v2i32:
1233                         {
1234                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1235                                 static_assert(sizeof(initializer) == vectorSize, "!");
1236                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1237                         }
1238                         break;
1239                 case Type_v2f32:
1240                         {
1241                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1242                                 static_assert(sizeof(initializer) == vectorSize, "!");
1243                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1244                         }
1245                         break;
1246                 case Type_v4i16:
1247                         {
1248                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1249                                 static_assert(sizeof(initializer) == vectorSize, "!");
1250                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1251                         }
1252                         break;
1253                 case Type_v8i8:
1254                         {
1255                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1256                                 static_assert(sizeof(initializer) == vectorSize, "!");
1257                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1258                         }
1259                         break;
1260                 case Type_v4i8:
1261                         {
1262                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1263                                 static_assert(sizeof(initializer) == vectorSize, "!");
1264                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1265                         }
1266                         break;
1267                 default:
1268                         assert(false && "Unknown constant vector type" && type);
1269                 }
1270
1271                 auto name = Ice::GlobalString::createWithoutString(::context);
1272                 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1273                 variableDeclaration->setName(name);
1274                 variableDeclaration->setAlignment(alignment);
1275                 variableDeclaration->setIsConstant(true);
1276                 variableDeclaration->addInitializer(dataInitializer);
1277
1278                 ::function->addGlobal(variableDeclaration);
1279
1280                 constexpr int32_t offset = 0;
1281                 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1282
1283                 Ice::Variable *result = ::function->makeVariable(T(type));
1284                 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1285                 ::basicBlock->appendInst(load);
1286
1287                 return V(result);
1288         }
1289
1290         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1291         {
1292                 return createConstantVector((const int64_t*)constants, type);
1293         }
1294
1295         Type *Void::getType()
1296         {
1297                 return T(Ice::IceType_void);
1298         }
1299
1300         Bool::Bool(Argument<Bool> argument)
1301         {
1302                 storeValue(argument.value);
1303         }
1304
1305         Bool::Bool(bool x)
1306         {
1307                 storeValue(Nucleus::createConstantBool(x));
1308         }
1309
1310         Bool::Bool(RValue<Bool> rhs)
1311         {
1312                 storeValue(rhs.value);
1313         }
1314
1315         Bool::Bool(const Bool &rhs)
1316         {
1317                 Value *value = rhs.loadValue();
1318                 storeValue(value);
1319         }
1320
1321         Bool::Bool(const Reference<Bool> &rhs)
1322         {
1323                 Value *value = rhs.loadValue();
1324                 storeValue(value);
1325         }
1326
1327         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1328         {
1329                 storeValue(rhs.value);
1330
1331                 return rhs;
1332         }
1333
1334         RValue<Bool> Bool::operator=(const Bool &rhs)
1335         {
1336                 Value *value = rhs.loadValue();
1337                 storeValue(value);
1338
1339                 return RValue<Bool>(value);
1340         }
1341
1342         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1343         {
1344                 Value *value = rhs.loadValue();
1345                 storeValue(value);
1346
1347                 return RValue<Bool>(value);
1348         }
1349
1350         RValue<Bool> operator!(RValue<Bool> val)
1351         {
1352                 return RValue<Bool>(Nucleus::createNot(val.value));
1353         }
1354
1355         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1356         {
1357                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1358         }
1359
1360         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1361         {
1362                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1363         }
1364
1365         Type *Bool::getType()
1366         {
1367                 return T(Ice::IceType_i1);
1368         }
1369
1370         Byte::Byte(Argument<Byte> argument)
1371         {
1372                 storeValue(argument.value);
1373         }
1374
1375         Byte::Byte(RValue<Int> cast)
1376         {
1377                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1378
1379                 storeValue(integer);
1380         }
1381
1382         Byte::Byte(RValue<UInt> cast)
1383         {
1384                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1385
1386                 storeValue(integer);
1387         }
1388
1389         Byte::Byte(RValue<UShort> cast)
1390         {
1391                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1392
1393                 storeValue(integer);
1394         }
1395
1396         Byte::Byte(int x)
1397         {
1398                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1399         }
1400
1401         Byte::Byte(unsigned char x)
1402         {
1403                 storeValue(Nucleus::createConstantByte(x));
1404         }
1405
1406         Byte::Byte(RValue<Byte> rhs)
1407         {
1408                 storeValue(rhs.value);
1409         }
1410
1411         Byte::Byte(const Byte &rhs)
1412         {
1413                 Value *value = rhs.loadValue();
1414                 storeValue(value);
1415         }
1416
1417         Byte::Byte(const Reference<Byte> &rhs)
1418         {
1419                 Value *value = rhs.loadValue();
1420                 storeValue(value);
1421         }
1422
1423         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1424         {
1425                 storeValue(rhs.value);
1426
1427                 return rhs;
1428         }
1429
1430         RValue<Byte> Byte::operator=(const Byte &rhs)
1431         {
1432                 Value *value = rhs.loadValue();
1433                 storeValue(value);
1434
1435                 return RValue<Byte>(value);
1436         }
1437
1438         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1439         {
1440                 Value *value = rhs.loadValue();
1441                 storeValue(value);
1442
1443                 return RValue<Byte>(value);
1444         }
1445
1446         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1447         {
1448                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1449         }
1450
1451         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1452         {
1453                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1454         }
1455
1456         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1457         {
1458                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1459         }
1460
1461         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1462         {
1463                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1464         }
1465
1466         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1467         {
1468                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1469         }
1470
1471         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1472         {
1473                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1474         }
1475
1476         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1477         {
1478                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1479         }
1480
1481         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1482         {
1483                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1484         }
1485
1486         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1487         {
1488                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1489         }
1490
1491         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1492         {
1493                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1494         }
1495
1496         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1497         {
1498                 return lhs = lhs + rhs;
1499         }
1500
1501         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1502         {
1503                 return lhs = lhs - rhs;
1504         }
1505
1506         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1507         {
1508                 return lhs = lhs * rhs;
1509         }
1510
1511         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1512         {
1513                 return lhs = lhs / rhs;
1514         }
1515
1516         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1517         {
1518                 return lhs = lhs % rhs;
1519         }
1520
1521         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1522         {
1523                 return lhs = lhs & rhs;
1524         }
1525
1526         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1527         {
1528                 return lhs = lhs | rhs;
1529         }
1530
1531         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1532         {
1533                 return lhs = lhs ^ rhs;
1534         }
1535
1536         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1537         {
1538                 return lhs = lhs << rhs;
1539         }
1540
1541         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1542         {
1543                 return lhs = lhs >> rhs;
1544         }
1545
1546         RValue<Byte> operator+(RValue<Byte> val)
1547         {
1548                 return val;
1549         }
1550
1551         RValue<Byte> operator-(RValue<Byte> val)
1552         {
1553                 return RValue<Byte>(Nucleus::createNeg(val.value));
1554         }
1555
1556         RValue<Byte> operator~(RValue<Byte> val)
1557         {
1558                 return RValue<Byte>(Nucleus::createNot(val.value));
1559         }
1560
1561         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1562         {
1563                 RValue<Byte> res = val;
1564                 val += Byte(1);
1565                 return res;
1566         }
1567
1568         const Byte &operator++(Byte &val)   // Pre-increment
1569         {
1570                 val += Byte(1);
1571                 return val;
1572         }
1573
1574         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1575         {
1576                 RValue<Byte> res = val;
1577                 val -= Byte(1);
1578                 return res;
1579         }
1580
1581         const Byte &operator--(Byte &val)   // Pre-decrement
1582         {
1583                 val -= Byte(1);
1584                 return val;
1585         }
1586
1587         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1588         {
1589                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1590         }
1591
1592         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1593         {
1594                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1595         }
1596
1597         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1598         {
1599                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1600         }
1601
1602         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1603         {
1604                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1605         }
1606
1607         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1608         {
1609                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1610         }
1611
1612         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1613         {
1614                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1615         }
1616
1617         Type *Byte::getType()
1618         {
1619                 return T(Ice::IceType_i8);
1620         }
1621
1622         SByte::SByte(Argument<SByte> argument)
1623         {
1624                 storeValue(argument.value);
1625         }
1626
1627         SByte::SByte(RValue<Int> cast)
1628         {
1629                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1630
1631                 storeValue(integer);
1632         }
1633
1634         SByte::SByte(RValue<Short> cast)
1635         {
1636                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1637
1638                 storeValue(integer);
1639         }
1640
1641         SByte::SByte(signed char x)
1642         {
1643                 storeValue(Nucleus::createConstantByte(x));
1644         }
1645
1646         SByte::SByte(RValue<SByte> rhs)
1647         {
1648                 storeValue(rhs.value);
1649         }
1650
1651         SByte::SByte(const SByte &rhs)
1652         {
1653                 Value *value = rhs.loadValue();
1654                 storeValue(value);
1655         }
1656
1657         SByte::SByte(const Reference<SByte> &rhs)
1658         {
1659                 Value *value = rhs.loadValue();
1660                 storeValue(value);
1661         }
1662
1663         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1664         {
1665                 storeValue(rhs.value);
1666
1667                 return rhs;
1668         }
1669
1670         RValue<SByte> SByte::operator=(const SByte &rhs)
1671         {
1672                 Value *value = rhs.loadValue();
1673                 storeValue(value);
1674
1675                 return RValue<SByte>(value);
1676         }
1677
1678         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1679         {
1680                 Value *value = rhs.loadValue();
1681                 storeValue(value);
1682
1683                 return RValue<SByte>(value);
1684         }
1685
1686         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1687         {
1688                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1689         }
1690
1691         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1692         {
1693                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1694         }
1695
1696         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1697         {
1698                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1699         }
1700
1701         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1702         {
1703                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1704         }
1705
1706         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1707         {
1708                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1709         }
1710
1711         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1712         {
1713                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1714         }
1715
1716         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1717         {
1718                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1719         }
1720
1721         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1722         {
1723                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1724         }
1725
1726         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1727         {
1728                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1729         }
1730
1731         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1732         {
1733                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1734         }
1735
1736         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1737         {
1738                 return lhs = lhs + rhs;
1739         }
1740
1741         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1742         {
1743                 return lhs = lhs - rhs;
1744         }
1745
1746         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1747         {
1748                 return lhs = lhs * rhs;
1749         }
1750
1751         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1752         {
1753                 return lhs = lhs / rhs;
1754         }
1755
1756         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1757         {
1758                 return lhs = lhs % rhs;
1759         }
1760
1761         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1762         {
1763                 return lhs = lhs & rhs;
1764         }
1765
1766         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1767         {
1768                 return lhs = lhs | rhs;
1769         }
1770
1771         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1772         {
1773                 return lhs = lhs ^ rhs;
1774         }
1775
1776         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1777         {
1778                 return lhs = lhs << rhs;
1779         }
1780
1781         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1782         {
1783                 return lhs = lhs >> rhs;
1784         }
1785
1786         RValue<SByte> operator+(RValue<SByte> val)
1787         {
1788                 return val;
1789         }
1790
1791         RValue<SByte> operator-(RValue<SByte> val)
1792         {
1793                 return RValue<SByte>(Nucleus::createNeg(val.value));
1794         }
1795
1796         RValue<SByte> operator~(RValue<SByte> val)
1797         {
1798                 return RValue<SByte>(Nucleus::createNot(val.value));
1799         }
1800
1801         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1802         {
1803                 RValue<SByte> res = val;
1804                 val += SByte(1);
1805                 return res;
1806         }
1807
1808         const SByte &operator++(SByte &val)   // Pre-increment
1809         {
1810                 val += SByte(1);
1811                 return val;
1812         }
1813
1814         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
1815         {
1816                 RValue<SByte> res = val;
1817                 val -= SByte(1);
1818                 return res;
1819         }
1820
1821         const SByte &operator--(SByte &val)   // Pre-decrement
1822         {
1823                 val -= SByte(1);
1824                 return val;
1825         }
1826
1827         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1828         {
1829                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1830         }
1831
1832         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1833         {
1834                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1835         }
1836
1837         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1838         {
1839                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1840         }
1841
1842         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1843         {
1844                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1845         }
1846
1847         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1848         {
1849                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1850         }
1851
1852         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1853         {
1854                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1855         }
1856
1857         Type *SByte::getType()
1858         {
1859                 return T(Ice::IceType_i8);
1860         }
1861
1862         Short::Short(Argument<Short> argument)
1863         {
1864                 storeValue(argument.value);
1865         }
1866
1867         Short::Short(RValue<Int> cast)
1868         {
1869                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1870
1871                 storeValue(integer);
1872         }
1873
1874         Short::Short(short x)
1875         {
1876                 storeValue(Nucleus::createConstantShort(x));
1877         }
1878
1879         Short::Short(RValue<Short> rhs)
1880         {
1881                 storeValue(rhs.value);
1882         }
1883
1884         Short::Short(const Short &rhs)
1885         {
1886                 Value *value = rhs.loadValue();
1887                 storeValue(value);
1888         }
1889
1890         Short::Short(const Reference<Short> &rhs)
1891         {
1892                 Value *value = rhs.loadValue();
1893                 storeValue(value);
1894         }
1895
1896         RValue<Short> Short::operator=(RValue<Short> rhs)
1897         {
1898                 storeValue(rhs.value);
1899
1900                 return rhs;
1901         }
1902
1903         RValue<Short> Short::operator=(const Short &rhs)
1904         {
1905                 Value *value = rhs.loadValue();
1906                 storeValue(value);
1907
1908                 return RValue<Short>(value);
1909         }
1910
1911         RValue<Short> Short::operator=(const Reference<Short> &rhs)
1912         {
1913                 Value *value = rhs.loadValue();
1914                 storeValue(value);
1915
1916                 return RValue<Short>(value);
1917         }
1918
1919         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1920         {
1921                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1922         }
1923
1924         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1925         {
1926                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1927         }
1928
1929         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1930         {
1931                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1932         }
1933
1934         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1935         {
1936                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1937         }
1938
1939         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1940         {
1941                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1942         }
1943
1944         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1945         {
1946                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1947         }
1948
1949         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1950         {
1951                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1952         }
1953
1954         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1955         {
1956                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1957         }
1958
1959         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1960         {
1961                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1962         }
1963
1964         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1965         {
1966                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1967         }
1968
1969         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
1970         {
1971                 return lhs = lhs + rhs;
1972         }
1973
1974         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
1975         {
1976                 return lhs = lhs - rhs;
1977         }
1978
1979         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
1980         {
1981                 return lhs = lhs * rhs;
1982         }
1983
1984         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
1985         {
1986                 return lhs = lhs / rhs;
1987         }
1988
1989         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
1990         {
1991                 return lhs = lhs % rhs;
1992         }
1993
1994         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
1995         {
1996                 return lhs = lhs & rhs;
1997         }
1998
1999         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2000         {
2001                 return lhs = lhs | rhs;
2002         }
2003
2004         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2005         {
2006                 return lhs = lhs ^ rhs;
2007         }
2008
2009         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2010         {
2011                 return lhs = lhs << rhs;
2012         }
2013
2014         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2015         {
2016                 return lhs = lhs >> rhs;
2017         }
2018
2019         RValue<Short> operator+(RValue<Short> val)
2020         {
2021                 return val;
2022         }
2023
2024         RValue<Short> operator-(RValue<Short> val)
2025         {
2026                 return RValue<Short>(Nucleus::createNeg(val.value));
2027         }
2028
2029         RValue<Short> operator~(RValue<Short> val)
2030         {
2031                 return RValue<Short>(Nucleus::createNot(val.value));
2032         }
2033
2034         RValue<Short> operator++(Short &val, int)   // Post-increment
2035         {
2036                 RValue<Short> res = val;
2037                 val += Short(1);
2038                 return res;
2039         }
2040
2041         const Short &operator++(Short &val)   // Pre-increment
2042         {
2043                 val += Short(1);
2044                 return val;
2045         }
2046
2047         RValue<Short> operator--(Short &val, int)   // Post-decrement
2048         {
2049                 RValue<Short> res = val;
2050                 val -= Short(1);
2051                 return res;
2052         }
2053
2054         const Short &operator--(Short &val)   // Pre-decrement
2055         {
2056                 val -= Short(1);
2057                 return val;
2058         }
2059
2060         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2061         {
2062                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2063         }
2064
2065         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2066         {
2067                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2068         }
2069
2070         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2071         {
2072                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2073         }
2074
2075         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2076         {
2077                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2078         }
2079
2080         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2081         {
2082                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2083         }
2084
2085         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2086         {
2087                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2088         }
2089
2090         Type *Short::getType()
2091         {
2092                 return T(Ice::IceType_i16);
2093         }
2094
2095         UShort::UShort(Argument<UShort> argument)
2096         {
2097                 storeValue(argument.value);
2098         }
2099
2100         UShort::UShort(RValue<UInt> cast)
2101         {
2102                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2103
2104                 storeValue(integer);
2105         }
2106
2107         UShort::UShort(RValue<Int> cast)
2108         {
2109                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2110
2111                 storeValue(integer);
2112         }
2113
2114         UShort::UShort(unsigned short x)
2115         {
2116                 storeValue(Nucleus::createConstantShort(x));
2117         }
2118
2119         UShort::UShort(RValue<UShort> rhs)
2120         {
2121                 storeValue(rhs.value);
2122         }
2123
2124         UShort::UShort(const UShort &rhs)
2125         {
2126                 Value *value = rhs.loadValue();
2127                 storeValue(value);
2128         }
2129
2130         UShort::UShort(const Reference<UShort> &rhs)
2131         {
2132                 Value *value = rhs.loadValue();
2133                 storeValue(value);
2134         }
2135
2136         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2137         {
2138                 storeValue(rhs.value);
2139
2140                 return rhs;
2141         }
2142
2143         RValue<UShort> UShort::operator=(const UShort &rhs)
2144         {
2145                 Value *value = rhs.loadValue();
2146                 storeValue(value);
2147
2148                 return RValue<UShort>(value);
2149         }
2150
2151         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2152         {
2153                 Value *value = rhs.loadValue();
2154                 storeValue(value);
2155
2156                 return RValue<UShort>(value);
2157         }
2158
2159         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2160         {
2161                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2162         }
2163
2164         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2165         {
2166                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2167         }
2168
2169         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2170         {
2171                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2172         }
2173
2174         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2175         {
2176                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2177         }
2178
2179         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2180         {
2181                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2182         }
2183
2184         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2185         {
2186                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2187         }
2188
2189         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2190         {
2191                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2192         }
2193
2194         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2195         {
2196                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2197         }
2198
2199         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2200         {
2201                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2202         }
2203
2204         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2205         {
2206                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2207         }
2208
2209         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2210         {
2211                 return lhs = lhs + rhs;
2212         }
2213
2214         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2215         {
2216                 return lhs = lhs - rhs;
2217         }
2218
2219         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2220         {
2221                 return lhs = lhs * rhs;
2222         }
2223
2224         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2225         {
2226                 return lhs = lhs / rhs;
2227         }
2228
2229         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2230         {
2231                 return lhs = lhs % rhs;
2232         }
2233
2234         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2235         {
2236                 return lhs = lhs & rhs;
2237         }
2238
2239         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2240         {
2241                 return lhs = lhs | rhs;
2242         }
2243
2244         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2245         {
2246                 return lhs = lhs ^ rhs;
2247         }
2248
2249         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2250         {
2251                 return lhs = lhs << rhs;
2252         }
2253
2254         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2255         {
2256                 return lhs = lhs >> rhs;
2257         }
2258
2259         RValue<UShort> operator+(RValue<UShort> val)
2260         {
2261                 return val;
2262         }
2263
2264         RValue<UShort> operator-(RValue<UShort> val)
2265         {
2266                 return RValue<UShort>(Nucleus::createNeg(val.value));
2267         }
2268
2269         RValue<UShort> operator~(RValue<UShort> val)
2270         {
2271                 return RValue<UShort>(Nucleus::createNot(val.value));
2272         }
2273
2274         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2275         {
2276                 RValue<UShort> res = val;
2277                 val += UShort(1);
2278                 return res;
2279         }
2280
2281         const UShort &operator++(UShort &val)   // Pre-increment
2282         {
2283                 val += UShort(1);
2284                 return val;
2285         }
2286
2287         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2288         {
2289                 RValue<UShort> res = val;
2290                 val -= UShort(1);
2291                 return res;
2292         }
2293
2294         const UShort &operator--(UShort &val)   // Pre-decrement
2295         {
2296                 val -= UShort(1);
2297                 return val;
2298         }
2299
2300         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2301         {
2302                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2303         }
2304
2305         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2306         {
2307                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2308         }
2309
2310         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2311         {
2312                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2313         }
2314
2315         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2316         {
2317                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2318         }
2319
2320         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2321         {
2322                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2323         }
2324
2325         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2326         {
2327                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2328         }
2329
2330         Type *UShort::getType()
2331         {
2332                 return T(Ice::IceType_i16);
2333         }
2334
2335         Byte4::Byte4(RValue<Byte8> cast)
2336         {
2337                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2338         }
2339
2340         Byte4::Byte4(const Reference<Byte4> &rhs)
2341         {
2342                 Value *value = rhs.loadValue();
2343                 storeValue(value);
2344         }
2345
2346         Type *Byte4::getType()
2347         {
2348                 return T(Type_v4i8);
2349         }
2350
2351         Type *SByte4::getType()
2352         {
2353                 return T(Type_v4i8);
2354         }
2355
2356         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2357         {
2358                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2359                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2360         }
2361
2362         Byte8::Byte8(RValue<Byte8> rhs)
2363         {
2364                 storeValue(rhs.value);
2365         }
2366
2367         Byte8::Byte8(const Byte8 &rhs)
2368         {
2369                 Value *value = rhs.loadValue();
2370                 storeValue(value);
2371         }
2372
2373         Byte8::Byte8(const Reference<Byte8> &rhs)
2374         {
2375                 Value *value = rhs.loadValue();
2376                 storeValue(value);
2377         }
2378
2379         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2380         {
2381                 storeValue(rhs.value);
2382
2383                 return rhs;
2384         }
2385
2386         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2387         {
2388                 Value *value = rhs.loadValue();
2389                 storeValue(value);
2390
2391                 return RValue<Byte8>(value);
2392         }
2393
2394         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2395         {
2396                 Value *value = rhs.loadValue();
2397                 storeValue(value);
2398
2399                 return RValue<Byte8>(value);
2400         }
2401
2402         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2403         {
2404                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2405         }
2406
2407         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2408         {
2409                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2410         }
2411
2412 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2413 //      {
2414 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2415 //      }
2416
2417 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2418 //      {
2419 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2420 //      }
2421
2422 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2423 //      {
2424 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2425 //      }
2426
2427         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2428         {
2429                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2430         }
2431
2432         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2433         {
2434                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2435         }
2436
2437         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2438         {
2439                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2440         }
2441
2442 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2443 //      {
2444 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2445 //      }
2446
2447 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2448 //      {
2449 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2450 //      }
2451
2452         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2453         {
2454                 return lhs = lhs + rhs;
2455         }
2456
2457         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2458         {
2459                 return lhs = lhs - rhs;
2460         }
2461
2462 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2463 //      {
2464 //              return lhs = lhs * rhs;
2465 //      }
2466
2467 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2468 //      {
2469 //              return lhs = lhs / rhs;
2470 //      }
2471
2472 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2473 //      {
2474 //              return lhs = lhs % rhs;
2475 //      }
2476
2477         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2478         {
2479                 return lhs = lhs & rhs;
2480         }
2481
2482         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2483         {
2484                 return lhs = lhs | rhs;
2485         }
2486
2487         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2488         {
2489                 return lhs = lhs ^ rhs;
2490         }
2491
2492 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2493 //      {
2494 //              return lhs = lhs << rhs;
2495 //      }
2496
2497 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2498 //      {
2499 //              return lhs = lhs >> rhs;
2500 //      }
2501
2502 //      RValue<Byte8> operator+(RValue<Byte8> val)
2503 //      {
2504 //              return val;
2505 //      }
2506
2507 //      RValue<Byte8> operator-(RValue<Byte8> val)
2508 //      {
2509 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2510 //      }
2511
2512         RValue<Byte8> operator~(RValue<Byte8> val)
2513         {
2514                 return RValue<Byte8>(Nucleus::createNot(val.value));
2515         }
2516
2517         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2518         {
2519                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2520                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2521                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2522                 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2523                 paddusb->addArg(x.value);
2524                 paddusb->addArg(y.value);
2525                 ::basicBlock->appendInst(paddusb);
2526
2527                 return RValue<Byte8>(V(result));
2528         }
2529
2530         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2531         {
2532                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2533                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2534                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2535                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2536                 psubusw->addArg(x.value);
2537                 psubusw->addArg(y.value);
2538                 ::basicBlock->appendInst(psubusw);
2539
2540                 return RValue<Byte8>(V(result));
2541         }
2542
2543         RValue<Short4> Unpack(RValue<Byte4> x)
2544         {
2545                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2546                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2547         }
2548
2549         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2550         {
2551                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2552                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2553         }
2554
2555         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2556         {
2557                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2558                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2559                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2560         }
2561
2562         RValue<Int> SignMask(RValue<Byte8> x)
2563         {
2564                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2565                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2566                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2567                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2568                 movmsk->addArg(x.value);
2569                 ::basicBlock->appendInst(movmsk);
2570
2571                 return RValue<Int>(V(result));
2572         }
2573
2574 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2575 //      {
2576 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2577 //      }
2578
2579         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2580         {
2581                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2582         }
2583
2584         Type *Byte8::getType()
2585         {
2586                 return T(Type_v8i8);
2587         }
2588
2589         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2590         {
2591                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
2592                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2593
2594                 storeValue(Nucleus::createBitCast(vector, getType()));
2595         }
2596
2597         SByte8::SByte8(RValue<SByte8> rhs)
2598         {
2599                 storeValue(rhs.value);
2600         }
2601
2602         SByte8::SByte8(const SByte8 &rhs)
2603         {
2604                 Value *value = rhs.loadValue();
2605                 storeValue(value);
2606         }
2607
2608         SByte8::SByte8(const Reference<SByte8> &rhs)
2609         {
2610                 Value *value = rhs.loadValue();
2611                 storeValue(value);
2612         }
2613
2614         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2615         {
2616                 storeValue(rhs.value);
2617
2618                 return rhs;
2619         }
2620
2621         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2622         {
2623                 Value *value = rhs.loadValue();
2624                 storeValue(value);
2625
2626                 return RValue<SByte8>(value);
2627         }
2628
2629         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2630         {
2631                 Value *value = rhs.loadValue();
2632                 storeValue(value);
2633
2634                 return RValue<SByte8>(value);
2635         }
2636
2637         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2638         {
2639                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2640         }
2641
2642         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2643         {
2644                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2645         }
2646
2647 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2648 //      {
2649 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2650 //      }
2651
2652 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2653 //      {
2654 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2655 //      }
2656
2657 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2658 //      {
2659 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2660 //      }
2661
2662         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2663         {
2664                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2665         }
2666
2667         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2668         {
2669                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2670         }
2671
2672         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2673         {
2674                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2675         }
2676
2677 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2678 //      {
2679 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2680 //      }
2681
2682 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2683 //      {
2684 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2685 //      }
2686
2687         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2688         {
2689                 return lhs = lhs + rhs;
2690         }
2691
2692         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2693         {
2694                 return lhs = lhs - rhs;
2695         }
2696
2697 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2698 //      {
2699 //              return lhs = lhs * rhs;
2700 //      }
2701
2702 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2703 //      {
2704 //              return lhs = lhs / rhs;
2705 //      }
2706
2707 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2708 //      {
2709 //              return lhs = lhs % rhs;
2710 //      }
2711
2712         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2713         {
2714                 return lhs = lhs & rhs;
2715         }
2716
2717         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2718         {
2719                 return lhs = lhs | rhs;
2720         }
2721
2722         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2723         {
2724                 return lhs = lhs ^ rhs;
2725         }
2726
2727 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2728 //      {
2729 //              return lhs = lhs << rhs;
2730 //      }
2731
2732 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2733 //      {
2734 //              return lhs = lhs >> rhs;
2735 //      }
2736
2737 //      RValue<SByte8> operator+(RValue<SByte8> val)
2738 //      {
2739 //              return val;
2740 //      }
2741
2742 //      RValue<SByte8> operator-(RValue<SByte8> val)
2743 //      {
2744 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
2745 //      }
2746
2747         RValue<SByte8> operator~(RValue<SByte8> val)
2748         {
2749                 return RValue<SByte8>(Nucleus::createNot(val.value));
2750         }
2751
2752         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2753         {
2754                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2755                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2756                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2757                 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2758                 paddsb->addArg(x.value);
2759                 paddsb->addArg(y.value);
2760                 ::basicBlock->appendInst(paddsb);
2761
2762                 return RValue<SByte8>(V(result));
2763         }
2764
2765         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2766         {
2767                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2768                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2769                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2770                 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2771                 psubsb->addArg(x.value);
2772                 psubsb->addArg(y.value);
2773                 ::basicBlock->appendInst(psubsb);
2774
2775                 return RValue<SByte8>(V(result));
2776         }
2777
2778         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2779         {
2780                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2781                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2782         }
2783
2784         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2785         {
2786                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2787                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2788                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2789         }
2790
2791         RValue<Int> SignMask(RValue<SByte8> x)
2792         {
2793                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2794                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2795                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2796                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2797                 movmsk->addArg(x.value);
2798                 ::basicBlock->appendInst(movmsk);
2799
2800                 return RValue<Int>(V(result));
2801         }
2802
2803         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2804         {
2805                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2806         }
2807
2808         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2809         {
2810                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2811         }
2812
2813         Type *SByte8::getType()
2814         {
2815                 return T(Type_v8i8);
2816         }
2817
2818         Byte16::Byte16(RValue<Byte16> rhs)
2819         {
2820                 storeValue(rhs.value);
2821         }
2822
2823         Byte16::Byte16(const Byte16 &rhs)
2824         {
2825                 Value *value = rhs.loadValue();
2826                 storeValue(value);
2827         }
2828
2829         Byte16::Byte16(const Reference<Byte16> &rhs)
2830         {
2831                 Value *value = rhs.loadValue();
2832                 storeValue(value);
2833         }
2834
2835         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2836         {
2837                 storeValue(rhs.value);
2838
2839                 return rhs;
2840         }
2841
2842         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2843         {
2844                 Value *value = rhs.loadValue();
2845                 storeValue(value);
2846
2847                 return RValue<Byte16>(value);
2848         }
2849
2850         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2851         {
2852                 Value *value = rhs.loadValue();
2853                 storeValue(value);
2854
2855                 return RValue<Byte16>(value);
2856         }
2857
2858         Type *Byte16::getType()
2859         {
2860                 return T(Ice::IceType_v16i8);
2861         }
2862
2863         Type *SByte16::getType()
2864         {
2865                 return T(Ice::IceType_v16i8);
2866         }
2867
2868         Short2::Short2(RValue<Short4> cast)
2869         {
2870                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2871         }
2872
2873         Type *Short2::getType()
2874         {
2875                 return T(Type_v2i16);
2876         }
2877
2878         UShort2::UShort2(RValue<UShort4> cast)
2879         {
2880                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2881         }
2882
2883         Type *UShort2::getType()
2884         {
2885                 return T(Type_v2i16);
2886         }
2887
2888         Short4::Short4(RValue<Int> cast)
2889         {
2890                 Value *vector = loadValue();
2891                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
2892                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
2893                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
2894
2895                 storeValue(swizzle);
2896         }
2897
2898         Short4::Short4(RValue<Int4> cast)
2899         {
2900                 int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
2901                 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
2902                 Value *packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
2903
2904                 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
2905                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2906
2907                 storeValue(short4);
2908         }
2909
2910 //      Short4::Short4(RValue<Float> cast)
2911 //      {
2912 //      }
2913
        // Conversion from Float4 is not implemented in this backend; reaching this
        // constructor trips the assertion below (when assertions are enabled) and
        // stores nothing into the variable.
        Short4::Short4(RValue<Float4> cast)
        {
                assert(false && "UNIMPLEMENTED");
        }
2918
2919         Short4::Short4(short xyzw)
2920         {
2921                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2922                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2923         }
2924
2925         Short4::Short4(short x, short y, short z, short w)
2926         {
2927                 int64_t constantVector[4] = {x, y, z, w};
2928                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2929         }
2930
2931         Short4::Short4(RValue<Short4> rhs)
2932         {
2933                 storeValue(rhs.value);
2934         }
2935
2936         Short4::Short4(const Short4 &rhs)
2937         {
2938                 Value *value = rhs.loadValue();
2939                 storeValue(value);
2940         }
2941
2942         Short4::Short4(const Reference<Short4> &rhs)
2943         {
2944                 Value *value = rhs.loadValue();
2945                 storeValue(value);
2946         }
2947
2948         Short4::Short4(RValue<UShort4> rhs)
2949         {
2950                 storeValue(rhs.value);
2951         }
2952
2953         Short4::Short4(const UShort4 &rhs)
2954         {
2955                 storeValue(rhs.loadValue());
2956         }
2957
2958         Short4::Short4(const Reference<UShort4> &rhs)
2959         {
2960                 storeValue(rhs.loadValue());
2961         }
2962
2963         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
2964         {
2965                 storeValue(rhs.value);
2966
2967                 return rhs;
2968         }
2969
2970         RValue<Short4> Short4::operator=(const Short4 &rhs)
2971         {
2972                 Value *value = rhs.loadValue();
2973                 storeValue(value);
2974
2975                 return RValue<Short4>(value);
2976         }
2977
2978         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
2979         {
2980                 Value *value = rhs.loadValue();
2981                 storeValue(value);
2982
2983                 return RValue<Short4>(value);
2984         }
2985
2986         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
2987         {
2988                 storeValue(rhs.value);
2989
2990                 return RValue<Short4>(rhs);
2991         }
2992
2993         RValue<Short4> Short4::operator=(const UShort4 &rhs)
2994         {
2995                 Value *value = rhs.loadValue();
2996                 storeValue(value);
2997
2998                 return RValue<Short4>(value);
2999         }
3000
3001         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3002         {
3003                 Value *value = rhs.loadValue();
3004                 storeValue(value);
3005
3006                 return RValue<Short4>(value);
3007         }
3008
3009         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3010         {
3011                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3012         }
3013
3014         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3015         {
3016                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3017         }
3018
3019         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3020         {
3021                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3022         }
3023
3024 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3025 //      {
3026 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3027 //      }
3028
3029 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3030 //      {
3031 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3032 //      }
3033
3034         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3035         {
3036                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3037         }
3038
3039         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3040         {
3041                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3042         }
3043
3044         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3045         {
3046                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3047         }
3048
3049         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3050         {
3051                 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3052         }
3053
3054         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3055         {
3056                 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3057         }
3058
3059         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3060         {
3061                 return lhs = lhs + rhs;
3062         }
3063
3064         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3065         {
3066                 return lhs = lhs - rhs;
3067         }
3068
3069         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3070         {
3071                 return lhs = lhs * rhs;
3072         }
3073
3074 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3075 //      {
3076 //              return lhs = lhs / rhs;
3077 //      }
3078
3079 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3080 //      {
3081 //              return lhs = lhs % rhs;
3082 //      }
3083
3084         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3085         {
3086                 return lhs = lhs & rhs;
3087         }
3088
3089         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3090         {
3091                 return lhs = lhs | rhs;
3092         }
3093
3094         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3095         {
3096                 return lhs = lhs ^ rhs;
3097         }
3098
3099         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3100         {
3101                 return lhs = lhs << rhs;
3102         }
3103
3104         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3105         {
3106                 return lhs = lhs >> rhs;
3107         }
3108
3109 //      RValue<Short4> operator+(RValue<Short4> val)
3110 //      {
3111 //              return val;
3112 //      }
3113
3114         RValue<Short4> operator-(RValue<Short4> val)
3115         {
3116                 return RValue<Short4>(Nucleus::createNeg(val.value));
3117         }
3118
3119         RValue<Short4> operator~(RValue<Short4> val)
3120         {
3121                 return RValue<Short4>(Nucleus::createNot(val.value));
3122         }
3123
3124         RValue<Short4> RoundShort4(RValue<Float4> cast)
3125         {
3126                 RValue<Int4> int4 = RoundInt(cast);
3127                 return As<Short4>(Pack(int4, int4));
3128         }
3129
3130         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3131         {
3132                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3133                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3134                 ::basicBlock->appendInst(cmp);
3135
3136                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3137                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3138                 ::basicBlock->appendInst(select);
3139
3140                 return RValue<Short4>(V(result));
3141         }
3142
3143         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3144         {
3145                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3146                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3147                 ::basicBlock->appendInst(cmp);
3148
3149                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3150                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3151                 ::basicBlock->appendInst(select);
3152
3153                 return RValue<Short4>(V(result));
3154         }
3155
3156         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3157         {
3158                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3159                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3160                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3161                 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3162                 paddsw->addArg(x.value);
3163                 paddsw->addArg(y.value);
3164                 ::basicBlock->appendInst(paddsw);
3165
3166                 return RValue<Short4>(V(result));
3167         }
3168
3169         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3170         {
3171                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3172                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3173                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3174                 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3175                 psubsw->addArg(x.value);
3176                 psubsw->addArg(y.value);
3177                 ::basicBlock->appendInst(psubsw);
3178
3179                 return RValue<Short4>(V(result));
3180         }
3181
3182         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3183         {
3184                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3185                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3186                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3187                 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3188                 pmulhw->addArg(x.value);
3189                 pmulhw->addArg(y.value);
3190                 ::basicBlock->appendInst(pmulhw);
3191
3192                 return RValue<Short4>(V(result));
3193         }
3194
3195         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3196         {
3197                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3198                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3199                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3200                 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3201                 pmaddwd->addArg(x.value);
3202                 pmaddwd->addArg(y.value);
3203                 ::basicBlock->appendInst(pmaddwd);
3204
3205                 return RValue<Int2>(V(result));
3206         }
3207
3208         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3209         {
3210                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3211                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3212                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3213                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3214                 pack->addArg(x.value);
3215                 pack->addArg(y.value);
3216                 ::basicBlock->appendInst(pack);
3217
3218                 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
3219         }
3220
3221         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3222         {
3223                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3224                 return RValue<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3225         }
3226
3227         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3228         {
3229                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3230                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3231                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3232         }
3233
3234         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3235         {
3236                 // Real type is v8i16
3237                 int shuffle[8] =
3238                 {
3239                         (select >> 0) & 0x03,
3240                         (select >> 2) & 0x03,
3241                         (select >> 4) & 0x03,
3242                         (select >> 6) & 0x03,
3243                         (select >> 0) & 0x03,
3244                         (select >> 2) & 0x03,
3245                         (select >> 4) & 0x03,
3246                         (select >> 6) & 0x03,
3247                 };
3248
3249                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3250         }
3251
3252         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3253         {
3254                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3255         }
3256
3257         RValue<Short> Extract(RValue<Short4> val, int i)
3258         {
3259                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3260         }
3261
3262         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3263         {
3264                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3265         }
3266
3267         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3268         {
3269                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3270         }
3271
3272         Type *Short4::getType()
3273         {
3274                 return T(Type_v4i16);
3275         }
3276
3277         UShort4::UShort4(RValue<Int4> cast)
3278         {
3279                 *this = Short4(cast);
3280         }
3281
3282         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3283         {
3284                 if(saturate)
3285                 {
3286                         if(true)   // SSE 4.1
3287                         {
3288                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3289                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3290                         }
3291                         else
3292                         {
3293                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3294                         }
3295                 }
3296                 else
3297                 {
3298                         *this = Short4(Int4(cast));
3299                 }
3300         }
3301
3302         UShort4::UShort4(unsigned short xyzw)
3303         {
3304                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3305                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3306         }
3307
3308         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3309         {
3310                 int64_t constantVector[4] = {x, y, z, w};
3311                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3312         }
3313
3314         UShort4::UShort4(RValue<UShort4> rhs)
3315         {
3316                 storeValue(rhs.value);
3317         }
3318
3319         UShort4::UShort4(const UShort4 &rhs)
3320         {
3321                 Value *value = rhs.loadValue();
3322                 storeValue(value);
3323         }
3324
3325         UShort4::UShort4(const Reference<UShort4> &rhs)
3326         {
3327                 Value *value = rhs.loadValue();
3328                 storeValue(value);
3329         }
3330
3331         UShort4::UShort4(RValue<Short4> rhs)
3332         {
3333                 storeValue(rhs.value);
3334         }
3335
3336         UShort4::UShort4(const Short4 &rhs)
3337         {
3338                 Value *value = rhs.loadValue();
3339                 storeValue(value);
3340         }
3341
3342         UShort4::UShort4(const Reference<Short4> &rhs)
3343         {
3344                 Value *value = rhs.loadValue();
3345                 storeValue(value);
3346         }
3347
3348         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3349         {
3350                 storeValue(rhs.value);
3351
3352                 return rhs;
3353         }
3354
3355         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3356         {
3357                 Value *value = rhs.loadValue();
3358                 storeValue(value);
3359
3360                 return RValue<UShort4>(value);
3361         }
3362
3363         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3364         {
3365                 Value *value = rhs.loadValue();
3366                 storeValue(value);
3367
3368                 return RValue<UShort4>(value);
3369         }
3370
3371         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3372         {
3373                 storeValue(rhs.value);
3374
3375                 return RValue<UShort4>(rhs);
3376         }
3377
3378         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3379         {
3380                 Value *value = rhs.loadValue();
3381                 storeValue(value);
3382
3383                 return RValue<UShort4>(value);
3384         }
3385
3386         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3387         {
3388                 Value *value = rhs.loadValue();
3389                 storeValue(value);
3390
3391                 return RValue<UShort4>(value);
3392         }
3393
3394         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3395         {
3396                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3397         }
3398
3399         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3400         {
3401                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3402         }
3403
3404         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3405         {
3406                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3407         }
3408
3409         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3410         {
3411                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3412         }
3413
3414         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3415         {
3416                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3417         }
3418
3419         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3420         {
3421                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3422         }
3423
3424         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3425         {
3426                 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3427         }
3428
3429         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3430         {
3431                 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3432         }
3433
3434         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3435         {
3436                 return lhs = lhs << rhs;
3437         }
3438
3439         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3440         {
3441                 return lhs = lhs >> rhs;
3442         }
3443
3444         RValue<UShort4> operator~(RValue<UShort4> val)
3445         {
3446                 return RValue<UShort4>(Nucleus::createNot(val.value));
3447         }
3448
3449         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3450         {
3451                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3452                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3453                 ::basicBlock->appendInst(cmp);
3454
3455                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3456                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3457                 ::basicBlock->appendInst(select);
3458
3459                 return RValue<UShort4>(V(result));
3460         }
3461
3462         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3463         {
3464                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3465                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3466                 ::basicBlock->appendInst(cmp);
3467
3468                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3469                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3470                 ::basicBlock->appendInst(select);
3471
3472                 return RValue<UShort4>(V(result));
3473         }
3474
3475         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3476         {
3477                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3478                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3479                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3480                 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3481                 paddusw->addArg(x.value);
3482                 paddusw->addArg(y.value);
3483                 ::basicBlock->appendInst(paddusw);
3484
3485                 return RValue<UShort4>(V(result));
3486         }
3487
3488         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3489         {
3490                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3491                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3492                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3493                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3494                 psubusw->addArg(x.value);
3495                 psubusw->addArg(y.value);
3496                 ::basicBlock->appendInst(psubusw);
3497
3498                 return RValue<UShort4>(V(result));
3499         }
3500
3501         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3502         {
3503                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3504                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3505                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3506                 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3507                 pmulhuw->addArg(x.value);
3508                 pmulhuw->addArg(y.value);
3509                 ::basicBlock->appendInst(pmulhuw);
3510
3511                 return RValue<UShort4>(V(result));
3512         }
3513
3514         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3515         {
3516                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
3517         }
3518
3519         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3520         {
3521                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3522                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3523                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3524                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3525                 pack->addArg(x.value);
3526                 pack->addArg(y.value);
3527                 ::basicBlock->appendInst(pack);
3528
3529                 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
3530         }
3531
3532         Type *UShort4::getType()
3533         {
3534                 return T(Type_v4i16);
3535         }
3536
3537         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3538         {
3539                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3540                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3541         }
3542
3543         Short8::Short8(RValue<Short8> rhs)
3544         {
3545                 storeValue(rhs.value);
3546         }
3547
3548         Short8::Short8(const Reference<Short8> &rhs)
3549         {
3550                 Value *value = rhs.loadValue();
3551                 storeValue(value);
3552         }
3553
3554         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3555         {
3556                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3557                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3558
3559                 storeValue(packed);
3560         }
3561
3562         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3563         {
3564                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3565         }
3566
3567         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3568         {
3569                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3570         }
3571
3572         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3573         {
3574                 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3575         }
3576
3577         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3578         {
3579                 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3580         }
3581
3582         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3583         {
3584                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
3585         }
3586
3587         RValue<Int4> Abs(RValue<Int4> x)
3588         {
3589                 auto negative = x >> 31;
3590                 return (x ^ negative) - negative;
3591         }
3592
3593         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3594         {
3595                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
3596         }
3597
3598         Type *Short8::getType()
3599         {
3600                 return T(Ice::IceType_v8i16);
3601         }
3602
3603         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3604         {
3605                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3606                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3607         }
3608
3609         UShort8::UShort8(RValue<UShort8> rhs)
3610         {
3611                 storeValue(rhs.value);
3612         }
3613
3614         UShort8::UShort8(const Reference<UShort8> &rhs)
3615         {
3616                 Value *value = rhs.loadValue();
3617                 storeValue(value);
3618         }
3619
3620         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3621         {
3622                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3623                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3624
3625                 storeValue(packed);
3626         }
3627
3628         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3629         {
3630                 storeValue(rhs.value);
3631
3632                 return rhs;
3633         }
3634
3635         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3636         {
3637                 Value *value = rhs.loadValue();
3638                 storeValue(value);
3639
3640                 return RValue<UShort8>(value);
3641         }
3642
3643         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3644         {
3645                 Value *value = rhs.loadValue();
3646                 storeValue(value);
3647
3648                 return RValue<UShort8>(value);
3649         }
3650
3651         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3652         {
3653                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3654         }
3655
3656         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3657         {
3658                 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3659         }
3660
3661         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3662         {
3663                 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3664         }
3665
3666         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3667         {
3668                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3669         }
3670
3671         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3672         {
3673                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3674         }
3675
3676         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3677         {
3678                 return lhs = lhs + rhs;
3679         }
3680
3681         RValue<UShort8> operator~(RValue<UShort8> val)
3682         {
3683                 return RValue<UShort8>(Nucleus::createNot(val.value));
3684         }
3685
3686         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3687         {
3688                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3689         }
3690
3691         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3692         {
3693                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3694         }
3695
3696         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3697 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3698 //      {
3699 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3700 //      }
3701
3702         Type *UShort8::getType()
3703         {
3704                 return T(Ice::IceType_v8i16);
3705         }
3706
3707         Int::Int(Argument<Int> argument)
3708         {
3709                 storeValue(argument.value);
3710         }
3711
3712         Int::Int(RValue<Byte> cast)
3713         {
3714                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3715
3716                 storeValue(integer);
3717         }
3718
3719         Int::Int(RValue<SByte> cast)
3720         {
3721                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3722
3723                 storeValue(integer);
3724         }
3725
3726         Int::Int(RValue<Short> cast)
3727         {
3728                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3729
3730                 storeValue(integer);
3731         }
3732
3733         Int::Int(RValue<UShort> cast)
3734         {
3735                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3736
3737                 storeValue(integer);
3738         }
3739
3740         Int::Int(RValue<Int2> cast)
3741         {
3742                 *this = Extract(cast, 0);
3743         }
3744
3745         Int::Int(RValue<Long> cast)
3746         {
3747                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3748
3749                 storeValue(integer);
3750         }
3751
3752         Int::Int(RValue<Float> cast)
3753         {
3754                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3755
3756                 storeValue(integer);
3757         }
3758
3759         Int::Int(int x)
3760         {
3761                 storeValue(Nucleus::createConstantInt(x));
3762         }
3763
3764         Int::Int(RValue<Int> rhs)
3765         {
3766                 storeValue(rhs.value);
3767         }
3768
3769         Int::Int(RValue<UInt> rhs)
3770         {
3771                 storeValue(rhs.value);
3772         }
3773
3774         Int::Int(const Int &rhs)
3775         {
3776                 Value *value = rhs.loadValue();
3777                 storeValue(value);
3778         }
3779
3780         Int::Int(const Reference<Int> &rhs)
3781         {
3782                 Value *value = rhs.loadValue();
3783                 storeValue(value);
3784         }
3785
3786         Int::Int(const UInt &rhs)
3787         {
3788                 Value *value = rhs.loadValue();
3789                 storeValue(value);
3790         }
3791
3792         Int::Int(const Reference<UInt> &rhs)
3793         {
3794                 Value *value = rhs.loadValue();
3795                 storeValue(value);
3796         }
3797
3798         RValue<Int> Int::operator=(int rhs)
3799         {
3800                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3801         }
3802
3803         RValue<Int> Int::operator=(RValue<Int> rhs)
3804         {
3805                 storeValue(rhs.value);
3806
3807                 return rhs;
3808         }
3809
3810         RValue<Int> Int::operator=(RValue<UInt> rhs)
3811         {
3812                 storeValue(rhs.value);
3813
3814                 return RValue<Int>(rhs);
3815         }
3816
3817         RValue<Int> Int::operator=(const Int &rhs)
3818         {
3819                 Value *value = rhs.loadValue();
3820                 storeValue(value);
3821
3822                 return RValue<Int>(value);
3823         }
3824
3825         RValue<Int> Int::operator=(const Reference<Int> &rhs)
3826         {
3827                 Value *value = rhs.loadValue();
3828                 storeValue(value);
3829
3830                 return RValue<Int>(value);
3831         }
3832
3833         RValue<Int> Int::operator=(const UInt &rhs)
3834         {
3835                 Value *value = rhs.loadValue();
3836                 storeValue(value);
3837
3838                 return RValue<Int>(value);
3839         }
3840
3841         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3842         {
3843                 Value *value = rhs.loadValue();
3844                 storeValue(value);
3845
3846                 return RValue<Int>(value);
3847         }
3848
3849         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3850         {
3851                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3852         }
3853
3854         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3855         {
3856                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3857         }
3858
3859         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3860         {
3861                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3862         }
3863
3864         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3865         {
3866                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3867         }
3868
3869         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3870         {
3871                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3872         }
3873
3874         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3875         {
3876                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3877         }
3878
3879         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3880         {
3881                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3882         }
3883
3884         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3885         {
3886                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3887         }
3888
3889         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3890         {
3891                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3892         }
3893
3894         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3895         {
3896                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3897         }
3898
3899         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
3900         {
3901                 return lhs = lhs + rhs;
3902         }
3903
3904         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
3905         {
3906                 return lhs = lhs - rhs;
3907         }
3908
3909         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
3910         {
3911                 return lhs = lhs * rhs;
3912         }
3913
3914         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
3915         {
3916                 return lhs = lhs / rhs;
3917         }
3918
3919         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
3920         {
3921                 return lhs = lhs % rhs;
3922         }
3923
3924         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
3925         {
3926                 return lhs = lhs & rhs;
3927         }
3928
3929         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
3930         {
3931                 return lhs = lhs | rhs;
3932         }
3933
3934         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
3935         {
3936                 return lhs = lhs ^ rhs;
3937         }
3938
3939         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
3940         {
3941                 return lhs = lhs << rhs;
3942         }
3943
3944         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
3945         {
3946                 return lhs = lhs >> rhs;
3947         }
3948
3949         RValue<Int> operator+(RValue<Int> val)
3950         {
3951                 return val;
3952         }
3953
3954         RValue<Int> operator-(RValue<Int> val)
3955         {
3956                 return RValue<Int>(Nucleus::createNeg(val.value));
3957         }
3958
3959         RValue<Int> operator~(RValue<Int> val)
3960         {
3961                 return RValue<Int>(Nucleus::createNot(val.value));
3962         }
3963
3964         RValue<Int> operator++(Int &val, int)   // Post-increment
3965         {
3966                 RValue<Int> res = val;
3967                 val += 1;
3968                 return res;
3969         }
3970
3971         const Int &operator++(Int &val)   // Pre-increment
3972         {
3973                 val += 1;
3974                 return val;
3975         }
3976
3977         RValue<Int> operator--(Int &val, int)   // Post-decrement
3978         {
3979                 RValue<Int> res = val;
3980                 val -= 1;
3981                 return res;
3982         }
3983
3984         const Int &operator--(Int &val)   // Pre-decrement
3985         {
3986                 val -= 1;
3987                 return val;
3988         }
3989
3990         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
3991         {
3992                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
3993         }
3994
3995         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
3996         {
3997                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
3998         }
3999
4000         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4001         {
4002                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4003         }
4004
4005         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4006         {
4007                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4008         }
4009
4010         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4011         {
4012                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4013         }
4014
4015         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4016         {
4017                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4018         }
4019
4020         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4021         {
4022                 return IfThenElse(x > y, x, y);
4023         }
4024
4025         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4026         {
4027                 return IfThenElse(x < y, x, y);
4028         }
4029
4030         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4031         {
4032                 return Min(Max(x, min), max);
4033         }
4034
4035         RValue<Int> RoundInt(RValue<Float> cast)
4036         {
4037                 RValue<Float> rounded = Round(cast);
4038
4039                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4040                 auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
4041                 ::basicBlock->appendInst(round);
4042
4043                 return RValue<Int>(V(result));
4044         }
4045
4046         Type *Int::getType()
4047         {
4048                 return T(Ice::IceType_i32);
4049         }
4050
4051         Long::Long(RValue<Int> cast)
4052         {
4053                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4054
4055                 storeValue(integer);
4056         }
4057
4058         Long::Long(RValue<UInt> cast)
4059         {
4060                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4061
4062                 storeValue(integer);
4063         }
4064
4065         Long::Long(RValue<Long> rhs)
4066         {
4067                 storeValue(rhs.value);
4068         }
4069
4070         RValue<Long> Long::operator=(int64_t rhs)
4071         {
4072                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4073         }
4074
4075         RValue<Long> Long::operator=(RValue<Long> rhs)
4076         {
4077                 storeValue(rhs.value);
4078
4079                 return rhs;
4080         }
4081
4082         RValue<Long> Long::operator=(const Long &rhs)
4083         {
4084                 Value *value = rhs.loadValue();
4085                 storeValue(value);
4086
4087                 return RValue<Long>(value);
4088         }
4089
4090         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4091         {
4092                 Value *value = rhs.loadValue();
4093                 storeValue(value);
4094
4095                 return RValue<Long>(value);
4096         }
4097
4098         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4099         {
4100                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4101         }
4102
4103         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4104         {
4105                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4106         }
4107
4108         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4109         {
4110                 return lhs = lhs + rhs;
4111         }
4112
4113         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4114         {
4115                 return lhs = lhs - rhs;
4116         }
4117
4118         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4119         {
4120                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4121         }
4122
4123         Type *Long::getType()
4124         {
4125                 return T(Ice::IceType_i64);
4126         }
4127
4128         UInt::UInt(Argument<UInt> argument)
4129         {
4130                 storeValue(argument.value);
4131         }
4132
4133         UInt::UInt(RValue<UShort> cast)
4134         {
4135                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4136
4137                 storeValue(integer);
4138         }
4139
4140         UInt::UInt(RValue<Long> cast)
4141         {
4142                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4143
4144                 storeValue(integer);
4145         }
4146
4147         UInt::UInt(RValue<Float> cast)
4148         {
4149                 // Smallest positive value representable in UInt, but not in Int
4150                 const unsigned int ustart = 0x80000000u;
4151                 const float ustartf = float(ustart);
4152
4153                 // If the value is negative, store 0, otherwise store the result of the conversion
4154                 storeValue((~(As<Int>(cast) >> 31) &
4155                 // Check if the value can be represented as an Int
4156                         IfThenElse(cast >= ustartf,
4157                 // If the value is too large, subtract ustart and re-add it after conversion.
4158                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4159                 // Otherwise, just convert normally
4160                                 Int(cast))).value);
4161         }
4162
4163         UInt::UInt(int x)
4164         {
4165                 storeValue(Nucleus::createConstantInt(x));
4166         }
4167
4168         UInt::UInt(unsigned int x)
4169         {
4170                 storeValue(Nucleus::createConstantInt(x));
4171         }
4172
4173         UInt::UInt(RValue<UInt> rhs)
4174         {
4175                 storeValue(rhs.value);
4176         }
4177
4178         UInt::UInt(RValue<Int> rhs)
4179         {
4180                 storeValue(rhs.value);
4181         }
4182
4183         UInt::UInt(const UInt &rhs)
4184         {
4185                 Value *value = rhs.loadValue();
4186                 storeValue(value);
4187         }
4188
4189         UInt::UInt(const Reference<UInt> &rhs)
4190         {
4191                 Value *value = rhs.loadValue();
4192                 storeValue(value);
4193         }
4194
4195         UInt::UInt(const Int &rhs)
4196         {
4197                 Value *value = rhs.loadValue();
4198                 storeValue(value);
4199         }
4200
4201         UInt::UInt(const Reference<Int> &rhs)
4202         {
4203                 Value *value = rhs.loadValue();
4204                 storeValue(value);
4205         }
4206
4207         RValue<UInt> UInt::operator=(unsigned int rhs)
4208         {
4209                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4210         }
4211
4212         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4213         {
4214                 storeValue(rhs.value);
4215
4216                 return rhs;
4217         }
4218
4219         RValue<UInt> UInt::operator=(RValue<Int> rhs)
4220         {
4221                 storeValue(rhs.value);
4222
4223                 return RValue<UInt>(rhs);
4224         }
4225
4226         RValue<UInt> UInt::operator=(const UInt &rhs)
4227         {
4228                 Value *value = rhs.loadValue();
4229                 storeValue(value);
4230
4231                 return RValue<UInt>(value);
4232         }
4233
4234         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4235         {
4236                 Value *value = rhs.loadValue();
4237                 storeValue(value);
4238
4239                 return RValue<UInt>(value);
4240         }
4241
4242         RValue<UInt> UInt::operator=(const Int &rhs)
4243         {
4244                 Value *value = rhs.loadValue();
4245                 storeValue(value);
4246
4247                 return RValue<UInt>(value);
4248         }
4249
4250         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4251         {
4252                 Value *value = rhs.loadValue();
4253                 storeValue(value);
4254
4255                 return RValue<UInt>(value);
4256         }
4257
4258         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4259         {
4260                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4261         }
4262
4263         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4264         {
4265                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4266         }
4267
4268         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4269         {
4270                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4271         }
4272
4273         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4274         {
4275                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4276         }
4277
4278         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4279         {
4280                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4281         }
4282
4283         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4284         {
4285                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4286         }
4287
4288         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4289         {
4290                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4291         }
4292
4293         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4294         {
4295                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4296         }
4297
4298         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4299         {
4300                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4301         }
4302
4303         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4304         {
4305                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4306         }
4307
4308         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4309         {
4310                 return lhs = lhs + rhs;
4311         }
4312
4313         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4314         {
4315                 return lhs = lhs - rhs;
4316         }
4317
4318         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4319         {
4320                 return lhs = lhs * rhs;
4321         }
4322
4323         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4324         {
4325                 return lhs = lhs / rhs;
4326         }
4327
4328         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4329         {
4330                 return lhs = lhs % rhs;
4331         }
4332
4333         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4334         {
4335                 return lhs = lhs & rhs;
4336         }
4337
4338         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4339         {
4340                 return lhs = lhs | rhs;
4341         }
4342
4343         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4344         {
4345                 return lhs = lhs ^ rhs;
4346         }
4347
4348         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4349         {
4350                 return lhs = lhs << rhs;
4351         }
4352
4353         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4354         {
4355                 return lhs = lhs >> rhs;
4356         }
4357
4358         RValue<UInt> operator+(RValue<UInt> val)
4359         {
4360                 return val;
4361         }
4362
4363         RValue<UInt> operator-(RValue<UInt> val)
4364         {
4365                 return RValue<UInt>(Nucleus::createNeg(val.value));
4366         }
4367
4368         RValue<UInt> operator~(RValue<UInt> val)
4369         {
4370                 return RValue<UInt>(Nucleus::createNot(val.value));
4371         }
4372
4373         RValue<UInt> operator++(UInt &val, int)   // Post-increment
4374         {
4375                 RValue<UInt> res = val;
4376                 val += 1;
4377                 return res;
4378         }
4379
4380         const UInt &operator++(UInt &val)   // Pre-increment
4381         {
4382                 val += 1;
4383                 return val;
4384         }
4385
4386         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
4387         {
4388                 RValue<UInt> res = val;
4389                 val -= 1;
4390                 return res;
4391         }
4392
4393         const UInt &operator--(UInt &val)   // Pre-decrement
4394         {
4395                 val -= 1;
4396                 return val;
4397         }
4398
4399         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4400         {
4401                 return IfThenElse(x > y, x, y);
4402         }
4403
4404         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4405         {
4406                 return IfThenElse(x < y, x, y);
4407         }
4408
4409         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4410         {
4411                 return Min(Max(x, min), max);
4412         }
4413
4414         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4415         {
4416                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4417         }
4418
4419         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4420         {
4421                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4422         }
4423
4424         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4425         {
4426                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4427         }
4428
4429         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4430         {
4431                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4432         }
4433
4434         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4435         {
4436                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4437         }
4438
4439         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4440         {
4441                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4442         }
4443
4444 //      RValue<UInt> RoundUInt(RValue<Float> cast)
4445 //      {
4446 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
4447 //      }
4448
4449         Type *UInt::getType()
4450         {
4451                 return T(Ice::IceType_i32);
4452         }
4453
4454 //      Int2::Int2(RValue<Int> cast)
4455 //      {
4456 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4457 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4458 //
4459 //              Constant *shuffle[2];
4460 //              shuffle[0] = Nucleus::createConstantInt(0);
4461 //              shuffle[1] = Nucleus::createConstantInt(0);
4462 //
4463 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4464 //
4465 //              storeValue(replicate);
4466 //      }
4467
4468         Int2::Int2(RValue<Int4> cast)
4469         {
4470                 storeValue(Nucleus::createBitCast(cast.value, getType()));
4471         }
4472
4473         Int2::Int2(int x, int y)
4474         {
4475                 int64_t constantVector[2] = {x, y};
4476                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4477         }
4478
4479         Int2::Int2(RValue<Int2> rhs)
4480         {
4481                 storeValue(rhs.value);
4482         }
4483
4484         Int2::Int2(const Int2 &rhs)
4485         {
4486                 Value *value = rhs.loadValue();
4487                 storeValue(value);
4488         }
4489
4490         Int2::Int2(const Reference<Int2> &rhs)
4491         {
4492                 Value *value = rhs.loadValue();
4493                 storeValue(value);
4494         }
4495
4496         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4497         {
4498                 int shuffle[4] = {0, 4, 1, 5};
4499                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4500
4501                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4502         }
4503
4504         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4505         {
4506                 storeValue(rhs.value);
4507
4508                 return rhs;
4509         }
4510
4511         RValue<Int2> Int2::operator=(const Int2 &rhs)
4512         {
4513                 Value *value = rhs.loadValue();
4514                 storeValue(value);
4515
4516                 return RValue<Int2>(value);
4517         }
4518
4519         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4520         {
4521                 Value *value = rhs.loadValue();
4522                 storeValue(value);
4523
4524                 return RValue<Int2>(value);
4525         }
4526
4527         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4528         {
4529                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4530         }
4531
4532         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4533         {
4534                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4535         }
4536
4537 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4538 //      {
4539 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4540 //      }
4541
4542 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4543 //      {
4544 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4545 //      }
4546
4547 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4548 //      {
4549 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4550 //      }
4551
4552         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4553         {
4554                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4555         }
4556
4557         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4558         {
4559                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4560         }
4561
4562         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4563         {
4564                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4565         }
4566
4567         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4568         {
4569                 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4570         }
4571
4572         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4573         {
4574                 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4575         }
4576
4577         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4578         {
4579                 return lhs = lhs + rhs;
4580         }
4581
4582         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4583         {
4584                 return lhs = lhs - rhs;
4585         }
4586
4587 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4588 //      {
4589 //              return lhs = lhs * rhs;
4590 //      }
4591
4592 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4593 //      {
4594 //              return lhs = lhs / rhs;
4595 //      }
4596
4597 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4598 //      {
4599 //              return lhs = lhs % rhs;
4600 //      }
4601
4602         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4603         {
4604                 return lhs = lhs & rhs;
4605         }
4606
4607         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4608         {
4609                 return lhs = lhs | rhs;
4610         }
4611
4612         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4613         {
4614                 return lhs = lhs ^ rhs;
4615         }
4616
4617         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4618         {
4619                 return lhs = lhs << rhs;
4620         }
4621
4622         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4623         {
4624                 return lhs = lhs >> rhs;
4625         }
4626
4627 //      RValue<Int2> operator+(RValue<Int2> val)
4628 //      {
4629 //              return val;
4630 //      }
4631
4632 //      RValue<Int2> operator-(RValue<Int2> val)
4633 //      {
4634 //              return RValue<Int2>(Nucleus::createNeg(val.value));
4635 //      }
4636
4637         RValue<Int2> operator~(RValue<Int2> val)
4638         {
4639                 return RValue<Int2>(Nucleus::createNot(val.value));
4640         }
4641
4642         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4643         {
4644                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4645                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4646         }
4647
4648         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4649         {
4650                 int shuffle[16] = {0, 4, 1, 5};   // Real type is v4i32
4651                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4652                 return As<Short4>(Swizzle(lowHigh, 0xEE));
4653         }
4654
4655         RValue<Int> Extract(RValue<Int2> val, int i)
4656         {
4657                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4658         }
4659
4660         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4661         {
4662                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
4663         }
4664
4665         Type *Int2::getType()
4666         {
4667                 return T(Type_v2i32);
4668         }
4669
4670         UInt2::UInt2(unsigned int x, unsigned int y)
4671         {
4672                 int64_t constantVector[2] = {x, y};
4673                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4674         }
4675
4676         UInt2::UInt2(RValue<UInt2> rhs)
4677         {
4678                 storeValue(rhs.value);
4679         }
4680
4681         UInt2::UInt2(const UInt2 &rhs)
4682         {
4683                 Value *value = rhs.loadValue();
4684                 storeValue(value);
4685         }
4686
4687         UInt2::UInt2(const Reference<UInt2> &rhs)
4688         {
4689                 Value *value = rhs.loadValue();
4690                 storeValue(value);
4691         }
4692
4693         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
4694         {
4695                 storeValue(rhs.value);
4696
4697                 return rhs;
4698         }
4699
4700         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4701         {
4702                 Value *value = rhs.loadValue();
4703                 storeValue(value);
4704
4705                 return RValue<UInt2>(value);
4706         }
4707
4708         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4709         {
4710                 Value *value = rhs.loadValue();
4711                 storeValue(value);
4712
4713                 return RValue<UInt2>(value);
4714         }
4715
4716         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4717         {
4718                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
4719         }
4720
4721         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4722         {
4723                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4724         }
4725
4726 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4727 //      {
4728 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4729 //      }
4730
4731 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4732 //      {
4733 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4734 //      }
4735
4736 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4737 //      {
4738 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
4739 //      }
4740
4741         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4742         {
4743                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
4744         }
4745
4746         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4747         {
4748                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
4749         }
4750
4751         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4752         {
4753                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
4754         }
4755
4756         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4757         {
4758                 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4759         }
4760
4761         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4762         {
4763                 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4764         }
4765
4766         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4767         {
4768                 return lhs = lhs + rhs;
4769         }
4770
4771         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4772         {
4773                 return lhs = lhs - rhs;
4774         }
4775
4776 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4777 //      {
4778 //              return lhs = lhs * rhs;
4779 //      }
4780
4781 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4782 //      {
4783 //              return lhs = lhs / rhs;
4784 //      }
4785
4786 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4787 //      {
4788 //              return lhs = lhs % rhs;
4789 //      }
4790
4791         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4792         {
4793                 return lhs = lhs & rhs;
4794         }
4795
4796         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4797         {
4798                 return lhs = lhs | rhs;
4799         }
4800
4801         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4802         {
4803                 return lhs = lhs ^ rhs;
4804         }
4805
4806         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4807         {
4808                 return lhs = lhs << rhs;
4809         }
4810
4811         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4812         {
4813                 return lhs = lhs >> rhs;
4814         }
4815
4816 //      RValue<UInt2> operator+(RValue<UInt2> val)
4817 //      {
4818 //              return val;
4819 //      }
4820
4821 //      RValue<UInt2> operator-(RValue<UInt2> val)
4822 //      {
4823 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
4824 //      }
4825
4826         RValue<UInt2> operator~(RValue<UInt2> val)
4827         {
4828                 return RValue<UInt2>(Nucleus::createNot(val.value));
4829         }
4830
4831         Type *UInt2::getType()
4832         {
4833                 return T(Type_v2i32);
4834         }
4835
4836         Int4::Int4(RValue<Byte4> cast)
4837         {
4838                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4839                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4840
4841                 Value *e;
4842                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4843                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4844                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4845
4846                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4847                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4848                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4849
4850                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4851                 storeValue(f);
4852         }
4853
4854         Int4::Int4(RValue<SByte4> cast)
4855         {
4856                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4857                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4858
4859                 Value *e;
4860                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4861                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4862                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
4863
4864                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4865                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4866                 e = Nucleus::createShuffleVector(d, d, swizzle2);
4867
4868                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4869                 Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
4870                 storeValue(g);
4871         }
4872
4873         Int4::Int4(RValue<Float4> cast)
4874         {
4875                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
4876
4877                 storeValue(xyzw);
4878         }
4879
4880         Int4::Int4(RValue<Short4> cast)
4881         {
4882                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4883                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
4884                 Value *d = Nucleus::createBitCast(c, Int4::getType());
4885                 Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
4886                 storeValue(e);
4887         }
4888
4889         Int4::Int4(RValue<UShort4> cast)
4890         {
4891                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4892                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
4893                 Value *d = Nucleus::createBitCast(c, Int4::getType());
4894                 storeValue(d);
4895         }
4896
4897         Int4::Int4(int xyzw)
4898         {
4899                 constant(xyzw, xyzw, xyzw, xyzw);
4900         }
4901
4902         Int4::Int4(int x, int yzw)
4903         {
4904                 constant(x, yzw, yzw, yzw);
4905         }
4906
4907         Int4::Int4(int x, int y, int zw)
4908         {
4909                 constant(x, y, zw, zw);
4910         }
4911
4912         Int4::Int4(int x, int y, int z, int w)
4913         {
4914                 constant(x, y, z, w);
4915         }
4916
4917         void Int4::constant(int x, int y, int z, int w)
4918         {
4919                 int64_t constantVector[4] = {x, y, z, w};
4920                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4921         }
4922
4923         Int4::Int4(RValue<Int4> rhs)
4924         {
4925                 storeValue(rhs.value);
4926         }
4927
4928         Int4::Int4(const Int4 &rhs)
4929         {
4930                 Value *value = rhs.loadValue();
4931                 storeValue(value);
4932         }
4933
4934         Int4::Int4(const Reference<Int4> &rhs)
4935         {
4936                 Value *value = rhs.loadValue();
4937                 storeValue(value);
4938         }
4939
4940         Int4::Int4(RValue<UInt4> rhs)
4941         {
4942                 storeValue(rhs.value);
4943         }
4944
4945         Int4::Int4(const UInt4 &rhs)
4946         {
4947                 Value *value = rhs.loadValue();
4948                 storeValue(value);
4949         }
4950
4951         Int4::Int4(const Reference<UInt4> &rhs)
4952         {
4953                 Value *value = rhs.loadValue();
4954                 storeValue(value);
4955         }
4956
4957         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
4958         {
4959                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
4960                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4961
4962                 storeValue(packed);
4963         }
4964
4965         Int4::Int4(RValue<Int> rhs)
4966         {
4967                 Value *vector = loadValue();
4968                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
4969
4970                 int swizzle[4] = {0, 0, 0, 0};
4971                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
4972
4973                 storeValue(replicate);
4974         }
4975
4976         Int4::Int4(const Int &rhs)
4977         {
4978                 *this = RValue<Int>(rhs.loadValue());
4979         }
4980
4981         Int4::Int4(const Reference<Int> &rhs)
4982         {
4983                 *this = RValue<Int>(rhs.loadValue());
4984         }
4985
4986         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
4987         {
4988                 storeValue(rhs.value);
4989
4990                 return rhs;
4991         }
4992
4993         RValue<Int4> Int4::operator=(const Int4 &rhs)
4994         {
4995                 Value *value = rhs.loadValue();
4996                 storeValue(value);
4997
4998                 return RValue<Int4>(value);
4999         }
5000
5001         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5002         {
5003                 Value *value = rhs.loadValue();
5004                 storeValue(value);
5005
5006                 return RValue<Int4>(value);
5007         }
5008
5009         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5010         {
5011                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5012         }
5013
5014         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5015         {
5016                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5017         }
5018
5019         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5020         {
5021                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5022         }
5023
5024         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5025         {
5026                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5027         }
5028
5029         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5030         {
5031                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5032         }
5033
5034         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5035         {
5036                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5037         }
5038
5039         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5040         {
5041                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5042         }
5043
5044         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5045         {
5046                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5047         }
5048
5049         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5050         {
5051                 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5052         }
5053
5054         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5055         {
5056                 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5057         }
5058
5059         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5060         {
5061                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5062         }
5063
5064         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5065         {
5066                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5067         }
5068
5069         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5070         {
5071                 return lhs = lhs + rhs;
5072         }
5073
5074         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5075         {
5076                 return lhs = lhs - rhs;
5077         }
5078
5079         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5080         {
5081                 return lhs = lhs * rhs;
5082         }
5083
5084 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5085 //      {
5086 //              return lhs = lhs / rhs;
5087 //      }
5088
5089 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5090 //      {
5091 //              return lhs = lhs % rhs;
5092 //      }
5093
5094         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5095         {
5096                 return lhs = lhs & rhs;
5097         }
5098
5099         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5100         {
5101                 return lhs = lhs | rhs;
5102         }
5103
5104         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5105         {
5106                 return lhs = lhs ^ rhs;
5107         }
5108
5109         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5110         {
5111                 return lhs = lhs << rhs;
5112         }
5113
5114         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5115         {
5116                 return lhs = lhs >> rhs;
5117         }
5118
5119         RValue<Int4> operator+(RValue<Int4> val)
5120         {
5121                 return val;
5122         }
5123
5124         RValue<Int4> operator-(RValue<Int4> val)
5125         {
5126                 return RValue<Int4>(Nucleus::createNeg(val.value));
5127         }
5128
5129         RValue<Int4> operator~(RValue<Int4> val)
5130         {
5131                 return RValue<Int4>(Nucleus::createNot(val.value));
5132         }
5133
5134         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5135         {
5136                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5137         }
5138
5139         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5140         {
5141                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5142         }
5143
5144         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5145         {
5146                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5147         }
5148
5149         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5150         {
5151                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5152         }
5153
5154         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5155         {
5156                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5157         }
5158
5159         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5160         {
5161                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5162         }
5163
5164         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5165         {
5166                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5167                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5168                 ::basicBlock->appendInst(cmp);
5169
5170                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5171                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5172                 ::basicBlock->appendInst(select);
5173
5174                 return RValue<Int4>(V(result));
5175         }
5176
5177         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5178         {
5179                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5180                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5181                 ::basicBlock->appendInst(cmp);
5182
5183                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5184                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5185                 ::basicBlock->appendInst(select);
5186
5187                 return RValue<Int4>(V(result));
5188         }
5189
5190         RValue<Int4> RoundInt(RValue<Float4> cast)
5191         {
5192                 RValue<Float4> rounded = Round(cast);
5193
5194                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5195                 auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
5196                 ::basicBlock->appendInst(round);
5197
5198                 return RValue<Int4>(V(result));
5199         }
5200
5201         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5202         {
5203                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5204                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5205                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5206                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5207                 pack->addArg(x.value);
5208                 pack->addArg(y.value);
5209                 ::basicBlock->appendInst(pack);
5210
5211                 return RValue<Short8>(V(result));
5212         }
5213
5214         RValue<Int> Extract(RValue<Int4> x, int i)
5215         {
5216                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5217         }
5218
5219         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5220         {
5221                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5222         }
5223
5224         RValue<Int> SignMask(RValue<Int4> x)
5225         {
5226                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5227                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5228                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5229                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5230                 movmsk->addArg(x.value);
5231                 ::basicBlock->appendInst(movmsk);
5232
5233                 return RValue<Int>(V(result));
5234         }
5235
5236         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5237         {
5238                 return RValue<Int4>(createSwizzle4(x.value, select));
5239         }
5240
5241         Type *Int4::getType()
5242         {
5243                 return T(Ice::IceType_v4i32);
5244         }
5245
5246         UInt4::UInt4(RValue<Float4> cast)
5247         {
5248                 // Smallest positive value representable in UInt, but not in Int
5249                 const unsigned int ustart = 0x80000000u;
5250                 const float ustartf = float(ustart);
5251
5252                 // Check if the value can be represented as an Int
5253                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5254                 // If the value is too large, subtract ustart and re-add it after conversion.
5255                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5256                 // Otherwise, just convert normally
5257                           (~uiValue & Int4(cast));
5258                 // If the value is negative, store 0, otherwise store the result of the conversion
5259                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
5260         }
5261
5262         UInt4::UInt4(int xyzw)
5263         {
5264                 constant(xyzw, xyzw, xyzw, xyzw);
5265         }
5266
5267         UInt4::UInt4(int x, int yzw)
5268         {
5269                 constant(x, yzw, yzw, yzw);
5270         }
5271
5272         UInt4::UInt4(int x, int y, int zw)
5273         {
5274                 constant(x, y, zw, zw);
5275         }
5276
5277         UInt4::UInt4(int x, int y, int z, int w)
5278         {
5279                 constant(x, y, z, w);
5280         }
5281
5282         void UInt4::constant(int x, int y, int z, int w)
5283         {
5284                 int64_t constantVector[4] = {x, y, z, w};
5285                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5286         }
5287
5288         UInt4::UInt4(RValue<UInt4> rhs)
5289         {
5290                 storeValue(rhs.value);
5291         }
5292
5293         UInt4::UInt4(const UInt4 &rhs)
5294         {
5295                 Value *value = rhs.loadValue();
5296                 storeValue(value);
5297         }
5298
5299         UInt4::UInt4(const Reference<UInt4> &rhs)
5300         {
5301                 Value *value = rhs.loadValue();
5302                 storeValue(value);
5303         }
5304
5305         UInt4::UInt4(RValue<Int4> rhs)
5306         {
5307                 storeValue(rhs.value);
5308         }
5309
5310         UInt4::UInt4(const Int4 &rhs)
5311         {
5312                 Value *value = rhs.loadValue();
5313                 storeValue(value);
5314         }
5315
5316         UInt4::UInt4(const Reference<Int4> &rhs)
5317         {
5318                 Value *value = rhs.loadValue();
5319                 storeValue(value);
5320         }
5321
5322         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5323         {
5324                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5325                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5326
5327                 storeValue(packed);
5328         }
5329
5330         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5331         {
5332                 storeValue(rhs.value);
5333
5334                 return rhs;
5335         }
5336
5337         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5338         {
5339                 Value *value = rhs.loadValue();
5340                 storeValue(value);
5341
5342                 return RValue<UInt4>(value);
5343         }
5344
5345         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5346         {
5347                 Value *value = rhs.loadValue();
5348                 storeValue(value);
5349
5350                 return RValue<UInt4>(value);
5351         }
5352
5353         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5354         {
5355                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5356         }
5357
5358         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5359         {
5360                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5361         }
5362
5363         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5364         {
5365                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5366         }
5367
5368         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5369         {
5370                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5371         }
5372
5373         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5374         {
5375                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5376         }
5377
5378         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5379         {
5380                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5381         }
5382
5383         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5384         {
5385                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5386         }
5387
5388         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5389         {
5390                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5391         }
5392
5393         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5394         {
5395                 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5396         }
5397
5398         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5399         {
5400                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5401         }
5402
5403         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5404         {
5405                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5406         }
5407
5408         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5409         {
5410                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5411         }
5412
5413         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5414         {
5415                 return lhs = lhs + rhs;
5416         }
5417
5418         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5419         {
5420                 return lhs = lhs - rhs;
5421         }
5422
5423         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5424         {
5425                 return lhs = lhs * rhs;
5426         }
5427
5428 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5429 //      {
5430 //              return lhs = lhs / rhs;
5431 //      }
5432
5433 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5434 //      {
5435 //              return lhs = lhs % rhs;
5436 //      }
5437
5438         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5439         {
5440                 return lhs = lhs & rhs;
5441         }
5442
5443         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5444         {
5445                 return lhs = lhs | rhs;
5446         }
5447
5448         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5449         {
5450                 return lhs = lhs ^ rhs;
5451         }
5452
5453         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5454         {
5455                 return lhs = lhs << rhs;
5456         }
5457
5458         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5459         {
5460                 return lhs = lhs >> rhs;
5461         }
5462
5463         RValue<UInt4> operator+(RValue<UInt4> val)
5464         {
5465                 return val;
5466         }
5467
5468         RValue<UInt4> operator-(RValue<UInt4> val)
5469         {
5470                 return RValue<UInt4>(Nucleus::createNeg(val.value));
5471         }
5472
5473         RValue<UInt4> operator~(RValue<UInt4> val)
5474         {
5475                 return RValue<UInt4>(Nucleus::createNot(val.value));
5476         }
5477
5478         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5479         {
5480                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
5481         }
5482
5483         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5484         {
5485                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
5486         }
5487
5488         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5489         {
5490                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
5491         }
5492
5493         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5494         {
5495                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
5496         }
5497
5498         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5499         {
5500                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
5501         }
5502
5503         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5504         {
5505                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
5506         }
5507
5508         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5509         {
5510                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5511                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
5512                 ::basicBlock->appendInst(cmp);
5513
5514                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5515                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5516                 ::basicBlock->appendInst(select);
5517
5518                 return RValue<UInt4>(V(result));
5519         }
5520
5521         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5522         {
5523                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5524                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
5525                 ::basicBlock->appendInst(cmp);
5526
5527                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5528                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5529                 ::basicBlock->appendInst(select);
5530
5531                 return RValue<UInt4>(V(result));
5532         }
5533
5534         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5535         {
5536                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5537                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5538                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5539                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5540                 pack->addArg(x.value);
5541                 pack->addArg(y.value);
5542                 ::basicBlock->appendInst(pack);
5543
5544                 return RValue<UShort8>(V(result));
5545         }
5546
5547         Type *UInt4::getType()
5548         {
5549                 return T(Ice::IceType_v4i32);
5550         }
5551
5552         Float::Float(RValue<Int> cast)
5553         {
5554                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5555
5556                 storeValue(integer);
5557         }
5558
5559         Float::Float(float x)
5560         {
5561                 storeValue(Nucleus::createConstantFloat(x));
5562         }
5563
5564         Float::Float(RValue<Float> rhs)
5565         {
5566                 storeValue(rhs.value);
5567         }
5568
5569         Float::Float(const Float &rhs)
5570         {
5571                 Value *value = rhs.loadValue();
5572                 storeValue(value);
5573         }
5574
5575         Float::Float(const Reference<Float> &rhs)
5576         {
5577                 Value *value = rhs.loadValue();
5578                 storeValue(value);
5579         }
5580
5581         RValue<Float> Float::operator=(RValue<Float> rhs)
5582         {
5583                 storeValue(rhs.value);
5584
5585                 return rhs;
5586         }
5587
5588         RValue<Float> Float::operator=(const Float &rhs)
5589         {
5590                 Value *value = rhs.loadValue();
5591                 storeValue(value);
5592
5593                 return RValue<Float>(value);
5594         }
5595
5596         RValue<Float> Float::operator=(const Reference<Float> &rhs)
5597         {
5598                 Value *value = rhs.loadValue();
5599                 storeValue(value);
5600
5601                 return RValue<Float>(value);
5602         }
5603
5604         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5605         {
5606                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5607         }
5608
5609         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5610         {
5611                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5612         }
5613
5614         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5615         {
5616                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5617         }
5618
5619         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5620         {
5621                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
5622         }
5623
5624         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5625         {
5626                 return lhs = lhs + rhs;
5627         }
5628
5629         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5630         {
5631                 return lhs = lhs - rhs;
5632         }
5633
5634         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5635         {
5636                 return lhs = lhs * rhs;
5637         }
5638
5639         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5640         {
5641                 return lhs = lhs / rhs;
5642         }
5643
5644         RValue<Float> operator+(RValue<Float> val)
5645         {
5646                 return val;
5647         }
5648
5649         RValue<Float> operator-(RValue<Float> val)
5650         {
5651                 return RValue<Float>(Nucleus::createFNeg(val.value));
5652         }
5653
5654         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5655         {
5656                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5657         }
5658
5659         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5660         {
5661                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5662         }
5663
5664         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5665         {
5666                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5667         }
5668
5669         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5670         {
5671                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5672         }
5673
5674         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5675         {
5676                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5677         }
5678
5679         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5680         {
5681                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
5682         }
5683
5684         RValue<Float> Abs(RValue<Float> x)
5685         {
5686                 return IfThenElse(x > 0.0f, x, -x);
5687         }
5688
5689         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5690         {
5691                 return IfThenElse(x > y, x, y);
5692         }
5693
5694         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5695         {
5696                 return IfThenElse(x < y, x, y);
5697         }
5698
5699         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5700         {
5701                 return 1.0f / x;
5702         }
5703
5704         RValue<Float> RcpSqrt_pp(RValue<Float> x)
5705         {
5706                 return Rcp_pp(Sqrt(x));
5707         }
5708
5709         RValue<Float> Sqrt(RValue<Float> x)
5710         {
5711                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
5712                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5713                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5714                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5715                 sqrt->addArg(x.value);
5716                 ::basicBlock->appendInst(sqrt);
5717
5718                 return RValue<Float>(V(result));
5719         }
5720
5721         RValue<Float> Round(RValue<Float> x)
5722         {
5723                 return Float4(Round(Float4(x))).x;
5724         }
5725
5726         RValue<Float> Trunc(RValue<Float> x)
5727         {
5728                 return Float4(Trunc(Float4(x))).x;
5729         }
5730
5731         RValue<Float> Frac(RValue<Float> x)
5732         {
5733                 return Float4(Frac(Float4(x))).x;
5734         }
5735
5736         RValue<Float> Floor(RValue<Float> x)
5737         {
5738                 return Float4(Floor(Float4(x))).x;
5739         }
5740
5741         RValue<Float> Ceil(RValue<Float> x)
5742         {
5743                 return Float4(Ceil(Float4(x))).x;
5744         }
5745
5746         Type *Float::getType()
5747         {
5748                 return T(Ice::IceType_f32);
5749         }
5750
5751         Float2::Float2(RValue<Float4> cast)
5752         {
5753                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5754         }
5755
5756         Type *Float2::getType()
5757         {
5758                 return T(Type_v2f32);
5759         }
5760
5761         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
5762         {
5763                 Value *a = Int4(cast).loadValue();
5764                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5765
5766                 storeValue(xyzw);
5767         }
5768
5769         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
5770         {
5771                 Value *a = Int4(cast).loadValue();
5772                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5773
5774                 storeValue(xyzw);
5775         }
5776
5777         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
5778         {
5779                 Int4 c(cast);
5780                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5781         }
5782
5783         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
5784         {
5785                 Int4 c(cast);
5786                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5787         }
5788
5789         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
5790         {
5791                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
5792
5793                 storeValue(xyzw);
5794         }
5795
5796         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
5797         {
5798                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
5799                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
5800
5801                 storeValue(result.value);
5802         }
5803
5804         Float4::Float4() : FloatXYZW(this)
5805         {
5806         }
5807
5808         Float4::Float4(float xyzw) : FloatXYZW(this)
5809         {
5810                 constant(xyzw, xyzw, xyzw, xyzw);
5811         }
5812
5813         Float4::Float4(float x, float yzw) : FloatXYZW(this)
5814         {
5815                 constant(x, yzw, yzw, yzw);
5816         }
5817
5818         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
5819         {
5820                 constant(x, y, zw, zw);
5821         }
5822
5823         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
5824         {
5825                 constant(x, y, z, w);
5826         }
5827
5828         void Float4::constant(float x, float y, float z, float w)
5829         {
5830                 double constantVector[4] = {x, y, z, w};
5831                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5832         }
5833
5834         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
5835         {
5836                 storeValue(rhs.value);
5837         }
5838
5839         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
5840         {
5841                 Value *value = rhs.loadValue();
5842                 storeValue(value);
5843         }
5844
5845         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
5846         {
5847                 Value *value = rhs.loadValue();
5848                 storeValue(value);
5849         }
5850
5851         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
5852         {
5853                 Value *vector = loadValue();
5854                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5855
5856                 int swizzle[4] = {0, 0, 0, 0};
5857                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5858
5859                 storeValue(replicate);
5860         }
5861
5862         Float4::Float4(const Float &rhs) : FloatXYZW(this)
5863         {
5864                 *this = RValue<Float>(rhs.loadValue());
5865         }
5866
5867         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
5868         {
5869                 *this = RValue<Float>(rhs.loadValue());
5870         }
5871
5872         RValue<Float4> Float4::operator=(float x)
5873         {
5874                 return *this = Float4(x, x, x, x);
5875         }
5876
5877         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
5878         {
5879                 storeValue(rhs.value);
5880
5881                 return rhs;
5882         }
5883
5884         RValue<Float4> Float4::operator=(const Float4 &rhs)
5885         {
5886                 Value *value = rhs.loadValue();
5887                 storeValue(value);
5888
5889                 return RValue<Float4>(value);
5890         }
5891
5892         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
5893         {
5894                 Value *value = rhs.loadValue();
5895                 storeValue(value);
5896
5897                 return RValue<Float4>(value);
5898         }
5899
5900         RValue<Float4> Float4::operator=(RValue<Float> rhs)
5901         {
5902                 return *this = Float4(rhs);
5903         }
5904
5905         RValue<Float4> Float4::operator=(const Float &rhs)
5906         {
5907                 return *this = Float4(rhs);
5908         }
5909
5910         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
5911         {
5912                 return *this = Float4(rhs);
5913         }
5914
5915         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
5916         {
5917                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
5918         }
5919
5920         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
5921         {
5922                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
5923         }
5924
5925         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
5926         {
5927                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
5928         }
5929
5930         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
5931         {
5932                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
5933         }
5934
5935         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
5936         {
5937                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
5938         }
5939
5940         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
5941         {
5942                 return lhs = lhs + rhs;
5943         }
5944
5945         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
5946         {
5947                 return lhs = lhs - rhs;
5948         }
5949
5950         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
5951         {
5952                 return lhs = lhs * rhs;
5953         }
5954
5955         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
5956         {
5957                 return lhs = lhs / rhs;
5958         }
5959
5960         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
5961         {
5962                 return lhs = lhs % rhs;
5963         }
5964
5965         RValue<Float4> operator+(RValue<Float4> val)
5966         {
5967                 return val;
5968         }
5969
5970         RValue<Float4> operator-(RValue<Float4> val)
5971         {
5972                 return RValue<Float4>(Nucleus::createFNeg(val.value));
5973         }
5974
5975         RValue<Float4> Abs(RValue<Float4> x)
5976         {
5977                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
5978                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
5979                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
5980
5981                 return As<Float4>(result);
5982         }
5983
5984         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
5985         {
5986                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5987                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ule, condition, x.value, y.value);
5988                 ::basicBlock->appendInst(cmp);
5989
5990                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
5991                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5992                 ::basicBlock->appendInst(select);
5993
5994                 return RValue<Float4>(V(result));
5995         }
5996
5997         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
5998         {
5999                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6000                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ugt, condition, x.value, y.value);
6001                 ::basicBlock->appendInst(cmp);
6002
6003                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6004                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6005                 ::basicBlock->appendInst(select);
6006
6007                 return RValue<Float4>(V(result));
6008         }
6009
6010         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6011         {
6012                 return Float4(1.0f) / x;
6013         }
6014
6015         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6016         {
6017                 return Rcp_pp(Sqrt(x));
6018         }
6019
6020         RValue<Float4> Sqrt(RValue<Float4> x)
6021         {
6022                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6023                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6024                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6025                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6026                 sqrt->addArg(x.value);
6027                 ::basicBlock->appendInst(sqrt);
6028
6029                 return RValue<Float4>(V(result));
6030         }
6031
6032         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6033         {
6034                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6035         }
6036
6037         RValue<Float> Extract(RValue<Float4> x, int i)
6038         {
6039                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6040         }
6041
6042         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6043         {
6044                 return RValue<Float4>(createSwizzle4(x.value, select));
6045         }
6046
6047         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6048         {
6049                 int shuffle[4] =
6050                 {
6051                         ((imm >> 0) & 0x03) + 0,
6052                         ((imm >> 2) & 0x03) + 0,
6053                         ((imm >> 4) & 0x03) + 4,
6054                         ((imm >> 6) & 0x03) + 4,
6055                 };
6056
6057                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6058         }
6059
6060         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6061         {
6062                 int shuffle[4] = {0, 4, 1, 5};
6063                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6064         }
6065
6066         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6067         {
6068                 int shuffle[4] = {2, 6, 3, 7};
6069                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6070         }
6071
6072         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6073         {
6074                 Value *vector = lhs.loadValue();
6075                 Value *result = createMask4(vector, rhs.value, select);
6076                 lhs.storeValue(result);
6077
6078                 return RValue<Float4>(result);
6079         }
6080
6081         RValue<Int> SignMask(RValue<Float4> x)
6082         {
6083                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6084                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6085                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6086                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6087                 movmsk->addArg(x.value);
6088                 ::basicBlock->appendInst(movmsk);
6089
6090                 return RValue<Int>(V(result));
6091         }
6092
6093         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6094         {
6095                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6096         }
6097
6098         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6099         {
6100                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6101         }
6102
6103         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6104         {
6105                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6106         }
6107
6108         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6109         {
6110                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6111         }
6112
6113         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6114         {
6115                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6116         }
6117
6118         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6119         {
6120                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6121         }
6122
6123         RValue<Float4> Round(RValue<Float4> x)
6124         {
6125                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6126                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6127                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6128                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6129                 round->addArg(x.value);
6130                 round->addArg(::context->getConstantInt32(0));
6131                 ::basicBlock->appendInst(round);
6132
6133                 return RValue<Float4>(V(result));
6134         }
6135
6136         RValue<Float4> Trunc(RValue<Float4> x)
6137         {
6138                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6139                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6140                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6141                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6142                 round->addArg(x.value);
6143                 round->addArg(::context->getConstantInt32(3));
6144                 ::basicBlock->appendInst(round);
6145
6146                 return RValue<Float4>(V(result));
6147         }
6148
6149         RValue<Float4> Frac(RValue<Float4> x)
6150         {
6151                 return x - Floor(x);
6152         }
6153
6154         RValue<Float4> Floor(RValue<Float4> x)
6155         {
6156                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6157                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6158                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6159                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6160                 round->addArg(x.value);
6161                 round->addArg(::context->getConstantInt32(1));
6162                 ::basicBlock->appendInst(round);
6163
6164                 return RValue<Float4>(V(result));
6165         }
6166
6167         RValue<Float4> Ceil(RValue<Float4> x)
6168         {
6169                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6170                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6171                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6172                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6173                 round->addArg(x.value);
6174                 round->addArg(::context->getConstantInt32(2));
6175                 ::basicBlock->appendInst(round);
6176
6177                 return RValue<Float4>(V(result));
6178         }
6179
6180         Type *Float4::getType()
6181         {
6182                 return T(Ice::IceType_v4f32);
6183         }
6184
6185         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6186         {
6187                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6188         }
6189
6190         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6191         {
6192                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6193         }
6194
6195         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6196         {
6197                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6198         }
6199
6200         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6201         {
6202                 return lhs = lhs + offset;
6203         }
6204
6205         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6206         {
6207                 return lhs = lhs + offset;
6208         }
6209
6210         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6211         {
6212                 return lhs = lhs + offset;
6213         }
6214
6215         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6216         {
6217                 return lhs + -offset;
6218         }
6219
6220         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6221         {
6222                 return lhs + -offset;
6223         }
6224
6225         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6226         {
6227                 return lhs + -offset;
6228         }
6229
6230         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6231         {
6232                 return lhs = lhs - offset;
6233         }
6234
6235         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6236         {
6237                 return lhs = lhs - offset;
6238         }
6239
6240         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6241         {
6242                 return lhs = lhs - offset;
6243         }
6244
6245         void Return()
6246         {
6247                 Nucleus::createRetVoid();
6248                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6249                 Nucleus::createUnreachable();
6250         }
6251
6252         void Return(RValue<Int> ret)
6253         {
6254                 Nucleus::createRet(ret.value);
6255                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6256                 Nucleus::createUnreachable();
6257         }
6258
6259         bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6260         {
6261                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6262                 Nucleus::setInsertBlock(bodyBB);
6263
6264                 return true;
6265         }
6266
6267         RValue<Long> Ticks()
6268         {
6269                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
6270         }
6271 }