src/Reactor/SubzeroReactor.cpp

   1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //    http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include "Nucleus.hpp"
  16
  17 #include "Reactor.hpp"
  18 #include "Routine.hpp"
  19
  20 #include "Optimizer.hpp"
  21
  22 #include "src/IceTypes.h"
  23 #include "src/IceCfg.h"
  24 #include "src/IceELFStreamer.h"
  25 #include "src/IceGlobalContext.h"
  26 #include "src/IceCfgNode.h"
  27 #include "src/IceELFObjectWriter.h"
  28 #include "src/IceGlobalInits.h"
  29
  30 #include "llvm/Support/FileSystem.h"
  31 #include "llvm/Support/raw_os_ostream.h"
  32
  33 #if defined(_WIN32)
  34 #ifndef WIN32_LEAN_AND_MEAN
  35 #define WIN32_LEAN_AND_MEAN
  36 #endif // !WIN32_LEAN_AND_MEAN
  37 #ifndef NOMINMAX
  38 #define NOMINMAX
  39 #endif // !NOMINMAX
  40 #include <Windows.h>
  41 #else
  42 #include <sys/mman.h>
  43 #if !defined(MAP_ANONYMOUS)
  44 #define MAP_ANONYMOUS MAP_ANON
  45 #endif
  46 #endif
  47
  48 #include <mutex>
  49 #include <limits>
  50 #include <iostream>
  51 #include <cassert>
  52
namespace
{
        // Code generation state shared by the Nucleus methods in this file.
        // The Nucleus constructor locks codegenMutex and the destructor unlocks it,
        // so this state is used by one Nucleus at a time.
        Ice::GlobalContext *context = nullptr;   // Created by the Nucleus constructor, deleted by its destructor.
        Ice::Cfg *function = nullptr;   // Function being built; created by Nucleus::createFunction().
        Ice::CfgNode *basicBlock = nullptr;   // Block to which newly created instructions are appended.
        Ice::CfgLocalAllocatorScope *allocator = nullptr;   // Created together with 'function' in Nucleus::createFunction().
        sw::Routine *routine = nullptr;   // The in-memory ELF stream; returned by Nucleus::acquireRoutine().

        std::mutex codegenMutex;

        // Only assigned on the (currently disabled) write-to-file path of the Nucleus constructor.
        Ice::ELFFileStreamer *elfFile = nullptr;
        Ice::Fdstream *out = nullptr;
}
  66
  67 namespace
  68 {
  69         #if !defined(__i386__) && defined(_M_IX86)
  70                 #define __i386__ 1
  71         #endif
  72
  73         #if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
  74                 #define __x86_64__ 1
  75         #endif
  76
  77         class CPUID
  78         {
  79         public:
  80                 const static bool ARM;
  81                 const static bool SSE4_1;
  82
  83         private:
  84                 static void cpuid(int registers[4], int info)
  85                 {
  86                         #if defined(__i386__) || defined(__x86_64__)
  87                                 #if defined(_WIN32)
  88                                         __cpuid(registers, info);
  89                                 #else
  90                                         __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
  91                                 #endif
  92                         #else
  93                                 registers[0] = 0;
  94                                 registers[1] = 0;
  95                                 registers[2] = 0;
  96                                 registers[3] = 0;
  97                         #endif
  98                 }
  99
 100                 static bool detectARM()
 101                 {
 102                         #if defined(__arm__)
 103                                 return true;
 104                         #elif defined(__i386__) || defined(__x86_64__)
 105                                 return false;
 106                         #else
 107                                 #error "Unknown architecture"
 108                         #endif
 109                 }
 110
 111                 static bool detectSSE4_1()
 112                 {
 113                         #if defined(__i386__) || defined(__x86_64__)
 114                                 int registers[4];
 115                                 cpuid(registers, 1);
 116                                 return (registers[2] & 0x00080000) != 0;
 117                         #else
 118                                 return false;
 119                         #endif
 120                 }
 121         };
 122
 123         const bool CPUID::ARM = CPUID::detectARM();
 124         const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
 125         const bool emulateIntrinsics = CPUID::ARM;
 126 }
 127
 128 namespace sw
 129 {
        // Types narrower than a full Subzero vector are represented by a full-width
        // Ice vector type tagged with an element count in the bits starting at
        // EmulatedShift. T(Type*) masks the tag off again to recover the Ice::Type.
        enum EmulatedType
        {
                EmulatedShift = 16,   // Bit position of the element-count tag
                EmulatedV2 = 2 << EmulatedShift,   // Tag: 2 elements
                EmulatedV4 = 4 << EmulatedShift,   // Tag: 4 elements
                EmulatedV8 = 8 << EmulatedShift,   // Tag: 8 elements
                EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,   // Mask covering every tag bit

                // Underlying Ice vector type combined with the emulated element count.
                // The byte sizes of these types are listed in typeSize().
                Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
                Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
                Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
                Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
                Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
                Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
        };
 145
        // Reactor-facing names for Subzero objects. They add no members; pointers to
        // the Ice base objects are reinterpreted as these types (see V() and B()).
        class Value : public Ice::Operand {};
        class SwitchCases : public Ice::InstSwitch {};
        class BasicBlock : public Ice::CfgNode {};
 149
 150         Ice::Type T(Type *t)
 151         {
 152                 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
 153                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
 154         }
 155
 156         Type *T(Ice::Type t)
 157         {
 158                 return reinterpret_cast<Type*>(t);
 159         }
 160
 161         Type *T(EmulatedType t)
 162         {
 163                 return reinterpret_cast<Type*>(t);
 164         }
 165
 166         Value *V(Ice::Operand *v)
 167         {
 168                 return reinterpret_cast<Value*>(v);
 169         }
 170
 171         BasicBlock *B(Ice::CfgNode *b)
 172         {
 173                 return reinterpret_cast<BasicBlock*>(b);
 174         }
 175
 176         static size_t typeSize(Type *type)
 177         {
 178                 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
 179                 {
 180                         switch(reinterpret_cast<std::intptr_t>(type))
 181                         {
 182                         case Type_v2i32: return 8;
 183                         case Type_v4i16: return 8;
 184                         case Type_v2i16: return 4;
 185                         case Type_v8i8:  return 8;
 186                         case Type_v4i8:  return 4;
 187                         case Type_v2f32: return 8;
 188                         default: assert(false);
 189                         }
 190                 }
 191
 192                 return Ice::typeWidthInBytes(T(type));
 193         }
 194
        // Optimization pass list. Only the first two entries are given explicitly;
        // the remaining eight are value-initialized.
        Optimization optimization[10] = {InstructionCombining, Disabled};

        // ELF structure flavours matching the pointer size of the host process.
        using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
        using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
 199
 200         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
 201         {
 202                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
 203         }
 204
 205         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
 206         {
 207                 return &sectionHeader(elfHeader)[index];
 208         }
 209
 210         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
 211         {
 212                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 213
 214                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 215                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 216                 uint32_t index = relocation.getSymbol();
 217                 int table = relocationTable.sh_link;
 218                 void *symbolValue = nullptr;
 219
 220                 if(index != SHN_UNDEF)
 221                 {
 222                         if(table == SHN_UNDEF) return nullptr;
 223                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 224
 225                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 226                         if(index >= symtab_entries)
 227                         {
 228                                 assert(index < symtab_entries && "Symbol Index out of range");
 229                                 return nullptr;
 230                         }
 231
 232                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 233                         Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
 234                         uint16_t section = symbol.st_shndx;
 235
 236                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 237                         {
 238                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 239                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 240                         }
 241                         else
 242                         {
 243                                 return nullptr;
 244                         }
 245                 }
 246
 247                 if(CPUID::ARM)
 248                 {
 249                         switch(relocation.getType())
 250                         {
 251                         case R_ARM_NONE:
 252                                 // No relocation
 253                                 break;
 254                         case R_ARM_MOVW_ABS_NC:
 255                                 {
 256                                         uint32_t thumb = 0;   // Calls to Thumb code not supported.
 257                                         uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
 258                                         *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
 259                                 }
 260                                 break;
 261                         case R_ARM_MOVT_ABS:
 262                                 {
 263                                         uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
 264                                         *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
 265                                 }
 266                                 break;
 267                         default:
 268                                 assert(false && "Unsupported relocation type");
 269                                 return nullptr;
 270                         }
 271                 }
 272                 else
 273                 {
 274                         switch(relocation.getType())
 275                         {
 276                         case R_386_NONE:
 277                                 // No relocation
 278                                 break;
 279                         case R_386_32:
 280                                 *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
 281                                 break;
 282                 //      case R_386_PC32:
 283                 //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
 284                 //              break;
 285                         default:
 286                                 assert(false && "Unsupported relocation type");
 287                                 return nullptr;
 288                         }
 289                 }
 290
 291
 292                 return symbolValue;
 293         }
 294
 295         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
 296         {
 297                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
 298
 299                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
 300                 int32_t *patchSite = (int*)(address + relocation.r_offset);
 301                 uint32_t index = relocation.getSymbol();
 302                 int table = relocationTable.sh_link;
 303                 void *symbolValue = nullptr;
 304
 305                 if(index != SHN_UNDEF)
 306                 {
 307                         if(table == SHN_UNDEF) return nullptr;
 308                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
 309
 310                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
 311                         if(index >= symtab_entries)
 312                         {
 313                                 assert(index < symtab_entries && "Symbol Index out of range");
 314                                 return nullptr;
 315                         }
 316
 317                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
 318                         Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
 319                         uint16_t section = symbol.st_shndx;
 320
 321                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
 322                         {
 323                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
 324                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
 325                         }
 326                         else
 327                         {
 328                                 return nullptr;
 329                         }
 330                 }
 331
 332                 switch(relocation.getType())
 333                 {
 334                 case R_X86_64_NONE:
 335                         // No relocation
 336                         break;
 337                 case R_X86_64_64:
 338                         *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
 339                         break;
 340                 case R_X86_64_PC32:
 341                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
 342                         break;
 343                 case R_X86_64_32S:
 344                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
 345                         break;
 346                 default:
 347                         assert(false && "Unsupported relocation type");
 348                         return nullptr;
 349                 }
 350
 351                 return symbolValue;
 352         }
 353
 354         void *loadImage(uint8_t *const elfImage, size_t &codeSize)
 355         {
 356                 ElfHeader *elfHeader = (ElfHeader*)elfImage;
 357
 358                 if(!elfHeader->checkMagic())
 359                 {
 360                         return nullptr;
 361                 }
 362
 363                 // Expect ELF bitness to match platform
 364                 assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
 365                 #if defined(__i386__)
 366                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
 367                 #elif defined(__x86_64__)
 368                         assert(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
 369                 #elif defined(__arm__)
 370                         assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
 371                 #else
 372                         #error "Unsupported platform"
 373                 #endif
 374
 375                 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
 376                 void *entry = nullptr;
 377
 378                 for(int i = 0; i < elfHeader->e_shnum; i++)
 379                 {
 380                         if(sectionHeader[i].sh_type == SHT_PROGBITS)
 381                         {
 382                                 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
 383                                 {
 384                                         entry = elfImage + sectionHeader[i].sh_offset;
 385                                         codeSize = sectionHeader[i].sh_size;
 386                                 }
 387                         }
 388                         else if(sectionHeader[i].sh_type == SHT_REL)
 389                         {
 390                                 assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code
 391
 392                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 393                                 {
 394                                         const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
 395                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 396                                 }
 397                         }
 398                         else if(sectionHeader[i].sh_type == SHT_RELA)
 399                         {
 400                                 assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code
 401
 402                                 for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
 403                                 {
 404                                         const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
 405                                         relocateSymbol(elfHeader, relocation, sectionHeader[i]);
 406                                 }
 407                         }
 408                 }
 409
 410                 return entry;
 411         }
 412
 413         template<typename T>
 414         struct ExecutableAllocator
 415         {
 416                 ExecutableAllocator() {};
 417                 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
 418
 419                 using value_type = T;
 420                 using size_type = std::size_t;
 421
 422                 T *allocate(size_type n)
 423                 {
 424                         #if defined(_WIN32)
 425                                 return (T*)VirtualAlloc(NULL, sizeof(T) * n, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
 426                         #else
 427                                 return (T*)mmap(nullptr, sizeof(T) * n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 428                         #endif
 429                 }
 430
 431                 void deallocate(T *p, size_type n)
 432                 {
 433                         #if defined(_WIN32)
 434                                 VirtualFree(p, 0, MEM_RELEASE);
 435                         #else
 436                                 munmap(p, sizeof(T) * n);
 437                         #endif
 438                 }
 439         };
 440
 441         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
 442         {
 443                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
 444                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
 445
 446         public:
 447                 ELFMemoryStreamer() : Routine(), entry(nullptr)
 448                 {
 449                         position = 0;
 450                         buffer.reserve(0x1000);
 451                 }
 452
 453                 virtual ~ELFMemoryStreamer()
 454                 {
 455                         #if defined(_WIN32)
 456                                 if(buffer.size() != 0)
 457                                 {
 458                                         DWORD exeProtection;
 459                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
 460                                 }
 461                         #endif
 462                 }
 463
 464                 void write8(uint8_t Value) override
 465                 {
 466                         if(position == (uint64_t)buffer.size())
 467                         {
 468                                 buffer.push_back(Value);
 469                                 position++;
 470                         }
 471                         else if(position < (uint64_t)buffer.size())
 472                         {
 473                                 buffer[position] = Value;
 474                                 position++;
 475                         }
 476                         else assert(false && "UNIMPLEMENTED");
 477                 }
 478
 479                 void writeBytes(llvm::StringRef Bytes) override
 480                 {
 481                         std::size_t oldSize = buffer.size();
 482                         buffer.resize(oldSize + Bytes.size());
 483                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
 484                         position += Bytes.size();
 485                 }
 486
 487                 uint64_t tell() const override { return position; }
 488
 489                 void seek(uint64_t Off) override { position = Off; }
 490
 491                 const void *getEntry() override
 492                 {
 493                         if(!entry)
 494                         {
 495                                 #if defined(_WIN32)
 496                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READWRITE, &oldProtection);
 497                                 #else
 498                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_WRITE | PROT_EXEC);
 499                                 #endif
 500
 501                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
 502
 503                                 size_t codeSize = 0;
 504                                 entry = loadImage(&buffer[0], codeSize);
 505
 506                                 #if defined(_WIN32)
 507                                         FlushInstructionCache(GetCurrentProcess(), NULL, 0);
 508                                 #else
 509                                         __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
 510                                 #endif
 511                         }
 512
 513                         return entry;
 514                 }
 515
 516         private:
 517                 void *entry;
 518                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
 519                 std::size_t position;
 520
 521                 #if defined(_WIN32)
 522                 DWORD oldProtection;
 523                 #endif
 524         };
 525
 526         Nucleus::Nucleus()
 527         {
 528                 ::codegenMutex.lock();   // Reactor is currently not thread safe
 529
 530                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
 531                 Ice::ClFlags::getParsedClFlags(Flags);
 532
 533                 #if defined(__arm__)
 534                         Flags.setTargetArch(Ice::Target_ARM32);
 535                         Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
 536                 #else   // x86
 537                         Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
 538                         Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
 539                 #endif
 540                 Flags.setOutFileType(Ice::FT_Elf);
 541                 Flags.setOptLevel(Ice::Opt_2);
 542                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
 543                 Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
 544                 Flags.setDisableHybridAssembly(true);
 545
 546                 static llvm::raw_os_ostream cout(std::cout);
 547                 static llvm::raw_os_ostream cerr(std::cerr);
 548
 549                 if(false)   // Write out to a file
 550                 {
 551                         std::error_code errorCode;
 552                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
 553                         ::elfFile = new Ice::ELFFileStreamer(*out);
 554                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
 555                 }
 556                 else
 557                 {
 558                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
 559                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
 560                         ::routine = elfMemory;
 561                 }
 562         }
 563
 564         Nucleus::~Nucleus()
 565         {
 566                 delete ::allocator;
 567                 delete ::function;
 568                 delete ::context;
 569
 570                 delete ::elfFile;
 571                 delete ::out;
 572
 573                 ::codegenMutex.unlock();
 574         }
 575
        // Finishes the function under construction, translates it and emits it
        // through the context's object writer.
        //
        // name              Function name; each wide character is narrowed to a char.
        // runOptimizations  Not consulted here; optimize() is always called.
        //
        // Returns the global routine created by the Nucleus constructor.
        Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
        {
                // Make sure the current block ends in a return instruction.
                if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
                {
                        createRetVoid();
                }

                std::wstring wideName(name);
                std::string asciiName(wideName.begin(), wideName.end());
                ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));

                optimize();

                ::function->translate();
                assert(!::function->hasError());

                // NOTE(review): the released list is not freed in this function — confirm
                // whether merge() takes ownership of it.
                auto *globals = ::function->getGlobalInits().release();

                if(globals && !globals->empty())
                {
                        ::context->getGlobals()->merge(globals);
                }

                // Emit the ELF object: file header, function code, then globals,
                // constants, jump tables and the remaining non-user sections.
                ::context->emitFileHeader();
                ::function->emitIAS();
                auto assembler = ::function->releaseAssembler();
                auto objectWriter = ::context->getObjectWriter();
                assembler->alignFunction();
                objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
                ::context->lowerGlobals("last");
                ::context->lowerConstants();
                ::context->lowerJumpTables();
                objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
                objectWriter->writeNonUserSections();

                return ::routine;
        }
 613
 614         void Nucleus::optimize()
 615         {
 616                 sw::optimize(::function);
 617         }
 618
 619         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
 620         {
 621                 Ice::Type type = T(t);
 622                 int typeSize = Ice::typeWidthInBytes(type);
 623                 int totalSize = typeSize * (arraySize ? arraySize : 1);
 624
 625                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
 626                 auto address = ::function->makeVariable(T(getPointerType(t)));
 627                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
 628                 ::function->getEntryNode()->getInsts().push_front(alloca);
 629
 630                 return V(address);
 631         }
 632
 633         BasicBlock *Nucleus::createBasicBlock()
 634         {
 635                 return B(::function->makeNode());
 636         }
 637
 638         BasicBlock *Nucleus::getInsertBlock()
 639         {
 640                 return B(::basicBlock);
 641         }
 642
 643         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
 644         {
 645         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
 646                 ::basicBlock = basicBlock;
 647         }
 648
 649         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
 650         {
 651                 uint32_t sequenceNumber = 0;
 652                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
 653                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
 654
 655                 for(Type *type : Params)
 656                 {
 657                         Ice::Variable *arg = ::function->makeVariable(T(type));
 658                         ::function->addArg(arg);
 659                 }
 660
 661                 Ice::CfgNode *node = ::function->makeNode();
 662                 ::function->setEntryNode(node);
 663                 ::basicBlock = node;
 664         }
 665
 666         Value *Nucleus::getArgument(unsigned int index)
 667         {
 668                 return V(::function->getArgs()[index]);
 669         }
 670
 671         void Nucleus::createRetVoid()
 672         {
 673                 Ice::InstRet *ret = Ice::InstRet::create(::function);
 674                 ::basicBlock->appendInst(ret);
 675         }
 676
 677         void Nucleus::createRet(Value *v)
 678         {
 679                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
 680                 ::basicBlock->appendInst(ret);
 681         }
 682
 683         void Nucleus::createBr(BasicBlock *dest)
 684         {
 685                 auto br = Ice::InstBr::create(::function, dest);
 686                 ::basicBlock->appendInst(br);
 687         }
 688
 689         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
 690         {
 691                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
 692                 ::basicBlock->appendInst(br);
 693         }
 694
 695         static bool isCommutative(Ice::InstArithmetic::OpKind op)
 696         {
 697                 switch(op)
 698                 {
 699                 case Ice::InstArithmetic::Add:
 700                 case Ice::InstArithmetic::Fadd:
 701                 case Ice::InstArithmetic::Mul:
 702                 case Ice::InstArithmetic::Fmul:
 703                 case Ice::InstArithmetic::And:
 704                 case Ice::InstArithmetic::Or:
 705                 case Ice::InstArithmetic::Xor:
 706                         return true;
 707                 default:
 708                         return false;
 709                 }
 710         }
 711
 712         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
 713         {
 714                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
 715
 716                 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
 717
 718                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
 719                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
 720                 ::basicBlock->appendInst(arithmetic);
 721
 722                 return V(result);
 723         }
 724
 725         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
 726         {
 727                 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
 728         }
 729
 730         Value *Nucleus::createSub(Value *lhs, Value *rhs)
 731         {
 732                 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
 733         }
 734
 735         Value *Nucleus::createMul(Value *lhs, Value *rhs)
 736         {
 737                 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
 738         }
 739
 740         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
 741         {
 742                 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
 743         }
 744
 745         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
 746         {
 747                 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
 748         }
 749
 750         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
 751         {
 752                 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
 753         }
 754
 755         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
 756         {
 757                 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
 758         }
 759
 760         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
 761         {
 762                 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
 763         }
 764
 765         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
 766         {
 767                 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
 768         }
 769
 770         Value *Nucleus::createURem(Value *lhs, Value *rhs)
 771         {
 772                 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
 773         }
 774
 775         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
 776         {
 777                 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
 778         }
 779
 780         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
 781         {
 782                 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
 783         }
 784
 785         Value *Nucleus::createShl(Value *lhs, Value *rhs)
 786         {
 787                 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
 788         }
 789
 790         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
 791         {
 792                 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
 793         }
 794
 795         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
 796         {
 797                 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
 798         }
 799
 800         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
 801         {
 802                 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
 803         }
 804
 805         Value *Nucleus::createOr(Value *lhs, Value *rhs)
 806         {
 807                 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
 808         }
 809
 810         Value *Nucleus::createXor(Value *lhs, Value *rhs)
 811         {
 812                 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
 813         }
 814
 815         Value *Nucleus::createNeg(Value *v)
 816         {
 817                 return createSub(createNullValue(T(v->getType())), v);
 818         }
 819
 820         Value *Nucleus::createFNeg(Value *v)
 821         {
 822                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
 823                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
 824                                       createConstantVector(c, T(v->getType())) :
 825                                       V(::context->getConstantFloat(-0.0f));
 826
 827                 return createFSub(negativeZero, v);
 828         }
 829
 830         Value *Nucleus::createNot(Value *v)
 831         {
 832                 if(Ice::isScalarIntegerType(v->getType()))
 833                 {
 834                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
 835                 }
 836                 else   // Vector
 837                 {
 838                         int64_t c[4] = {-1, -1, -1, -1};
 839                         return createXor(v, createConstantVector(c, T(v->getType())));
 840                 }
 841         }
 842
 843         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
 844         {
 845                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 846                 Ice::Variable *result = ::function->makeVariable(T(type));
 847
 848                 if(valueType & EmulatedBits)
 849                 {
 850                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
 851                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 852                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
 853                         load->addArg(ptr);
 854                         load->addArg(::context->getConstantInt32(typeSize(type)));
 855                         ::basicBlock->appendInst(load);
 856                 }
 857                 else
 858                 {
 859                         auto load = Ice::InstLoad::create(::function, result, ptr, align);
 860                         ::basicBlock->appendInst(load);
 861                 }
 862
 863                 return V(result);
 864         }
 865
 866         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
 867         {
 868                 int valueType = (int)reinterpret_cast<intptr_t>(type);
 869
 870                 if(valueType & EmulatedBits)
 871                 {
 872                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
 873                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
 874                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
 875                         store->addArg(value);
 876                         store->addArg(ptr);
 877                         store->addArg(::context->getConstantInt32(typeSize(type)));
 878                         ::basicBlock->appendInst(store);
 879                 }
 880                 else
 881                 {
 882                         assert(T(value->getType()) == type);
 883
 884                         auto store = Ice::InstStore::create(::function, value, ptr, align);
 885                         ::basicBlock->appendInst(store);
 886                 }
 887
 888                 return value;
 889         }
 890
 891         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
 892         {
 893                 assert(index->getType() == Ice::IceType_i32);
 894
 895                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
 896                 {
 897                         int32_t offset = constant->getValue() * (int)typeSize(type);
 898
 899                         if(offset == 0)
 900                         {
 901                                 return ptr;
 902                         }
 903
 904                         return createAdd(ptr, createConstantInt(offset));
 905                 }
 906
 907                 if(!Ice::isByteSizedType(T(type)))
 908                 {
 909                         index = createMul(index, createConstantInt((int)typeSize(type)));
 910                 }
 911
 912                 if(sizeof(void*) == 8)
 913                 {
 914                         if(unsignedIndex)
 915                         {
 916                                 index = createZExt(index, T(Ice::IceType_i64));
 917                         }
 918                         else
 919                         {
 920                                 index = createSExt(index, T(Ice::IceType_i64));
 921                         }
 922                 }
 923
 924                 return createAdd(ptr, index);
 925         }
 926
 927         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
 928         {
 929                 assert(false && "UNIMPLEMENTED"); return nullptr;
 930         }
 931
 932         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
 933         {
 934                 if(v->getType() == T(destType))
 935                 {
 936                         return v;
 937                 }
 938
 939                 Ice::Variable *result = ::function->makeVariable(T(destType));
 940                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
 941                 ::basicBlock->appendInst(cast);
 942
 943                 return V(result);
 944         }
 945
 946         Value *Nucleus::createTrunc(Value *v, Type *destType)
 947         {
 948                 return createCast(Ice::InstCast::Trunc, v, destType);
 949         }
 950
 951         Value *Nucleus::createZExt(Value *v, Type *destType)
 952         {
 953                 return createCast(Ice::InstCast::Zext, v, destType);
 954         }
 955
 956         Value *Nucleus::createSExt(Value *v, Type *destType)
 957         {
 958                 return createCast(Ice::InstCast::Sext, v, destType);
 959         }
 960
 961         Value *Nucleus::createFPToSI(Value *v, Type *destType)
 962         {
 963                 return createCast(Ice::InstCast::Fptosi, v, destType);
 964         }
 965
 966         Value *Nucleus::createSIToFP(Value *v, Type *destType)
 967         {
 968                 return createCast(Ice::InstCast::Sitofp, v, destType);
 969         }
 970
 971         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
 972         {
 973                 return createCast(Ice::InstCast::Fptrunc, v, destType);
 974         }
 975
 976         Value *Nucleus::createFPExt(Value *v, Type *destType)
 977         {
 978                 return createCast(Ice::InstCast::Fpext, v, destType);
 979         }
 980
 981         Value *Nucleus::createBitCast(Value *v, Type *destType)
 982         {
 983                 return createCast(Ice::InstCast::Bitcast, v, destType);
 984         }
 985
 986         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
 987         {
 988                 assert(lhs->getType() == rhs->getType());
 989
 990                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
 991                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
 992                 ::basicBlock->appendInst(cmp);
 993
 994                 return V(result);
 995         }
 996
 997         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
 998         {
 999                 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1000         }
1001
1002         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1003         {
1004                 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1005         }
1006
1007         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1008         {
1009                 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1010         }
1011
1012         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1013         {
1014                 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1015         }
1016
1017         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1018         {
1019                 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1020         }
1021
1022         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1023         {
1024                 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1025         }
1026
1027         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1028         {
1029                 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1030         }
1031
1032         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1033         {
1034                 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1035         }
1036
1037         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1038         {
1039                 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1040         }
1041
1042         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1043         {
1044                 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1045         }
1046
1047         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1048         {
1049                 assert(lhs->getType() == rhs->getType());
1050                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1051
1052                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1053                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1054                 ::basicBlock->appendInst(cmp);
1055
1056                 return V(result);
1057         }
1058
1059         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1060         {
1061                 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1062         }
1063
1064         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1065         {
1066                 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1067         }
1068
1069         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1070         {
1071                 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1072         }
1073
1074         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1075         {
1076                 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1077         }
1078
1079         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1080         {
1081                 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1082         }
1083
1084         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1085         {
1086                 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1087         }
1088
1089         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1090         {
1091                 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1092         }
1093
1094         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1095         {
1096                 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1097         }
1098
1099         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1100         {
1101                 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1102         }
1103
1104         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1105         {
1106                 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1107         }
1108
1109         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1110         {
1111                 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1112         }
1113
1114         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1115         {
1116                 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1117         }
1118
1119         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1120         {
1121                 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1122         }
1123
1124         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1125         {
1126                 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1127         }
1128
1129         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1130         {
1131                 auto result = ::function->makeVariable(T(type));
1132                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1133                 ::basicBlock->appendInst(extract);
1134
1135                 return V(result);
1136         }
1137
1138         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1139         {
1140                 auto result = ::function->makeVariable(vector->getType());
1141                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1142                 ::basicBlock->appendInst(insert);
1143
1144                 return V(result);
1145         }
1146
1147         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1148         {
1149                 assert(V1->getType() == V2->getType());
1150
1151                 int size = Ice::typeNumElements(V1->getType());
1152                 auto result = ::function->makeVariable(V1->getType());
1153                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1154
1155                 for(int i = 0; i < size; i++)
1156                 {
1157                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1158                 }
1159
1160                 ::basicBlock->appendInst(shuffle);
1161
1162                 return V(result);
1163         }
1164
1165         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1166         {
1167                 assert(ifTrue->getType() == ifFalse->getType());
1168
1169                 auto result = ::function->makeVariable(ifTrue->getType());
1170                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1171                 ::basicBlock->appendInst(select);
1172
1173                 return V(result);
1174         }
1175
1176         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1177         {
1178                 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1179                 ::basicBlock->appendInst(switchInst);
1180
1181                 return reinterpret_cast<SwitchCases*>(switchInst);
1182         }
1183
1184         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1185         {
1186                 switchCases->addBranch(label, label, branch);
1187         }
1188
1189         void Nucleus::createUnreachable()
1190         {
1191                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1192                 ::basicBlock->appendInst(unreachable);
1193         }
1194
1195         static Value *createSwizzle4(Value *val, unsigned char select)
1196         {
1197                 int swizzle[4] =
1198                 {
1199                         (select >> 0) & 0x03,
1200                         (select >> 2) & 0x03,
1201                         (select >> 4) & 0x03,
1202                         (select >> 6) & 0x03,
1203                 };
1204
1205                 return Nucleus::createShuffleVector(val, val, swizzle);
1206         }
1207
1208         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1209         {
1210                 int64_t mask[4] = {0, 0, 0, 0};
1211
1212                 mask[(select >> 0) & 0x03] = -1;
1213                 mask[(select >> 2) & 0x03] = -1;
1214                 mask[(select >> 4) & 0x03] = -1;
1215                 mask[(select >> 6) & 0x03] = -1;
1216
1217                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1218                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1219
1220                 return result;
1221         }
1222
1223         Type *Nucleus::getPointerType(Type *ElementType)
1224         {
1225                 if(sizeof(void*) == 8)
1226                 {
1227                         return T(Ice::IceType_i64);
1228                 }
1229                 else
1230                 {
1231                         return T(Ice::IceType_i32);
1232                 }
1233         }
1234
1235         Value *Nucleus::createNullValue(Type *Ty)
1236         {
1237                 if(Ice::isVectorType(T(Ty)))
1238                 {
1239                         assert(Ice::typeNumElements(T(Ty)) <= 16);
1240                         int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1241                         return createConstantVector(c, Ty);
1242                 }
1243                 else
1244                 {
1245                         return V(::context->getConstantZero(T(Ty)));
1246                 }
1247         }
1248
1249         Value *Nucleus::createConstantLong(int64_t i)
1250         {
1251                 return V(::context->getConstantInt64(i));
1252         }
1253
1254         Value *Nucleus::createConstantInt(int i)
1255         {
1256                 return V(::context->getConstantInt32(i));
1257         }
1258
1259         Value *Nucleus::createConstantInt(unsigned int i)
1260         {
1261                 return V(::context->getConstantInt32(i));
1262         }
1263
1264         Value *Nucleus::createConstantBool(bool b)
1265         {
1266                 return V(::context->getConstantInt1(b));
1267         }
1268
1269         Value *Nucleus::createConstantByte(signed char i)
1270         {
1271                 return V(::context->getConstantInt8(i));
1272         }
1273
1274         Value *Nucleus::createConstantByte(unsigned char i)
1275         {
1276                 return V(::context->getConstantInt8(i));
1277         }
1278
1279         Value *Nucleus::createConstantShort(short i)
1280         {
1281                 return V(::context->getConstantInt16(i));
1282         }
1283
1284         Value *Nucleus::createConstantShort(unsigned short i)
1285         {
1286                 return V(::context->getConstantInt16(i));
1287         }
1288
1289         Value *Nucleus::createConstantFloat(float x)
1290         {
1291                 return V(::context->getConstantFloat(x));
1292         }
1293
1294         Value *Nucleus::createNullPointer(Type *Ty)
1295         {
1296                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1297         }
1298
        // Creates a 16-byte constant vector of 'type' and returns a value loaded
        // from it.
        //
        // 'constants' supplies one source element per lane. For integer vector
        // types the elements are read as int64_t and narrowed to the lane width;
        // for float vector types the same memory is read as double and narrowed
        // to float. Types with fewer than 16 bytes of lanes (Type_v2i32,
        // Type_v2f32, Type_v4i16, Type_v8i8, Type_v4i8) have their lanes repeated
        // to fill all 16 bytes.
        //
        // The data is registered as a constant global of the current function
        // with 16-byte alignment, and a load from that global is appended to the
        // current basic block.
        Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
        {
                const int vectorSize = 16;
                assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
                const int alignment = vectorSize;
                auto globalPool = ::function->getGlobalPool();

                // Two views of the same input: integer lanes and floating-point lanes.
                // NOTE(review): 'f' reinterprets the int64_t array as double; this
                // presumably relies on callers of the float cases having passed
                // doubles (see the const double* overload) - confirm.
                const int64_t *i = constants;
                const double *f = reinterpret_cast<const double*>(constants);
                Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;

                // 'type' is an enum value carried in a pointer; switch on its integer value.
                switch((int)reinterpret_cast<intptr_t>(type))
                {
                case Ice::IceType_v4i32:
                case Ice::IceType_v4i1:
                        {
                                const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                case Ice::IceType_v4f32:
                        {
                                const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                case Ice::IceType_v8i16:
                case Ice::IceType_v8i1:
                        {
                                const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                case Ice::IceType_v16i8:
                case Ice::IceType_v16i1:
                        {
                                const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                // The remaining cases read fewer lanes and repeat them to fill 16 bytes.
                case Type_v2i32:
                        {
                                const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                case Type_v2f32:
                        {
                                const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                case Type_v4i16:
                        {
                                const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                case Type_v8i8:
                        {
                                const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                case Type_v4i8:
                        {
                                const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
                                static_assert(sizeof(initializer) == vectorSize, "!");
                                dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
                        }
                        break;
                default:
                        // NOTE(review): when asserts are compiled out, an unknown type
                        // falls through with dataInitializer still null.
                        assert(false && "Unknown constant vector type" && type);
                }

                // Wrap the bytes in an unnamed, constant, 16-byte aligned global.
                auto name = Ice::GlobalString::createWithoutString(::context);
                auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
                variableDeclaration->setName(name);
                variableDeclaration->setAlignment(alignment);
                variableDeclaration->setIsConstant(true);
                variableDeclaration->addInitializer(dataInitializer);

                ::function->addGlobal(variableDeclaration);

                // Refer to the global by symbol (offset 0) and load the vector from it.
                constexpr int32_t offset = 0;
                Ice::Operand *ptr = ::context->getConstantSym(offset, name);

                Ice::Variable *result = ::function->makeVariable(T(type));
                auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
                ::basicBlock->appendInst(load);

                return V(result);
        }
1400
1401         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1402         {
1403                 return createConstantVector((const int64_t*)constants, type);
1404         }
1405
1406         Type *Void::getType()
1407         {
1408                 return T(Ice::IceType_void);
1409         }
1410
1411         Bool::Bool(Argument<Bool> argument)
1412         {
1413                 storeValue(argument.value);
1414         }
1415
1416         Bool::Bool(bool x)
1417         {
1418                 storeValue(Nucleus::createConstantBool(x));
1419         }
1420
1421         Bool::Bool(RValue<Bool> rhs)
1422         {
1423                 storeValue(rhs.value);
1424         }
1425
1426         Bool::Bool(const Bool &rhs)
1427         {
1428                 Value *value = rhs.loadValue();
1429                 storeValue(value);
1430         }
1431
1432         Bool::Bool(const Reference<Bool> &rhs)
1433         {
1434                 Value *value = rhs.loadValue();
1435                 storeValue(value);
1436         }
1437
1438         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1439         {
1440                 storeValue(rhs.value);
1441
1442                 return rhs;
1443         }
1444
1445         RValue<Bool> Bool::operator=(const Bool &rhs)
1446         {
1447                 Value *value = rhs.loadValue();
1448                 storeValue(value);
1449
1450                 return RValue<Bool>(value);
1451         }
1452
1453         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1454         {
1455                 Value *value = rhs.loadValue();
1456                 storeValue(value);
1457
1458                 return RValue<Bool>(value);
1459         }
1460
1461         RValue<Bool> operator!(RValue<Bool> val)
1462         {
1463                 return RValue<Bool>(Nucleus::createNot(val.value));
1464         }
1465
1466         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1467         {
1468                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1469         }
1470
1471         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1472         {
1473                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1474         }
1475
1476         Type *Bool::getType()
1477         {
1478                 return T(Ice::IceType_i1);
1479         }
1480
1481         Byte::Byte(Argument<Byte> argument)
1482         {
1483                 storeValue(argument.value);
1484         }
1485
1486         Byte::Byte(RValue<Int> cast)
1487         {
1488                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1489
1490                 storeValue(integer);
1491         }
1492
1493         Byte::Byte(RValue<UInt> cast)
1494         {
1495                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1496
1497                 storeValue(integer);
1498         }
1499
1500         Byte::Byte(RValue<UShort> cast)
1501         {
1502                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1503
1504                 storeValue(integer);
1505         }
1506
1507         Byte::Byte(int x)
1508         {
1509                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1510         }
1511
1512         Byte::Byte(unsigned char x)
1513         {
1514                 storeValue(Nucleus::createConstantByte(x));
1515         }
1516
1517         Byte::Byte(RValue<Byte> rhs)
1518         {
1519                 storeValue(rhs.value);
1520         }
1521
1522         Byte::Byte(const Byte &rhs)
1523         {
1524                 Value *value = rhs.loadValue();
1525                 storeValue(value);
1526         }
1527
1528         Byte::Byte(const Reference<Byte> &rhs)
1529         {
1530                 Value *value = rhs.loadValue();
1531                 storeValue(value);
1532         }
1533
1534         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1535         {
1536                 storeValue(rhs.value);
1537
1538                 return rhs;
1539         }
1540
1541         RValue<Byte> Byte::operator=(const Byte &rhs)
1542         {
1543                 Value *value = rhs.loadValue();
1544                 storeValue(value);
1545
1546                 return RValue<Byte>(value);
1547         }
1548
1549         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1550         {
1551                 Value *value = rhs.loadValue();
1552                 storeValue(value);
1553
1554                 return RValue<Byte>(value);
1555         }
1556
1557         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1558         {
1559                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1560         }
1561
1562         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1563         {
1564                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1565         }
1566
1567         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1568         {
1569                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1570         }
1571
1572         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1573         {
1574                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1575         }
1576
1577         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1578         {
1579                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1580         }
1581
1582         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1583         {
1584                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1585         }
1586
1587         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1588         {
1589                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1590         }
1591
1592         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1593         {
1594                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1595         }
1596
1597         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1598         {
1599                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1600         }
1601
1602         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1603         {
1604                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1605         }
1606
1607         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1608         {
1609                 return lhs = lhs + rhs;
1610         }
1611
1612         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1613         {
1614                 return lhs = lhs - rhs;
1615         }
1616
1617         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1618         {
1619                 return lhs = lhs * rhs;
1620         }
1621
1622         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1623         {
1624                 return lhs = lhs / rhs;
1625         }
1626
1627         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1628         {
1629                 return lhs = lhs % rhs;
1630         }
1631
1632         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1633         {
1634                 return lhs = lhs & rhs;
1635         }
1636
1637         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1638         {
1639                 return lhs = lhs | rhs;
1640         }
1641
1642         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1643         {
1644                 return lhs = lhs ^ rhs;
1645         }
1646
1647         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1648         {
1649                 return lhs = lhs << rhs;
1650         }
1651
1652         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1653         {
1654                 return lhs = lhs >> rhs;
1655         }
1656
1657         RValue<Byte> operator+(RValue<Byte> val)
1658         {
1659                 return val;
1660         }
1661
1662         RValue<Byte> operator-(RValue<Byte> val)
1663         {
1664                 return RValue<Byte>(Nucleus::createNeg(val.value));
1665         }
1666
1667         RValue<Byte> operator~(RValue<Byte> val)
1668         {
1669                 return RValue<Byte>(Nucleus::createNot(val.value));
1670         }
1671
1672         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1673         {
1674                 RValue<Byte> res = val;
1675                 val += Byte(1);
1676                 return res;
1677         }
1678
1679         const Byte &operator++(Byte &val)   // Pre-increment
1680         {
1681                 val += Byte(1);
1682                 return val;
1683         }
1684
1685         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1686         {
1687                 RValue<Byte> res = val;
1688                 val -= Byte(1);
1689                 return res;
1690         }
1691
1692         const Byte &operator--(Byte &val)   // Pre-decrement
1693         {
1694                 val -= Byte(1);
1695                 return val;
1696         }
1697
1698         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1699         {
1700                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1701         }
1702
1703         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1704         {
1705                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1706         }
1707
1708         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1709         {
1710                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1711         }
1712
1713         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1714         {
1715                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1716         }
1717
1718         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1719         {
1720                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1721         }
1722
1723         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1724         {
1725                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1726         }
1727
1728         Type *Byte::getType()
1729         {
1730                 return T(Ice::IceType_i8);
1731         }
1732
1733         SByte::SByte(Argument<SByte> argument)
1734         {
1735                 storeValue(argument.value);
1736         }
1737
1738         SByte::SByte(RValue<Int> cast)
1739         {
1740                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1741
1742                 storeValue(integer);
1743         }
1744
1745         SByte::SByte(RValue<Short> cast)
1746         {
1747                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1748
1749                 storeValue(integer);
1750         }
1751
1752         SByte::SByte(signed char x)
1753         {
1754                 storeValue(Nucleus::createConstantByte(x));
1755         }
1756
1757         SByte::SByte(RValue<SByte> rhs)
1758         {
1759                 storeValue(rhs.value);
1760         }
1761
1762         SByte::SByte(const SByte &rhs)
1763         {
1764                 Value *value = rhs.loadValue();
1765                 storeValue(value);
1766         }
1767
1768         SByte::SByte(const Reference<SByte> &rhs)
1769         {
1770                 Value *value = rhs.loadValue();
1771                 storeValue(value);
1772         }
1773
1774         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1775         {
1776                 storeValue(rhs.value);
1777
1778                 return rhs;
1779         }
1780
1781         RValue<SByte> SByte::operator=(const SByte &rhs)
1782         {
1783                 Value *value = rhs.loadValue();
1784                 storeValue(value);
1785
1786                 return RValue<SByte>(value);
1787         }
1788
1789         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1790         {
1791                 Value *value = rhs.loadValue();
1792                 storeValue(value);
1793
1794                 return RValue<SByte>(value);
1795         }
1796
1797         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1798         {
1799                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1800         }
1801
1802         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1803         {
1804                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1805         }
1806
1807         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1808         {
1809                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1810         }
1811
1812         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1813         {
1814                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1815         }
1816
1817         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1818         {
1819                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1820         }
1821
1822         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1823         {
1824                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1825         }
1826
1827         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1828         {
1829                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1830         }
1831
1832         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1833         {
1834                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1835         }
1836
1837         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1838         {
1839                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1840         }
1841
1842         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1843         {
1844                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1845         }
1846
1847         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1848         {
1849                 return lhs = lhs + rhs;
1850         }
1851
1852         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1853         {
1854                 return lhs = lhs - rhs;
1855         }
1856
1857         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1858         {
1859                 return lhs = lhs * rhs;
1860         }
1861
1862         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1863         {
1864                 return lhs = lhs / rhs;
1865         }
1866
1867         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1868         {
1869                 return lhs = lhs % rhs;
1870         }
1871
1872         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1873         {
1874                 return lhs = lhs & rhs;
1875         }
1876
1877         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1878         {
1879                 return lhs = lhs | rhs;
1880         }
1881
1882         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1883         {
1884                 return lhs = lhs ^ rhs;
1885         }
1886
1887         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1888         {
1889                 return lhs = lhs << rhs;
1890         }
1891
1892         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1893         {
1894                 return lhs = lhs >> rhs;
1895         }
1896
1897         RValue<SByte> operator+(RValue<SByte> val)
1898         {
1899                 return val;
1900         }
1901
1902         RValue<SByte> operator-(RValue<SByte> val)
1903         {
1904                 return RValue<SByte>(Nucleus::createNeg(val.value));
1905         }
1906
1907         RValue<SByte> operator~(RValue<SByte> val)
1908         {
1909                 return RValue<SByte>(Nucleus::createNot(val.value));
1910         }
1911
1912         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1913         {
1914                 RValue<SByte> res = val;
1915                 val += SByte(1);
1916                 return res;
1917         }
1918
1919         const SByte &operator++(SByte &val)   // Pre-increment
1920         {
1921                 val += SByte(1);
1922                 return val;
1923         }
1924
1925         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
1926         {
1927                 RValue<SByte> res = val;
1928                 val -= SByte(1);
1929                 return res;
1930         }
1931
1932         const SByte &operator--(SByte &val)   // Pre-decrement
1933         {
1934                 val -= SByte(1);
1935                 return val;
1936         }
1937
1938         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1939         {
1940                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1941         }
1942
1943         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1944         {
1945                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1946         }
1947
1948         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1949         {
1950                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1951         }
1952
1953         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1954         {
1955                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1956         }
1957
1958         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1959         {
1960                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1961         }
1962
1963         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1964         {
1965                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1966         }
1967
1968         Type *SByte::getType()
1969         {
1970                 return T(Ice::IceType_i8);
1971         }
1972
1973         Short::Short(Argument<Short> argument)
1974         {
1975                 storeValue(argument.value);
1976         }
1977
1978         Short::Short(RValue<Int> cast)
1979         {
1980                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1981
1982                 storeValue(integer);
1983         }
1984
1985         Short::Short(short x)
1986         {
1987                 storeValue(Nucleus::createConstantShort(x));
1988         }
1989
1990         Short::Short(RValue<Short> rhs)
1991         {
1992                 storeValue(rhs.value);
1993         }
1994
1995         Short::Short(const Short &rhs)
1996         {
1997                 Value *value = rhs.loadValue();
1998                 storeValue(value);
1999         }
2000
2001         Short::Short(const Reference<Short> &rhs)
2002         {
2003                 Value *value = rhs.loadValue();
2004                 storeValue(value);
2005         }
2006
2007         RValue<Short> Short::operator=(RValue<Short> rhs)
2008         {
2009                 storeValue(rhs.value);
2010
2011                 return rhs;
2012         }
2013
2014         RValue<Short> Short::operator=(const Short &rhs)
2015         {
2016                 Value *value = rhs.loadValue();
2017                 storeValue(value);
2018
2019                 return RValue<Short>(value);
2020         }
2021
2022         RValue<Short> Short::operator=(const Reference<Short> &rhs)
2023         {
2024                 Value *value = rhs.loadValue();
2025                 storeValue(value);
2026
2027                 return RValue<Short>(value);
2028         }
2029
2030         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
2031         {
2032                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
2033         }
2034
2035         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
2036         {
2037                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
2038         }
2039
2040         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
2041         {
2042                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
2043         }
2044
2045         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
2046         {
2047                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
2048         }
2049
2050         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
2051         {
2052                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
2053         }
2054
2055         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
2056         {
2057                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
2058         }
2059
2060         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
2061         {
2062                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
2063         }
2064
2065         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
2066         {
2067                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
2068         }
2069
2070         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
2071         {
2072                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
2073         }
2074
2075         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
2076         {
2077                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
2078         }
2079
2080         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
2081         {
2082                 return lhs = lhs + rhs;
2083         }
2084
2085         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2086         {
2087                 return lhs = lhs - rhs;
2088         }
2089
2090         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2091         {
2092                 return lhs = lhs * rhs;
2093         }
2094
2095         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2096         {
2097                 return lhs = lhs / rhs;
2098         }
2099
2100         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2101         {
2102                 return lhs = lhs % rhs;
2103         }
2104
2105         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2106         {
2107                 return lhs = lhs & rhs;
2108         }
2109
2110         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2111         {
2112                 return lhs = lhs | rhs;
2113         }
2114
2115         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2116         {
2117                 return lhs = lhs ^ rhs;
2118         }
2119
2120         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2121         {
2122                 return lhs = lhs << rhs;
2123         }
2124
2125         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2126         {
2127                 return lhs = lhs >> rhs;
2128         }
2129
2130         RValue<Short> operator+(RValue<Short> val)
2131         {
2132                 return val;
2133         }
2134
2135         RValue<Short> operator-(RValue<Short> val)
2136         {
2137                 return RValue<Short>(Nucleus::createNeg(val.value));
2138         }
2139
2140         RValue<Short> operator~(RValue<Short> val)
2141         {
2142                 return RValue<Short>(Nucleus::createNot(val.value));
2143         }
2144
2145         RValue<Short> operator++(Short &val, int)   // Post-increment
2146         {
2147                 RValue<Short> res = val;
2148                 val += Short(1);
2149                 return res;
2150         }
2151
2152         const Short &operator++(Short &val)   // Pre-increment
2153         {
2154                 val += Short(1);
2155                 return val;
2156         }
2157
2158         RValue<Short> operator--(Short &val, int)   // Post-decrement
2159         {
2160                 RValue<Short> res = val;
2161                 val -= Short(1);
2162                 return res;
2163         }
2164
2165         const Short &operator--(Short &val)   // Pre-decrement
2166         {
2167                 val -= Short(1);
2168                 return val;
2169         }
2170
2171         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2172         {
2173                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2174         }
2175
2176         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2177         {
2178                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2179         }
2180
2181         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2182         {
2183                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2184         }
2185
2186         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2187         {
2188                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2189         }
2190
2191         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2192         {
2193                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2194         }
2195
2196         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2197         {
2198                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2199         }
2200
2201         Type *Short::getType()
2202         {
2203                 return T(Ice::IceType_i16);
2204         }
2205
2206         UShort::UShort(Argument<UShort> argument)
2207         {
2208                 storeValue(argument.value);
2209         }
2210
2211         UShort::UShort(RValue<UInt> cast)
2212         {
2213                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2214
2215                 storeValue(integer);
2216         }
2217
2218         UShort::UShort(RValue<Int> cast)
2219         {
2220                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2221
2222                 storeValue(integer);
2223         }
2224
2225         UShort::UShort(unsigned short x)
2226         {
2227                 storeValue(Nucleus::createConstantShort(x));
2228         }
2229
2230         UShort::UShort(RValue<UShort> rhs)
2231         {
2232                 storeValue(rhs.value);
2233         }
2234
2235         UShort::UShort(const UShort &rhs)
2236         {
2237                 Value *value = rhs.loadValue();
2238                 storeValue(value);
2239         }
2240
2241         UShort::UShort(const Reference<UShort> &rhs)
2242         {
2243                 Value *value = rhs.loadValue();
2244                 storeValue(value);
2245         }
2246
2247         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2248         {
2249                 storeValue(rhs.value);
2250
2251                 return rhs;
2252         }
2253
2254         RValue<UShort> UShort::operator=(const UShort &rhs)
2255         {
2256                 Value *value = rhs.loadValue();
2257                 storeValue(value);
2258
2259                 return RValue<UShort>(value);
2260         }
2261
2262         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2263         {
2264                 Value *value = rhs.loadValue();
2265                 storeValue(value);
2266
2267                 return RValue<UShort>(value);
2268         }
2269
2270         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2271         {
2272                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2273         }
2274
2275         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2276         {
2277                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2278         }
2279
2280         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2281         {
2282                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2283         }
2284
2285         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2286         {
2287                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2288         }
2289
2290         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2291         {
2292                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2293         }
2294
2295         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2296         {
2297                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2298         }
2299
2300         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2301         {
2302                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2303         }
2304
2305         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2306         {
2307                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2308         }
2309
2310         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2311         {
2312                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2313         }
2314
2315         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2316         {
2317                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2318         }
2319
2320         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2321         {
2322                 return lhs = lhs + rhs;
2323         }
2324
2325         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2326         {
2327                 return lhs = lhs - rhs;
2328         }
2329
2330         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2331         {
2332                 return lhs = lhs * rhs;
2333         }
2334
2335         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2336         {
2337                 return lhs = lhs / rhs;
2338         }
2339
2340         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2341         {
2342                 return lhs = lhs % rhs;
2343         }
2344
2345         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2346         {
2347                 return lhs = lhs & rhs;
2348         }
2349
2350         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2351         {
2352                 return lhs = lhs | rhs;
2353         }
2354
2355         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2356         {
2357                 return lhs = lhs ^ rhs;
2358         }
2359
2360         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2361         {
2362                 return lhs = lhs << rhs;
2363         }
2364
2365         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2366         {
2367                 return lhs = lhs >> rhs;
2368         }
2369
2370         RValue<UShort> operator+(RValue<UShort> val)
2371         {
2372                 return val;
2373         }
2374
2375         RValue<UShort> operator-(RValue<UShort> val)
2376         {
2377                 return RValue<UShort>(Nucleus::createNeg(val.value));
2378         }
2379
2380         RValue<UShort> operator~(RValue<UShort> val)
2381         {
2382                 return RValue<UShort>(Nucleus::createNot(val.value));
2383         }
2384
2385         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2386         {
2387                 RValue<UShort> res = val;
2388                 val += UShort(1);
2389                 return res;
2390         }
2391
2392         const UShort &operator++(UShort &val)   // Pre-increment
2393         {
2394                 val += UShort(1);
2395                 return val;
2396         }
2397
2398         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2399         {
2400                 RValue<UShort> res = val;
2401                 val -= UShort(1);
2402                 return res;
2403         }
2404
2405         const UShort &operator--(UShort &val)   // Pre-decrement
2406         {
2407                 val -= UShort(1);
2408                 return val;
2409         }
2410
2411         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2412         {
2413                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2414         }
2415
2416         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2417         {
2418                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2419         }
2420
2421         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2422         {
2423                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2424         }
2425
2426         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2427         {
2428                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2429         }
2430
2431         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2432         {
2433                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2434         }
2435
2436         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2437         {
2438                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2439         }
2440
2441         Type *UShort::getType()
2442         {
2443                 return T(Ice::IceType_i16);
2444         }
2445
2446         Byte4::Byte4(RValue<Byte8> cast)
2447         {
2448                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2449         }
2450
2451         Byte4::Byte4(const Reference<Byte4> &rhs)
2452         {
2453                 Value *value = rhs.loadValue();
2454                 storeValue(value);
2455         }
2456
2457         Type *Byte4::getType()
2458         {
2459                 return T(Type_v4i8);
2460         }
2461
2462         Type *SByte4::getType()
2463         {
2464                 return T(Type_v4i8);
2465         }
2466
2467         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2468         {
2469                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2470                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2471         }
2472
2473         Byte8::Byte8(RValue<Byte8> rhs)
2474         {
2475                 storeValue(rhs.value);
2476         }
2477
2478         Byte8::Byte8(const Byte8 &rhs)
2479         {
2480                 Value *value = rhs.loadValue();
2481                 storeValue(value);
2482         }
2483
2484         Byte8::Byte8(const Reference<Byte8> &rhs)
2485         {
2486                 Value *value = rhs.loadValue();
2487                 storeValue(value);
2488         }
2489
2490         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2491         {
2492                 storeValue(rhs.value);
2493
2494                 return rhs;
2495         }
2496
2497         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2498         {
2499                 Value *value = rhs.loadValue();
2500                 storeValue(value);
2501
2502                 return RValue<Byte8>(value);
2503         }
2504
2505         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2506         {
2507                 Value *value = rhs.loadValue();
2508                 storeValue(value);
2509
2510                 return RValue<Byte8>(value);
2511         }
2512
2513         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2514         {
2515                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2516         }
2517
2518         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2519         {
2520                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2521         }
2522
2523 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2524 //      {
2525 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2526 //      }
2527
2528 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2529 //      {
2530 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2531 //      }
2532
2533 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2534 //      {
2535 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2536 //      }
2537
2538         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2539         {
2540                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2541         }
2542
2543         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2544         {
2545                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2546         }
2547
2548         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2549         {
2550                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2551         }
2552
2553 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2554 //      {
2555 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2556 //      }
2557
2558 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2559 //      {
2560 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2561 //      }
2562
2563         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2564         {
2565                 return lhs = lhs + rhs;
2566         }
2567
2568         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2569         {
2570                 return lhs = lhs - rhs;
2571         }
2572
2573 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2574 //      {
2575 //              return lhs = lhs * rhs;
2576 //      }
2577
2578 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2579 //      {
2580 //              return lhs = lhs / rhs;
2581 //      }
2582
2583 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2584 //      {
2585 //              return lhs = lhs % rhs;
2586 //      }
2587
2588         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2589         {
2590                 return lhs = lhs & rhs;
2591         }
2592
2593         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2594         {
2595                 return lhs = lhs | rhs;
2596         }
2597
2598         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2599         {
2600                 return lhs = lhs ^ rhs;
2601         }
2602
2603 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2604 //      {
2605 //              return lhs = lhs << rhs;
2606 //      }
2607
2608 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2609 //      {
2610 //              return lhs = lhs >> rhs;
2611 //      }
2612
2613 //      RValue<Byte8> operator+(RValue<Byte8> val)
2614 //      {
2615 //              return val;
2616 //      }
2617
2618 //      RValue<Byte8> operator-(RValue<Byte8> val)
2619 //      {
2620 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2621 //      }
2622
2623         RValue<Byte8> operator~(RValue<Byte8> val)
2624         {
2625                 return RValue<Byte8>(Nucleus::createNot(val.value));
2626         }
2627
2628         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2629         {
2630                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2631                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2632                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2633                 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2634                 paddusb->addArg(x.value);
2635                 paddusb->addArg(y.value);
2636                 ::basicBlock->appendInst(paddusb);
2637
2638                 return RValue<Byte8>(V(result));
2639         }
2640
2641         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2642         {
2643                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2644                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2645                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2646                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2647                 psubusw->addArg(x.value);
2648                 psubusw->addArg(y.value);
2649                 ::basicBlock->appendInst(psubusw);
2650
2651                 return RValue<Byte8>(V(result));
2652         }
2653
2654         RValue<Short4> Unpack(RValue<Byte4> x)
2655         {
2656                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2657                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2658         }
2659
2660         RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2661         {
2662                 return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2663         }
2664
2665         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2666         {
2667                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2668                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2669         }
2670
2671         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2672         {
2673                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2674                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2675                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2676         }
2677
2678         RValue<Int> SignMask(RValue<Byte8> x)
2679         {
2680                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2681                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2682                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2683                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2684                 movmsk->addArg(x.value);
2685                 ::basicBlock->appendInst(movmsk);
2686
2687                 return RValue<Int>(V(result));
2688         }
2689
2690 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2691 //      {
2692 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2693 //      }
2694
2695         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2696         {
2697                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2698         }
2699
2700         Type *Byte8::getType()
2701         {
2702                 return T(Type_v8i8);
2703         }
2704
2705         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2706         {
2707                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
2708                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2709
2710                 storeValue(Nucleus::createBitCast(vector, getType()));
2711         }
2712
2713         SByte8::SByte8(RValue<SByte8> rhs)
2714         {
2715                 storeValue(rhs.value);
2716         }
2717
2718         SByte8::SByte8(const SByte8 &rhs)
2719         {
2720                 Value *value = rhs.loadValue();
2721                 storeValue(value);
2722         }
2723
2724         SByte8::SByte8(const Reference<SByte8> &rhs)
2725         {
2726                 Value *value = rhs.loadValue();
2727                 storeValue(value);
2728         }
2729
2730         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2731         {
2732                 storeValue(rhs.value);
2733
2734                 return rhs;
2735         }
2736
2737         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2738         {
2739                 Value *value = rhs.loadValue();
2740                 storeValue(value);
2741
2742                 return RValue<SByte8>(value);
2743         }
2744
2745         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2746         {
2747                 Value *value = rhs.loadValue();
2748                 storeValue(value);
2749
2750                 return RValue<SByte8>(value);
2751         }
2752
2753         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2754         {
2755                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2756         }
2757
2758         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2759         {
2760                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2761         }
2762
2763 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2764 //      {
2765 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2766 //      }
2767
2768 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2769 //      {
2770 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2771 //      }
2772
2773 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2774 //      {
2775 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2776 //      }
2777
2778         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2779         {
2780                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2781         }
2782
2783         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2784         {
2785                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2786         }
2787
2788         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2789         {
2790                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2791         }
2792
2793 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2794 //      {
2795 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2796 //      }
2797
2798 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2799 //      {
2800 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2801 //      }
2802
2803         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2804         {
2805                 return lhs = lhs + rhs;
2806         }
2807
2808         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2809         {
2810                 return lhs = lhs - rhs;
2811         }
2812
2813 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2814 //      {
2815 //              return lhs = lhs * rhs;
2816 //      }
2817
2818 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2819 //      {
2820 //              return lhs = lhs / rhs;
2821 //      }
2822
2823 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2824 //      {
2825 //              return lhs = lhs % rhs;
2826 //      }
2827
2828         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2829         {
2830                 return lhs = lhs & rhs;
2831         }
2832
2833         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2834         {
2835                 return lhs = lhs | rhs;
2836         }
2837
2838         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2839         {
2840                 return lhs = lhs ^ rhs;
2841         }
2842
2843 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2844 //      {
2845 //              return lhs = lhs << rhs;
2846 //      }
2847
2848 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2849 //      {
2850 //              return lhs = lhs >> rhs;
2851 //      }
2852
2853 //      RValue<SByte8> operator+(RValue<SByte8> val)
2854 //      {
2855 //              return val;
2856 //      }
2857
2858 //      RValue<SByte8> operator-(RValue<SByte8> val)
2859 //      {
2860 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
2861 //      }
2862
2863         RValue<SByte8> operator~(RValue<SByte8> val)
2864         {
2865                 return RValue<SByte8>(Nucleus::createNot(val.value));
2866         }
2867
2868         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2869         {
2870                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2871                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2872                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2873                 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2874                 paddsb->addArg(x.value);
2875                 paddsb->addArg(y.value);
2876                 ::basicBlock->appendInst(paddsb);
2877
2878                 return RValue<SByte8>(V(result));
2879         }
2880
2881         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2882         {
2883                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2884                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2885                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2886                 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2887                 psubsb->addArg(x.value);
2888                 psubsb->addArg(y.value);
2889                 ::basicBlock->appendInst(psubsb);
2890
2891                 return RValue<SByte8>(V(result));
2892         }
2893
2894         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2895         {
2896                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2897                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2898         }
2899
2900         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2901         {
2902                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2903                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2904                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2905         }
2906
2907         RValue<Int> SignMask(RValue<SByte8> x)
2908         {
2909                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2910                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2911                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2912                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2913                 movmsk->addArg(x.value);
2914                 ::basicBlock->appendInst(movmsk);
2915
2916                 return RValue<Int>(V(result));
2917         }
2918
2919         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2920         {
2921                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2922         }
2923
2924         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2925         {
2926                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2927         }
2928
2929         Type *SByte8::getType()
2930         {
2931                 return T(Type_v8i8);
2932         }
2933
2934         Byte16::Byte16(RValue<Byte16> rhs)
2935         {
2936                 storeValue(rhs.value);
2937         }
2938
2939         Byte16::Byte16(const Byte16 &rhs)
2940         {
2941                 Value *value = rhs.loadValue();
2942                 storeValue(value);
2943         }
2944
2945         Byte16::Byte16(const Reference<Byte16> &rhs)
2946         {
2947                 Value *value = rhs.loadValue();
2948                 storeValue(value);
2949         }
2950
2951         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2952         {
2953                 storeValue(rhs.value);
2954
2955                 return rhs;
2956         }
2957
2958         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2959         {
2960                 Value *value = rhs.loadValue();
2961                 storeValue(value);
2962
2963                 return RValue<Byte16>(value);
2964         }
2965
2966         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2967         {
2968                 Value *value = rhs.loadValue();
2969                 storeValue(value);
2970
2971                 return RValue<Byte16>(value);
2972         }
2973
2974         Type *Byte16::getType()
2975         {
2976                 return T(Ice::IceType_v16i8);
2977         }
2978
2979         Type *SByte16::getType()
2980         {
2981                 return T(Ice::IceType_v16i8);
2982         }
2983
2984         Short2::Short2(RValue<Short4> cast)
2985         {
2986                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2987         }
2988
2989         Type *Short2::getType()
2990         {
2991                 return T(Type_v2i16);
2992         }
2993
2994         UShort2::UShort2(RValue<UShort4> cast)
2995         {
2996                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2997         }
2998
2999         Type *UShort2::getType()
3000         {
3001                 return T(Type_v2i16);
3002         }
3003
3004         Short4::Short4(RValue<Int> cast)
3005         {
3006                 Value *vector = loadValue();
3007                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
3008                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
3009                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
3010
3011                 storeValue(swizzle);
3012         }
3013
3014         Short4::Short4(RValue<Int4> cast)
3015         {
3016                 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
3017                 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
3018                 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
3019
3020                 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
3021                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
3022
3023                 storeValue(short4);
3024         }
3025
3026 //      Short4::Short4(RValue<Float> cast)
3027 //      {
3028 //      }
3029
3030         Short4::Short4(RValue<Float4> cast)
3031         {
3032                 assert(false && "UNIMPLEMENTED");
3033         }
3034
3035         Short4::Short4(short xyzw)
3036         {
3037                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3038                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3039         }
3040
3041         Short4::Short4(short x, short y, short z, short w)
3042         {
3043                 int64_t constantVector[4] = {x, y, z, w};
3044                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3045         }
3046
3047         Short4::Short4(RValue<Short4> rhs)
3048         {
3049                 storeValue(rhs.value);
3050         }
3051
3052         Short4::Short4(const Short4 &rhs)
3053         {
3054                 Value *value = rhs.loadValue();
3055                 storeValue(value);
3056         }
3057
3058         Short4::Short4(const Reference<Short4> &rhs)
3059         {
3060                 Value *value = rhs.loadValue();
3061                 storeValue(value);
3062         }
3063
3064         Short4::Short4(RValue<UShort4> rhs)
3065         {
3066                 storeValue(rhs.value);
3067         }
3068
3069         Short4::Short4(const UShort4 &rhs)
3070         {
3071                 storeValue(rhs.loadValue());
3072         }
3073
3074         Short4::Short4(const Reference<UShort4> &rhs)
3075         {
3076                 storeValue(rhs.loadValue());
3077         }
3078
3079         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
3080         {
3081                 storeValue(rhs.value);
3082
3083                 return rhs;
3084         }
3085
3086         RValue<Short4> Short4::operator=(const Short4 &rhs)
3087         {
3088                 Value *value = rhs.loadValue();
3089                 storeValue(value);
3090
3091                 return RValue<Short4>(value);
3092         }
3093
3094         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3095         {
3096                 Value *value = rhs.loadValue();
3097                 storeValue(value);
3098
3099                 return RValue<Short4>(value);
3100         }
3101
3102         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3103         {
3104                 storeValue(rhs.value);
3105
3106                 return RValue<Short4>(rhs);
3107         }
3108
3109         RValue<Short4> Short4::operator=(const UShort4 &rhs)
3110         {
3111                 Value *value = rhs.loadValue();
3112                 storeValue(value);
3113
3114                 return RValue<Short4>(value);
3115         }
3116
3117         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3118         {
3119                 Value *value = rhs.loadValue();
3120                 storeValue(value);
3121
3122                 return RValue<Short4>(value);
3123         }
3124
3125         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3126         {
3127                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3128         }
3129
3130         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3131         {
3132                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3133         }
3134
3135         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3136         {
3137                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3138         }
3139
3140 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3141 //      {
3142 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3143 //      }
3144
3145 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3146 //      {
3147 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3148 //      }
3149
3150         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3151         {
3152                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3153         }
3154
3155         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3156         {
3157                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3158         }
3159
3160         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3161         {
3162                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3163         }
3164
3165         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3166         {
3167                 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3168         }
3169
3170         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3171         {
3172                 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3173         }
3174
3175         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3176         {
3177                 return lhs = lhs + rhs;
3178         }
3179
3180         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3181         {
3182                 return lhs = lhs - rhs;
3183         }
3184
3185         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3186         {
3187                 return lhs = lhs * rhs;
3188         }
3189
3190 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3191 //      {
3192 //              return lhs = lhs / rhs;
3193 //      }
3194
3195 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3196 //      {
3197 //              return lhs = lhs % rhs;
3198 //      }
3199
3200         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3201         {
3202                 return lhs = lhs & rhs;
3203         }
3204
3205         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3206         {
3207                 return lhs = lhs | rhs;
3208         }
3209
3210         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3211         {
3212                 return lhs = lhs ^ rhs;
3213         }
3214
3215         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3216         {
3217                 return lhs = lhs << rhs;
3218         }
3219
3220         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3221         {
3222                 return lhs = lhs >> rhs;
3223         }
3224
3225 //      RValue<Short4> operator+(RValue<Short4> val)
3226 //      {
3227 //              return val;
3228 //      }
3229
3230         RValue<Short4> operator-(RValue<Short4> val)
3231         {
3232                 return RValue<Short4>(Nucleus::createNeg(val.value));
3233         }
3234
3235         RValue<Short4> operator~(RValue<Short4> val)
3236         {
3237                 return RValue<Short4>(Nucleus::createNot(val.value));
3238         }
3239
3240         RValue<Short4> RoundShort4(RValue<Float4> cast)
3241         {
3242                 RValue<Int4> int4 = RoundInt(cast);
3243                 return As<Short4>(Pack(int4, int4));
3244         }
3245
3246         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3247         {
3248                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3249                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3250                 ::basicBlock->appendInst(cmp);
3251
3252                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3253                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3254                 ::basicBlock->appendInst(select);
3255
3256                 return RValue<Short4>(V(result));
3257         }
3258
3259         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3260         {
3261                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3262                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3263                 ::basicBlock->appendInst(cmp);
3264
3265                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3266                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3267                 ::basicBlock->appendInst(select);
3268
3269                 return RValue<Short4>(V(result));
3270         }
3271
3272         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3273         {
3274                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3275                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3276                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3277                 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3278                 paddsw->addArg(x.value);
3279                 paddsw->addArg(y.value);
3280                 ::basicBlock->appendInst(paddsw);
3281
3282                 return RValue<Short4>(V(result));
3283         }
3284
3285         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3286         {
3287                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3288                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3289                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3290                 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3291                 psubsw->addArg(x.value);
3292                 psubsw->addArg(y.value);
3293                 ::basicBlock->appendInst(psubsw);
3294
3295                 return RValue<Short4>(V(result));
3296         }
3297
3298         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3299         {
3300                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3301                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3302                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3303                 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3304                 pmulhw->addArg(x.value);
3305                 pmulhw->addArg(y.value);
3306                 ::basicBlock->appendInst(pmulhw);
3307
3308                 return RValue<Short4>(V(result));
3309         }
3310
3311         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3312         {
3313                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3314                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3315                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3316                 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3317                 pmaddwd->addArg(x.value);
3318                 pmaddwd->addArg(y.value);
3319                 ::basicBlock->appendInst(pmaddwd);
3320
3321                 return RValue<Int2>(V(result));
3322         }
3323
3324         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3325         {
3326                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3327                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3328                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3329                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3330                 pack->addArg(x.value);
3331                 pack->addArg(y.value);
3332                 ::basicBlock->appendInst(pack);
3333
3334                 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
3335         }
3336
3337         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3338         {
3339                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3340                 return RValue<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3341         }
3342
3343         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3344         {
3345                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3346                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3347                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3348         }
3349
3350         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3351         {
3352                 // Real type is v8i16
3353                 int shuffle[8] =
3354                 {
3355                         (select >> 0) & 0x03,
3356                         (select >> 2) & 0x03,
3357                         (select >> 4) & 0x03,
3358                         (select >> 6) & 0x03,
3359                         (select >> 0) & 0x03,
3360                         (select >> 2) & 0x03,
3361                         (select >> 4) & 0x03,
3362                         (select >> 6) & 0x03,
3363                 };
3364
3365                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3366         }
3367
3368         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3369         {
3370                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3371         }
3372
3373         RValue<Short> Extract(RValue<Short4> val, int i)
3374         {
3375                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3376         }
3377
3378         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3379         {
3380                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3381         }
3382
3383         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3384         {
3385                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3386         }
3387
3388         Type *Short4::getType()
3389         {
3390                 return T(Type_v4i16);
3391         }
3392
3393         UShort4::UShort4(RValue<Int4> cast)
3394         {
3395                 *this = Short4(cast);
3396         }
3397
3398         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3399         {
3400                 if(saturate)
3401                 {
3402                         if(CPUID::SSE4_1)
3403                         {
3404                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3405                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3406                         }
3407                         else
3408                         {
3409                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3410                         }
3411                 }
3412                 else
3413                 {
3414                         *this = Short4(Int4(cast));
3415                 }
3416         }
3417
3418         UShort4::UShort4(unsigned short xyzw)
3419         {
3420                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3421                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3422         }
3423
3424         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3425         {
3426                 int64_t constantVector[4] = {x, y, z, w};
3427                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3428         }
3429
3430         UShort4::UShort4(RValue<UShort4> rhs)
3431         {
3432                 storeValue(rhs.value);
3433         }
3434
3435         UShort4::UShort4(const UShort4 &rhs)
3436         {
3437                 Value *value = rhs.loadValue();
3438                 storeValue(value);
3439         }
3440
3441         UShort4::UShort4(const Reference<UShort4> &rhs)
3442         {
3443                 Value *value = rhs.loadValue();
3444                 storeValue(value);
3445         }
3446
3447         UShort4::UShort4(RValue<Short4> rhs)
3448         {
3449                 storeValue(rhs.value);
3450         }
3451
3452         UShort4::UShort4(const Short4 &rhs)
3453         {
3454                 Value *value = rhs.loadValue();
3455                 storeValue(value);
3456         }
3457
3458         UShort4::UShort4(const Reference<Short4> &rhs)
3459         {
3460                 Value *value = rhs.loadValue();
3461                 storeValue(value);
3462         }
3463
3464         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3465         {
3466                 storeValue(rhs.value);
3467
3468                 return rhs;
3469         }
3470
3471         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3472         {
3473                 Value *value = rhs.loadValue();
3474                 storeValue(value);
3475
3476                 return RValue<UShort4>(value);
3477         }
3478
3479         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3480         {
3481                 Value *value = rhs.loadValue();
3482                 storeValue(value);
3483
3484                 return RValue<UShort4>(value);
3485         }
3486
3487         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3488         {
3489                 storeValue(rhs.value);
3490
3491                 return RValue<UShort4>(rhs);
3492         }
3493
3494         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3495         {
3496                 Value *value = rhs.loadValue();
3497                 storeValue(value);
3498
3499                 return RValue<UShort4>(value);
3500         }
3501
3502         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3503         {
3504                 Value *value = rhs.loadValue();
3505                 storeValue(value);
3506
3507                 return RValue<UShort4>(value);
3508         }
3509
3510         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3511         {
3512                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3513         }
3514
3515         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3516         {
3517                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3518         }
3519
3520         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3521         {
3522                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3523         }
3524
3525         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3526         {
3527                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3528         }
3529
3530         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3531         {
3532                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3533         }
3534
3535         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3536         {
3537                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3538         }
3539
3540         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3541         {
3542                 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3543         }
3544
3545         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3546         {
3547                 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3548         }
3549
3550         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3551         {
3552                 return lhs = lhs << rhs;
3553         }
3554
3555         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3556         {
3557                 return lhs = lhs >> rhs;
3558         }
3559
3560         RValue<UShort4> operator~(RValue<UShort4> val)
3561         {
3562                 return RValue<UShort4>(Nucleus::createNot(val.value));
3563         }
3564
3565         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3566         {
3567                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3568                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3569                 ::basicBlock->appendInst(cmp);
3570
3571                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3572                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3573                 ::basicBlock->appendInst(select);
3574
3575                 return RValue<UShort4>(V(result));
3576         }
3577
3578         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3579         {
3580                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3581                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3582                 ::basicBlock->appendInst(cmp);
3583
3584                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3585                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3586                 ::basicBlock->appendInst(select);
3587
3588                 return RValue<UShort4>(V(result));
3589         }
3590
3591         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3592         {
3593                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3594                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3595                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3596                 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3597                 paddusw->addArg(x.value);
3598                 paddusw->addArg(y.value);
3599                 ::basicBlock->appendInst(paddusw);
3600
3601                 return RValue<UShort4>(V(result));
3602         }
3603
3604         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3605         {
3606                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3607                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3608                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3609                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3610                 psubusw->addArg(x.value);
3611                 psubusw->addArg(y.value);
3612                 ::basicBlock->appendInst(psubusw);
3613
3614                 return RValue<UShort4>(V(result));
3615         }
3616
3617         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3618         {
3619                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3620                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3621                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3622                 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3623                 pmulhuw->addArg(x.value);
3624                 pmulhuw->addArg(y.value);
3625                 ::basicBlock->appendInst(pmulhuw);
3626
3627                 return RValue<UShort4>(V(result));
3628         }
3629
3630         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3631         {
3632                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
3633         }
3634
3635         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3636         {
3637                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3638                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3639                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3640                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3641                 pack->addArg(x.value);
3642                 pack->addArg(y.value);
3643                 ::basicBlock->appendInst(pack);
3644
3645                 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
3646         }
3647
3648         Type *UShort4::getType()
3649         {
3650                 return T(Type_v4i16);
3651         }
3652
3653         Short8::Short8(short c)
3654         {
3655                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3656                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3657         }
3658
3659         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3660         {
3661                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3662                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3663         }
3664
3665         Short8::Short8(RValue<Short8> rhs)
3666         {
3667                 storeValue(rhs.value);
3668         }
3669
3670         Short8::Short8(const Reference<Short8> &rhs)
3671         {
3672                 Value *value = rhs.loadValue();
3673                 storeValue(value);
3674         }
3675
3676         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3677         {
3678                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3679                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3680
3681                 storeValue(packed);
3682         }
3683
3684         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3685         {
3686                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3687         }
3688
3689         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3690         {
3691                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3692         }
3693
3694         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3695         {
3696                 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3697         }
3698
3699         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3700         {
3701                 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3702         }
3703
3704         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3705         {
3706                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
3707         }
3708
3709         RValue<Int4> Abs(RValue<Int4> x)
3710         {
3711                 auto negative = x >> 31;
3712                 return (x ^ negative) - negative;
3713         }
3714
3715         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3716         {
3717                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
3718         }
3719
3720         Type *Short8::getType()
3721         {
3722                 return T(Ice::IceType_v8i16);
3723         }
3724
3725         UShort8::UShort8(unsigned short c)
3726         {
3727                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3728                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3729         }
3730
3731         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3732         {
3733                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3734                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3735         }
3736
3737         UShort8::UShort8(RValue<UShort8> rhs)
3738         {
3739                 storeValue(rhs.value);
3740         }
3741
3742         UShort8::UShort8(const Reference<UShort8> &rhs)
3743         {
3744                 Value *value = rhs.loadValue();
3745                 storeValue(value);
3746         }
3747
3748         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3749         {
3750                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3751                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3752
3753                 storeValue(packed);
3754         }
3755
3756         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3757         {
3758                 storeValue(rhs.value);
3759
3760                 return rhs;
3761         }
3762
3763         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3764         {
3765                 Value *value = rhs.loadValue();
3766                 storeValue(value);
3767
3768                 return RValue<UShort8>(value);
3769         }
3770
3771         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3772         {
3773                 Value *value = rhs.loadValue();
3774                 storeValue(value);
3775
3776                 return RValue<UShort8>(value);
3777         }
3778
3779         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3780         {
3781                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3782         }
3783
3784         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3785         {
3786                 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3787         }
3788
3789         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3790         {
3791                 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3792         }
3793
3794         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3795         {
3796                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3797         }
3798
3799         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3800         {
3801                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3802         }
3803
3804         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3805         {
3806                 return lhs = lhs + rhs;
3807         }
3808
3809         RValue<UShort8> operator~(RValue<UShort8> val)
3810         {
3811                 return RValue<UShort8>(Nucleus::createNot(val.value));
3812         }
3813
3814         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3815         {
3816                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3817         }
3818
3819         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3820         {
3821                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3822         }
3823
3824         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3825 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3826 //      {
3827 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3828 //      }
3829
3830         Type *UShort8::getType()
3831         {
3832                 return T(Ice::IceType_v8i16);
3833         }
3834
3835         Int::Int(Argument<Int> argument)
3836         {
3837                 storeValue(argument.value);
3838         }
3839
3840         Int::Int(RValue<Byte> cast)
3841         {
3842                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3843
3844                 storeValue(integer);
3845         }
3846
3847         Int::Int(RValue<SByte> cast)
3848         {
3849                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3850
3851                 storeValue(integer);
3852         }
3853
3854         Int::Int(RValue<Short> cast)
3855         {
3856                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3857
3858                 storeValue(integer);
3859         }
3860
3861         Int::Int(RValue<UShort> cast)
3862         {
3863                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3864
3865                 storeValue(integer);
3866         }
3867
3868         Int::Int(RValue<Int2> cast)
3869         {
3870                 *this = Extract(cast, 0);
3871         }
3872
3873         Int::Int(RValue<Long> cast)
3874         {
3875                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3876
3877                 storeValue(integer);
3878         }
3879
3880         Int::Int(RValue<Float> cast)
3881         {
3882                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3883
3884                 storeValue(integer);
3885         }
3886
3887         Int::Int(int x)
3888         {
3889                 storeValue(Nucleus::createConstantInt(x));
3890         }
3891
3892         Int::Int(RValue<Int> rhs)
3893         {
3894                 storeValue(rhs.value);
3895         }
3896
3897         Int::Int(RValue<UInt> rhs)
3898         {
3899                 storeValue(rhs.value);
3900         }
3901
3902         Int::Int(const Int &rhs)
3903         {
3904                 Value *value = rhs.loadValue();
3905                 storeValue(value);
3906         }
3907
3908         Int::Int(const Reference<Int> &rhs)
3909         {
3910                 Value *value = rhs.loadValue();
3911                 storeValue(value);
3912         }
3913
3914         Int::Int(const UInt &rhs)
3915         {
3916                 Value *value = rhs.loadValue();
3917                 storeValue(value);
3918         }
3919
3920         Int::Int(const Reference<UInt> &rhs)
3921         {
3922                 Value *value = rhs.loadValue();
3923                 storeValue(value);
3924         }
3925
3926         RValue<Int> Int::operator=(int rhs)
3927         {
3928                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3929         }
3930
3931         RValue<Int> Int::operator=(RValue<Int> rhs)
3932         {
3933                 storeValue(rhs.value);
3934
3935                 return rhs;
3936         }
3937
3938         RValue<Int> Int::operator=(RValue<UInt> rhs)
3939         {
3940                 storeValue(rhs.value);
3941
3942                 return RValue<Int>(rhs);
3943         }
3944
3945         RValue<Int> Int::operator=(const Int &rhs)
3946         {
3947                 Value *value = rhs.loadValue();
3948                 storeValue(value);
3949
3950                 return RValue<Int>(value);
3951         }
3952
3953         RValue<Int> Int::operator=(const Reference<Int> &rhs)
3954         {
3955                 Value *value = rhs.loadValue();
3956                 storeValue(value);
3957
3958                 return RValue<Int>(value);
3959         }
3960
3961         RValue<Int> Int::operator=(const UInt &rhs)
3962         {
3963                 Value *value = rhs.loadValue();
3964                 storeValue(value);
3965
3966                 return RValue<Int>(value);
3967         }
3968
3969         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3970         {
3971                 Value *value = rhs.loadValue();
3972                 storeValue(value);
3973
3974                 return RValue<Int>(value);
3975         }
3976
3977         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3978         {
3979                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3980         }
3981
3982         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3983         {
3984                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3985         }
3986
3987         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3988         {
3989                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3990         }
3991
3992         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3993         {
3994                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3995         }
3996
3997         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3998         {
3999                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
4000         }
4001
4002         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
4003         {
4004                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
4005         }
4006
4007         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
4008         {
4009                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
4010         }
4011
4012         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
4013         {
4014                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
4015         }
4016
4017         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
4018         {
4019                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
4020         }
4021
4022         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
4023         {
4024                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
4025         }
4026
4027         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
4028         {
4029                 return lhs = lhs + rhs;
4030         }
4031
4032         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
4033         {
4034                 return lhs = lhs - rhs;
4035         }
4036
4037         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
4038         {
4039                 return lhs = lhs * rhs;
4040         }
4041
4042         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
4043         {
4044                 return lhs = lhs / rhs;
4045         }
4046
4047         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
4048         {
4049                 return lhs = lhs % rhs;
4050         }
4051
4052         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
4053         {
4054                 return lhs = lhs & rhs;
4055         }
4056
4057         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
4058         {
4059                 return lhs = lhs | rhs;
4060         }
4061
4062         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
4063         {
4064                 return lhs = lhs ^ rhs;
4065         }
4066
4067         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
4068         {
4069                 return lhs = lhs << rhs;
4070         }
4071
4072         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
4073         {
4074                 return lhs = lhs >> rhs;
4075         }
4076
4077         RValue<Int> operator+(RValue<Int> val)
4078         {
4079                 return val;
4080         }
4081
4082         RValue<Int> operator-(RValue<Int> val)
4083         {
4084                 return RValue<Int>(Nucleus::createNeg(val.value));
4085         }
4086
4087         RValue<Int> operator~(RValue<Int> val)
4088         {
4089                 return RValue<Int>(Nucleus::createNot(val.value));
4090         }
4091
4092         RValue<Int> operator++(Int &val, int)   // Post-increment
4093         {
4094                 RValue<Int> res = val;
4095                 val += 1;
4096                 return res;
4097         }
4098
4099         const Int &operator++(Int &val)   // Pre-increment
4100         {
4101                 val += 1;
4102                 return val;
4103         }
4104
4105         RValue<Int> operator--(Int &val, int)   // Post-decrement
4106         {
4107                 RValue<Int> res = val;
4108                 val -= 1;
4109                 return res;
4110         }
4111
4112         const Int &operator--(Int &val)   // Pre-decrement
4113         {
4114                 val -= 1;
4115                 return val;
4116         }
4117
4118         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4119         {
4120                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4121         }
4122
4123         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)   // Signed less-or-equal: wraps Nucleus::createICmpSLE
4124         {
4125                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4126         }
4127
4128         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)   // Signed greater-than: wraps Nucleus::createICmpSGT
4129         {
4130                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4131         }
4132
4133         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)   // Signed greater-or-equal: wraps Nucleus::createICmpSGE
4134         {
4135                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4136         }
4137
4138         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)   // Inequality: wraps Nucleus::createICmpNE
4139         {
4140                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4141         }
4142
4143         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)   // Equality: wraps Nucleus::createICmpEQ
4144         {
4145                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4146         }
4147
4148         RValue<Int> Max(RValue<Int> x, RValue<Int> y)   // Selects x when x > y, otherwise y
4149         {
4150                 return IfThenElse(x > y, x, y);
4151         }
4152
4153         RValue<Int> Min(RValue<Int> x, RValue<Int> y)   // Selects x when x < y, otherwise y
4154         {
4155                 return IfThenElse(x < y, x, y);
4156         }
4157
4158         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)   // Raises x to at least min, then limits it to at most max
4159         {
4160                 return Min(Max(x, min), max);
4161         }
4162
4163         RValue<Int> RoundInt(RValue<Float> cast)
4164         {
4165                 if(emulateIntrinsics)
4166                 {
4167                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4168                         return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
4169                 }
4170                 else
4171                 {
4172                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4173                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4174                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4175                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4176                         nearbyint->addArg(cast.value);
4177                         ::basicBlock->appendInst(nearbyint);
4178
4179                         return RValue<Int>(V(result));
4180                 }
4181         }
4182
4183         Type *Int::getType()   // Int values use Ice::IceType_i32
4184         {
4185                 return T(Ice::IceType_i32);
4186         }
4187
4188         Long::Long(RValue<Int> cast)
4189         {
4190                 // Extend the Int operand to Long's type with createSExt, then store it in this variable.
4191                 Value *extended = Nucleus::createSExt(cast.value, Long::getType());
4192                 storeValue(extended);
4193         }
4194
4195         Long::Long(RValue<UInt> cast)
4196         {
4197                 // Extend the UInt operand to Long's type with createZExt, then store it in this variable.
4198                 Value *extended = Nucleus::createZExt(cast.value, Long::getType());
4199                 storeValue(extended);
4200         }
4201
4202         Long::Long(RValue<Long> rhs)   // Construct from an r-value: stores rhs's value directly
4203         {
4204                 storeValue(rhs.value);
4205         }
4206
4207         RValue<Long> Long::operator=(int64_t rhs)   // Assign a literal: stores a Long constant built from rhs and wraps storeValue's result
4208         {
4209                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4210         }
4211
4212         RValue<Long> Long::operator=(RValue<Long> rhs)   // Assign an r-value: stores rhs's value, then returns rhs itself
4213         {
4214                 storeValue(rhs.value);
4215
4216                 return rhs;
4217         }
4218
4219         RValue<Long> Long::operator=(const Long &rhs)
4220         {
4221                 // Load the source once; the same Value is both stored here and returned as the r-value.
4222                 Value *loaded = rhs.loadValue();
4223                 storeValue(loaded);
4224                 return RValue<Long>(loaded);
4225         }
4226
4227         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4228         {
4229                 // Load through the reference once; the same Value is both stored here and returned as the r-value.
4230                 Value *loaded = rhs.loadValue();
4231                 storeValue(loaded);
4232                 return RValue<Long>(loaded);
4233         }
4234
4235         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)   // Addition via Nucleus::createAdd
4236         {
4237                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4238         }
4239
4240         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)   // Subtraction via Nucleus::createSub
4241         {
4242                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4243         }
4244
4245         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)   // Add-assign: stores lhs + rhs into lhs and yields the assignment's result
4246         {
4247                 return lhs = lhs + rhs;
4248         }
4249
4250         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)   // Subtract-assign: stores lhs - rhs into lhs and yields the assignment's result
4251         {
4252                 return lhs = lhs - rhs;
4253         }
4254
4255         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)   // Wraps Nucleus::createAtomicAdd applied to pointer x and operand y
4256         {
4257                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4258         }
4259
4260         Type *Long::getType()   // Long values use Ice::IceType_i64
4261         {
4262                 return T(Ice::IceType_i64);
4263         }
4264
4265         UInt::UInt(Argument<UInt> argument)   // Construct from a function argument: stores the argument's value
4266         {
4267                 storeValue(argument.value);
4268         }
4269
4270         UInt::UInt(RValue<UShort> cast)
4271         {
4272                 // Extend the UShort operand to UInt's type with createZExt, then store it in this variable.
4273                 Value *extended = Nucleus::createZExt(cast.value, UInt::getType());
4274                 storeValue(extended);
4275         }
4276
4277         UInt::UInt(RValue<Long> cast)
4278         {
4279                 // Narrow the Long operand to UInt's type with createTrunc, then store it in this variable.
4280                 Value *truncated = Nucleus::createTrunc(cast.value, UInt::getType());
4281                 storeValue(truncated);
4282         }
4283
4284         UInt::UInt(RValue<Float> cast)   // Float-to-UInt conversion assembled from signed Int conversions
4285         {
4286                 // Smallest positive value representable in UInt, but not in Int
4287                 const unsigned int ustart = 0x80000000u;
4288                 const float ustartf = float(ustart);
4289
4290                 // If the value is negative, store 0, otherwise store the result of the conversion.
4291                 storeValue((~(As<Int>(cast) >> 31) &
4292                 // Check if the value can be represented as an Int (i.e. it is below ustartf)
4293                         IfThenElse(cast >= ustartf,
4294                 // If the value is too large, subtract ustart and re-add it after conversion.
4295                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4296                 // Otherwise, just convert normally
4297                                 Int(cast))).value);
4298         }
4299
4300         UInt::UInt(int x)   // Construct from a literal: stores an integer constant built from x
4301         {
4302                 storeValue(Nucleus::createConstantInt(x));
4303         }
4304
4305         UInt::UInt(unsigned int x)   // Construct from an unsigned literal: stores an integer constant built from x
4306         {
4307                 storeValue(Nucleus::createConstantInt(x));
4308         }
4309
4310         UInt::UInt(RValue<UInt> rhs)   // Construct from an r-value: stores rhs's value directly
4311         {
4312                 storeValue(rhs.value);
4313         }
4314
4315         UInt::UInt(RValue<Int> rhs)   // Construct from an Int r-value: the value is stored as-is, no conversion is emitted here
4316         {
4317                 storeValue(rhs.value);
4318         }
4319
4320         UInt::UInt(const UInt &rhs)
4321         {
4322                 // Copy construction: read the source variable's current value and store it here.
4323                 storeValue(rhs.loadValue());
4324         }
4325
4326         UInt::UInt(const Reference<UInt> &rhs)
4327         {
4328                 // Construct from a reference: load the referenced value and store it here.
4329                 storeValue(rhs.loadValue());
4330         }
4331
4332         UInt::UInt(const Int &rhs)
4333         {
4334                 // Construct from an Int variable: its loaded value is stored as-is, no conversion is emitted here.
4335                 storeValue(rhs.loadValue());
4336         }
4337
4338         UInt::UInt(const Reference<Int> &rhs)
4339         {
4340                 // Construct from an Int reference: the referenced value is loaded and stored as-is.
4341                 storeValue(rhs.loadValue());
4342         }
4343
4344         RValue<UInt> UInt::operator=(unsigned int rhs)   // Assign a literal: stores an integer constant built from rhs and wraps storeValue's result
4345         {
4346                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4347         }
4348
4349         RValue<UInt> UInt::operator=(RValue<UInt> rhs)   // Assign an r-value: stores rhs's value, then returns rhs itself
4350         {
4351                 storeValue(rhs.value);
4352
4353                 return rhs;
4354         }
4355
4356         RValue<UInt> UInt::operator=(RValue<Int> rhs)   // Assign an Int r-value: stores its value as-is
4357         {
4358                 storeValue(rhs.value);
4359
4360                 return RValue<UInt>(rhs);   // The Int r-value is converted to RValue<UInt> for the return
4361         }
4362
4363         RValue<UInt> UInt::operator=(const UInt &rhs)
4364         {
4365                 // Load the source once; the same Value is both stored here and returned as the r-value.
4366                 Value *loaded = rhs.loadValue();
4367                 storeValue(loaded);
4368                 return RValue<UInt>(loaded);
4369         }
4370
4371         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4372         {
4373                 // Load through the reference once; the same Value is both stored here and returned as the r-value.
4374                 Value *loaded = rhs.loadValue();
4375                 storeValue(loaded);
4376                 return RValue<UInt>(loaded);
4377         }
4378
4379         RValue<UInt> UInt::operator=(const Int &rhs)
4380         {
4381                 // Load the Int variable once; that Value is stored as-is and also returned wrapped as RValue<UInt>.
4382                 Value *loaded = rhs.loadValue();
4383                 storeValue(loaded);
4384                 return RValue<UInt>(loaded);
4385         }
4386
4387         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4388         {
4389                 // Load through the Int reference once; that Value is stored as-is and also returned wrapped as RValue<UInt>.
4390                 Value *loaded = rhs.loadValue();
4391                 storeValue(loaded);
4392                 return RValue<UInt>(loaded);
4393         }
4394
4395         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)   // Addition via Nucleus::createAdd
4396         {
4397                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4398         }
4399
4400         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)   // Subtraction via Nucleus::createSub
4401         {
4402                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4403         }
4404
4405         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)   // Multiplication via Nucleus::createMul
4406         {
4407                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4408         }
4409
4410         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)   // Unsigned division via Nucleus::createUDiv
4411         {
4412                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4413         }
4414
4415         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)   // Unsigned remainder via Nucleus::createURem
4416         {
4417                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4418         }
4419
4420         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)   // Bitwise AND via Nucleus::createAnd
4421         {
4422                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4423         }
4424
4425         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)   // Bitwise OR via Nucleus::createOr
4426         {
4427                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4428         }
4429
4430         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)   // Bitwise XOR via Nucleus::createXor
4431         {
4432                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4433         }
4434
4435         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)   // Shift left via Nucleus::createShl
4436         {
4437                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4438         }
4439
4440         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)   // Logical shift right via Nucleus::createLShr
4441         {
4442                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4443         }
4444
4445         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)   // Add-assign: stores lhs + rhs into lhs and yields the assignment's result
4446         {
4447                 return lhs = lhs + rhs;
4448         }
4449
4450         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)   // Subtract-assign: stores lhs - rhs into lhs and yields the assignment's result
4451         {
4452                 return lhs = lhs - rhs;
4453         }
4454
4455         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)   // Multiply-assign: stores lhs * rhs into lhs and yields the assignment's result
4456         {
4457                 return lhs = lhs * rhs;
4458         }
4459
4460         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)   // Divide-assign: stores lhs / rhs into lhs and yields the assignment's result
4461         {
4462                 return lhs = lhs / rhs;
4463         }
4464
4465         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)   // Remainder-assign: stores lhs % rhs into lhs and yields the assignment's result
4466         {
4467                 return lhs = lhs % rhs;
4468         }
4469
4470         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)   // AND-assign: stores lhs & rhs into lhs and yields the assignment's result
4471         {
4472                 return lhs = lhs & rhs;
4473         }
4474
4475         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)   // OR-assign: stores lhs | rhs into lhs and yields the assignment's result
4476         {
4477                 return lhs = lhs | rhs;
4478         }
4479
4480         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)   // XOR-assign: stores lhs ^ rhs into lhs and yields the assignment's result
4481         {
4482                 return lhs = lhs ^ rhs;
4483         }
4484
4485         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)   // Shift-left-assign: stores lhs << rhs into lhs and yields the assignment's result
4486         {
4487                 return lhs = lhs << rhs;
4488         }
4489
4490         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)   // Shift-right-assign: stores lhs >> rhs into lhs and yields the assignment's result
4491         {
4492                 return lhs = lhs >> rhs;
4493         }
4494
4495         RValue<UInt> operator+(RValue<UInt> val)   // Unary plus: hands the operand back untouched
4496         {
4497                 return val;
4498         }
4499
4500         RValue<UInt> operator-(RValue<UInt> val)   // Unary minus: wraps the value produced by Nucleus::createNeg
4501         {
4502                 return RValue<UInt>(Nucleus::createNeg(val.value));
4503         }
4504
4505         RValue<UInt> operator~(RValue<UInt> val)   // Bitwise NOT: wraps the value produced by Nucleus::createNot
4506         {
4507                 return RValue<UInt>(Nucleus::createNot(val.value));
4508         }
4509
4510         RValue<UInt> operator++(UInt &val, int)   // Post-increment: returns the value held before adding 1
4511         {
4512                 RValue<UInt> previous = val;   // Captured before val is modified
4513                 val += 1;
4514                 return previous;
4515         }
4516
4517         const UInt &operator++(UInt &val)   // Pre-increment
4518         {
4519                 val += 1;
4520                 return val;   // Hands back the variable itself, after the update
4521         }
4522
4523         RValue<UInt> operator--(UInt &val, int)   // Post-decrement: returns the value held before subtracting 1
4524         {
4525                 RValue<UInt> previous = val;   // Captured before val is modified
4526                 val -= 1;
4527                 return previous;
4528         }
4529
4530         const UInt &operator--(UInt &val)   // Pre-decrement
4531         {
4532                 val -= 1;
4533                 return val;   // Hands back the variable itself, after the update
4534         }
4535
4536         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)   // Selects x when x > y, otherwise y
4537         {
4538                 return IfThenElse(x > y, x, y);
4539         }
4540
4541         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)   // Selects x when x < y, otherwise y
4542         {
4543                 return IfThenElse(x < y, x, y);
4544         }
4545
4546         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)   // Raises x to at least min, then limits it to at most max
4547         {
4548                 return Min(Max(x, min), max);
4549         }
4550
4551         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)   // Unsigned less-than: wraps Nucleus::createICmpULT
4552         {
4553                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4554         }
4555
4556         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)   // Unsigned less-or-equal: wraps Nucleus::createICmpULE
4557         {
4558                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4559         }
4560
4561         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)   // Unsigned greater-than: wraps Nucleus::createICmpUGT
4562         {
4563                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4564         }
4565
4566         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)   // Unsigned greater-or-equal: wraps Nucleus::createICmpUGE
4567         {
4568                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4569         }
4570
4571         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)   // Inequality: wraps Nucleus::createICmpNE
4572         {
4573                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4574         }
4575
4576         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)   // Equality: wraps Nucleus::createICmpEQ
4577         {
4578                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4579         }
4580
4581 //      RValue<UInt> RoundUInt(RValue<Float> cast)
4582 //      {
4583 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
4584 //      }
4585
4586         Type *UInt::getType()   // UInt values use Ice::IceType_i32, the same type Int::getType returns
4587         {
4588                 return T(Ice::IceType_i32);
4589         }
4590
4591 //      Int2::Int2(RValue<Int> cast)
4592 //      {
4593 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4594 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4595 //
4596 //              Constant *shuffle[2];
4597 //              shuffle[0] = Nucleus::createConstantInt(0);
4598 //              shuffle[1] = Nucleus::createConstantInt(0);
4599 //
4600 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4601 //
4602 //              storeValue(replicate);
4603 //      }
4604
4605         Int2::Int2(RValue<Int4> cast)   // Construct from an Int4: stores its value bitcast to Int2's type
4606         {
4607                 storeValue(Nucleus::createBitCast(cast.value, getType()));
4608         }
4609
4610         Int2::Int2(int x, int y)   // Construct from two literals: stores a constant vector holding x and y
4611         {
4612                 int64_t lanes[2] = {x, y};   // Widened to int64_t, the element type handed to createConstantVector here
4613                 storeValue(Nucleus::createConstantVector(lanes, getType()));
4614         }
4615
4616         Int2::Int2(RValue<Int2> rhs)   // Construct from an r-value: stores rhs's value directly
4617         {
4618                 storeValue(rhs.value);
4619         }
4620
4621         Int2::Int2(const Int2 &rhs)
4622         {
4623                 // Copy construction: read the source variable's current value and store it here.
4624                 storeValue(rhs.loadValue());
4625         }
4626
4627         Int2::Int2(const Reference<Int2> &rhs)
4628         {
4629                 // Construct from a reference: load the referenced value and store it here.
4630                 storeValue(rhs.loadValue());
4631         }
4632
4633         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4634         {
4635                 int shuffle[4] = {0, 4, 1, 5};
4636                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4637
4638                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4639         }
4640
4641         RValue<Int2> Int2::operator=(RValue<Int2> rhs)   // Assign an r-value: stores rhs's value, then returns rhs itself
4642         {
4643                 storeValue(rhs.value);
4644
4645                 return rhs;
4646         }
4647
4648         RValue<Int2> Int2::operator=(const Int2 &rhs)
4649         {
4650                 // Load the source once; the same Value is both stored here and returned as the r-value.
4651                 Value *loaded = rhs.loadValue();
4652                 storeValue(loaded);
4653                 return RValue<Int2>(loaded);
4654         }
4655
4656         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4657         {
4658                 // Load through the reference once; the same Value is both stored here and returned as the r-value.
4659                 Value *loaded = rhs.loadValue();
4660                 storeValue(loaded);
4661                 return RValue<Int2>(loaded);
4662         }
4663
4664         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)   // Addition via Nucleus::createAdd
4665         {
4666                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4667         }
4668
4669         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)   // Subtraction via Nucleus::createSub
4670         {
4671                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4672         }
4673
4674 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4675 //      {
4676 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4677 //      }
4678
4679 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4680 //      {
4681 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4682 //      }
4683
4684 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4685 //      {
4686 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4687 //      }
4688
4689         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)   // Bitwise AND via Nucleus::createAnd
4690         {
4691                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4692         }
4693
4694         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)   // Bitwise OR via Nucleus::createOr
4695         {
4696                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4697         }
4698
4699         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)   // Bitwise XOR via Nucleus::createXor
4700         {
4701                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4702         }
4703
4704         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)   // Shift left by an immediate: rhs is passed as a 32-bit integer constant
4705         {
4706                 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4707         }
4708
4709         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)   // Arithmetic shift right (createAShr) by an immediate: rhs is passed as a 32-bit integer constant
4710         {
4711                 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4712         }
4713
4714         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)   // Add-assign: stores lhs + rhs into lhs and yields the assignment's result
4715         {
4716                 return lhs = lhs + rhs;
4717         }
4718
4719         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)   // Subtract-assign: stores lhs - rhs into lhs and yields the assignment's result
4720         {
4721                 return lhs = lhs - rhs;
4722         }
4723
4724 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4725 //      {
4726 //              return lhs = lhs * rhs;
4727 //      }
4728
4729 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4730 //      {
4731 //              return lhs = lhs / rhs;
4732 //      }
4733
4734 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4735 //      {
4736 //              return lhs = lhs % rhs;
4737 //      }
4738
4739         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)   // AND-assign: stores lhs & rhs into lhs and yields the assignment's result
4740         {
4741                 return lhs = lhs & rhs;
4742         }
4743
4744         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)   // OR-assign: stores lhs | rhs into lhs and yields the assignment's result
4745         {
4746                 return lhs = lhs | rhs;
4747         }
4748
4749         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)   // XOR-assign: stores lhs ^ rhs into lhs and yields the assignment's result
4750         {
4751                 return lhs = lhs ^ rhs;
4752         }
4753
4754         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)   // Shift-left-assign by an immediate count: stores lhs << rhs into lhs
4755         {
4756                 return lhs = lhs << rhs;
4757         }
4758
4759         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)   // Shift-right-assign by an immediate count: stores lhs >> rhs into lhs
4760         {
4761                 return lhs = lhs >> rhs;
4762         }
4763
4764 //      RValue<Int2> operator+(RValue<Int2> val)
4765 //      {
4766 //              return val;
4767 //      }
4768
4769 //      RValue<Int2> operator-(RValue<Int2> val)
4770 //      {
4771 //              return RValue<Int2>(Nucleus::createNeg(val.value));
4772 //      }
4773
4774         RValue<Int2> operator~(RValue<Int2> val)   // Bitwise NOT: wraps the value produced by Nucleus::createNot
4775         {
4776                 return RValue<Int2>(Nucleus::createNot(val.value));
4777         }
4778
4779         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)   // Interleaves elements 0 and 1 of x with those of y; result is reinterpreted as Short4
4780         {
4781                 int interleave[4] = {0, 4, 1, 5};   // Real type is v4i32
4782                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, interleave));
4783         }
4784
4785         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4786         {
4787                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4788                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4789                 return As<Short4>(Swizzle(lowHigh, 0xEE));
4790         }
4791
4792         RValue<Int> Extract(RValue<Int2> val, int i)   // Reads element i of val as an Int via Nucleus::createExtractElement
4793         {
4794                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4795         }
4796
4797         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)   // Wraps Nucleus::createInsertElement of 'element' at index i of val
4798         {
4799                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
4800         }
4801
4802         Type *Int2::getType()   // Int2 values use Type_v2i32
4803         {
4804                 return T(Type_v2i32);
4805         }
4806
4807         UInt2::UInt2(unsigned int x, unsigned int y)   // Construct from two literals: stores a constant vector holding x and y
4808         {
4809                 int64_t lanes[2] = {x, y};   // Widened to int64_t, the element type handed to createConstantVector here
4810                 storeValue(Nucleus::createConstantVector(lanes, getType()));
4811         }
4812
4813         UInt2::UInt2(RValue<UInt2> rhs)   // Construct from an r-value: stores rhs's value directly
4814         {
4815                 storeValue(rhs.value);
4816         }
4817
4818         UInt2::UInt2(const UInt2 &rhs)
4819         {
4820                 // Copy construction: read the source variable's current value and store it here.
4821                 storeValue(rhs.loadValue());
4822         }
4823
4824         UInt2::UInt2(const Reference<UInt2> &rhs)
4825         {
4826                 // Construct from a reference: load the referenced value and store it here.
4827                 storeValue(rhs.loadValue());
4828         }
4829
4830         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)   // Assign an r-value: stores rhs's value, then returns rhs itself
4831         {
4832                 storeValue(rhs.value);
4833
4834                 return rhs;
4835         }
4836
4837         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4838         {
4839                 // Load the source once; the same Value is both stored here and returned as the r-value.
4840                 Value *loaded = rhs.loadValue();
4841                 storeValue(loaded);
4842                 return RValue<UInt2>(loaded);
4843         }
4844
4845         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4846         {
4847                 // Load through the reference once; the same Value is both stored here and returned as the r-value.
4848                 Value *loaded = rhs.loadValue();
4849                 storeValue(loaded);
4850                 return RValue<UInt2>(loaded);
4851         }
4852
4853         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)   // Addition via Nucleus::createAdd
4854         {
4855                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
4856         }
4857
4858         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)   // Subtraction via Nucleus::createSub
4859         {
4860                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4861         }
4862
4863 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4864 //      {
4865 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4866 //      }
4867
4868 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4869 //      {
4870 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4871 //      }
4872
4873 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4874 //      {
4875 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
4876 //      }
4877
4878         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)   // Bitwise AND via Nucleus::createAnd
4879         {
4880                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
4881         }
4882
4883         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)   // Bitwise OR via Nucleus::createOr
4884         {
4885                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
4886         }
4887
4888         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)   // Bitwise XOR via Nucleus::createXor
4889         {
4890                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
4891         }
4892
4893         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)   // Shift left by an immediate: rhs is passed as a 32-bit integer constant
4894         {
4895                 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4896         }
4897
4898         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)   // Logical shift right (createLShr) by an immediate: rhs is passed as a 32-bit integer constant
4899         {
4900                 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4901         }
4902
4903         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)   // Add-assign: stores lhs + rhs into lhs and yields the assignment's result
4904         {
4905                 return lhs = lhs + rhs;
4906         }
4907
4908         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)   // Subtract-assign: stores lhs - rhs into lhs and yields the assignment's result
4909         {
4910                 return lhs = lhs - rhs;
4911         }
4912
4913 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4914 //      {
4915 //              return lhs = lhs * rhs;
4916 //      }
4917
4918 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4919 //      {
4920 //              return lhs = lhs / rhs;
4921 //      }
4922
4923 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4924 //      {
4925 //              return lhs = lhs % rhs;
4926 //      }
4927
4928         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)   // AND-assign: stores lhs & rhs into lhs and yields the assignment's result
4929         {
4930                 return lhs = lhs & rhs;
4931         }
4932
4933         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)   // OR-assign: stores lhs | rhs into lhs and yields the assignment's result
4934         {
4935                 return lhs = lhs | rhs;
4936         }
4937
4938         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)   // XOR-assign: stores lhs ^ rhs into lhs and yields the assignment's result
4939         {
4940                 return lhs = lhs ^ rhs;
4941         }
4942
4943         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)   // Shift-left-assign by an immediate count: stores lhs << rhs into lhs
4944         {
4945                 return lhs = lhs << rhs;
4946         }
4947
4948         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)   // Shift-right-assign by an immediate count: stores lhs >> rhs into lhs
4949         {
4950                 return lhs = lhs >> rhs;
4951         }
4952
4953 //      RValue<UInt2> operator+(RValue<UInt2> val)
4954 //      {
4955 //              return val;
4956 //      }
4957
4958 //      RValue<UInt2> operator-(RValue<UInt2> val)
4959 //      {
4960 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
4961 //      }
4962
4963         RValue<UInt2> operator~(RValue<UInt2> val)   // Bitwise NOT: wraps the value produced by Nucleus::createNot
4964         {
4965                 return RValue<UInt2>(Nucleus::createNot(val.value));
4966         }
4967
4968         Type *UInt2::getType()   // UInt2 values use Type_v2i32, the same type Int2::getType returns
4969         {
4970                 return T(Type_v2i32);
4971         }
4972
4973         Int4::Int4(RValue<Byte4> cast)
4974         {
4975                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4976                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4977
4978                 Value *e;
4979                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4980                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4981                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4982
4983                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4984                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4985                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4986
4987                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4988                 storeValue(f);
4989         }
4990
4991         Int4::Int4(RValue<SByte4> cast)
4992         {
4993                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4994                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4995
4996                 Value *e;
4997                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4998                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4999                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
5000
5001                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5002                 Value *d = Nucleus::createBitCast(c, Short8::getType());
5003                 e = Nucleus::createShuffleVector(d, d, swizzle2);
5004
5005                 Value *f = Nucleus::createBitCast(e, Int4::getType());
5006                 Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
5007                 storeValue(g);
5008         }
5009
5010         Int4::Int4(RValue<Float4> cast)
5011         {
5012                 // Convert the Float4 operand to Int4's type with createFPToSI, then store it in this variable.
5013                 Value *converted = Nucleus::createFPToSI(cast.value, Int4::getType());
5014                 storeValue(converted);
5015         }
5016
5017         Int4::Int4(RValue<Short4> cast)
5018         {
5019                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5020                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
5021                 Value *d = Nucleus::createBitCast(c, Int4::getType());
5022                 Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
5023                 storeValue(e);
5024         }
5025
5026         Int4::Int4(RValue<UShort4> cast)
5027         {
5028                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5029                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
5030                 Value *d = Nucleus::createBitCast(c, Int4::getType());
5031                 storeValue(d);
5032         }
5033
5034         Int4::Int4(int xyzw)   // All four elements are set to xyzw
5035         {
5036                 constant(xyzw, xyzw, xyzw, xyzw);
5037         }
5038
5039         Int4::Int4(int x, int yzw)   // First element is x; the remaining three are yzw
5040         {
5041                 constant(x, yzw, yzw, yzw);
5042         }
5043
5044         Int4::Int4(int x, int y, int zw)   // Elements are x, y, then zw for the last two
5045         {
5046                 constant(x, y, zw, zw);
5047         }
5048
5049         Int4::Int4(int x, int y, int z, int w)   // One literal per element, in order
5050         {
5051                 constant(x, y, z, w);
5052         }
5053
5054         void Int4::constant(int x, int y, int z, int w)
5055         {
5056                 int64_t constantVector[4] = {x, y, z, w};
5057                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5058         }
5059
5060         Int4::Int4(RValue<Int4> rhs)
5061         {
5062                 storeValue(rhs.value);
5063         }
5064
5065         Int4::Int4(const Int4 &rhs)
5066         {
5067                 Value *value = rhs.loadValue();
5068                 storeValue(value);
5069         }
5070
5071         Int4::Int4(const Reference<Int4> &rhs)
5072         {
5073                 Value *value = rhs.loadValue();
5074                 storeValue(value);
5075         }
5076
5077         Int4::Int4(RValue<UInt4> rhs)
5078         {
5079                 storeValue(rhs.value);
5080         }
5081
5082         Int4::Int4(const UInt4 &rhs)
5083         {
5084                 Value *value = rhs.loadValue();
5085                 storeValue(value);
5086         }
5087
5088         Int4::Int4(const Reference<UInt4> &rhs)
5089         {
5090                 Value *value = rhs.loadValue();
5091                 storeValue(value);
5092         }
5093
5094         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5095         {
5096                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5097                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5098
5099                 storeValue(packed);
5100         }
5101
5102         Int4::Int4(RValue<Int> rhs)
5103         {
5104                 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
5105
5106                 int swizzle[4] = {0, 0, 0, 0};
5107                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
5108
5109                 storeValue(replicate);
5110         }
5111
5112         Int4::Int4(const Int &rhs)
5113         {
5114                 *this = RValue<Int>(rhs.loadValue());
5115         }
5116
5117         Int4::Int4(const Reference<Int> &rhs)
5118         {
5119                 *this = RValue<Int>(rhs.loadValue());
5120         }
5121
5122         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5123         {
5124                 storeValue(rhs.value);
5125
5126                 return rhs;
5127         }
5128
5129         RValue<Int4> Int4::operator=(const Int4 &rhs)
5130         {
5131                 Value *value = rhs.loadValue();
5132                 storeValue(value);
5133
5134                 return RValue<Int4>(value);
5135         }
5136
5137         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5138         {
5139                 Value *value = rhs.loadValue();
5140                 storeValue(value);
5141
5142                 return RValue<Int4>(value);
5143         }
5144
5145         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5146         {
5147                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5148         }
5149
5150         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5151         {
5152                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5153         }
5154
5155         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5156         {
5157                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5158         }
5159
5160         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5161         {
5162                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5163         }
5164
5165         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5166         {
5167                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5168         }
5169
5170         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5171         {
5172                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5173         }
5174
5175         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5176         {
5177                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5178         }
5179
5180         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5181         {
5182                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5183         }
5184
5185         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5186         {
5187                 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5188         }
5189
5190         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5191         {
5192                 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5193         }
5194
5195         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5196         {
5197                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5198         }
5199
5200         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5201         {
5202                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5203         }
5204
5205         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5206         {
5207                 return lhs = lhs + rhs;
5208         }
5209
5210         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5211         {
5212                 return lhs = lhs - rhs;
5213         }
5214
5215         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5216         {
5217                 return lhs = lhs * rhs;
5218         }
5219
5220 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5221 //      {
5222 //              return lhs = lhs / rhs;
5223 //      }
5224
5225 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5226 //      {
5227 //              return lhs = lhs % rhs;
5228 //      }
5229
5230         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5231         {
5232                 return lhs = lhs & rhs;
5233         }
5234
5235         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5236         {
5237                 return lhs = lhs | rhs;
5238         }
5239
5240         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5241         {
5242                 return lhs = lhs ^ rhs;
5243         }
5244
5245         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5246         {
5247                 return lhs = lhs << rhs;
5248         }
5249
5250         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5251         {
5252                 return lhs = lhs >> rhs;
5253         }
5254
5255         RValue<Int4> operator+(RValue<Int4> val)
5256         {
5257                 return val;
5258         }
5259
5260         RValue<Int4> operator-(RValue<Int4> val)
5261         {
5262                 return RValue<Int4>(Nucleus::createNeg(val.value));
5263         }
5264
5265         RValue<Int4> operator~(RValue<Int4> val)
5266         {
5267                 return RValue<Int4>(Nucleus::createNot(val.value));
5268         }
5269
5270         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5271         {
5272                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5273         }
5274
5275         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5276         {
5277                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5278         }
5279
5280         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5281         {
5282                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5283         }
5284
5285         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5286         {
5287                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5288         }
5289
5290         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5291         {
5292                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5293         }
5294
5295         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5296         {
5297                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5298         }
5299
5300         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5301         {
5302                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5303                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5304                 ::basicBlock->appendInst(cmp);
5305
5306                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5307                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5308                 ::basicBlock->appendInst(select);
5309
5310                 return RValue<Int4>(V(result));
5311         }
5312
5313         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5314         {
5315                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5316                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5317                 ::basicBlock->appendInst(cmp);
5318
5319                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5320                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5321                 ::basicBlock->appendInst(select);
5322
5323                 return RValue<Int4>(V(result));
5324         }
5325
5326         RValue<Int4> RoundInt(RValue<Float4> cast)
5327         {
5328                 if(emulateIntrinsics)
5329                 {
5330                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
5331                         return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
5332                 }
5333                 else
5334                 {
5335                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5336                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5337                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5338                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5339                         nearbyint->addArg(cast.value);
5340                         ::basicBlock->appendInst(nearbyint);
5341
5342                         return RValue<Int4>(V(result));
5343                 }
5344         }
5345
5346         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5347         {
5348                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5349                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5350                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5351                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5352                 pack->addArg(x.value);
5353                 pack->addArg(y.value);
5354                 ::basicBlock->appendInst(pack);
5355
5356                 return RValue<Short8>(V(result));
5357         }
5358
5359         RValue<Int> Extract(RValue<Int4> x, int i)
5360         {
5361                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5362         }
5363
5364         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5365         {
5366                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5367         }
5368
5369         RValue<Int> SignMask(RValue<Int4> x)
5370         {
5371                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5372                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5373                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5374                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5375                 movmsk->addArg(x.value);
5376                 ::basicBlock->appendInst(movmsk);
5377
5378                 return RValue<Int>(V(result));
5379         }
5380
5381         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5382         {
5383                 return RValue<Int4>(createSwizzle4(x.value, select));
5384         }
5385
5386         Type *Int4::getType()
5387         {
5388                 return T(Ice::IceType_v4i32);
5389         }
5390
5391         UInt4::UInt4(RValue<Float4> cast)
5392         {
5393                 // Smallest positive value representable in UInt, but not in Int
5394                 const unsigned int ustart = 0x80000000u;
5395                 const float ustartf = float(ustart);
5396
5397                 // Check if the value can be represented as an Int
5398                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5399                 // If the value is too large, subtract ustart and re-add it after conversion.
5400                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5401                 // Otherwise, just convert normally
5402                           (~uiValue & Int4(cast));
5403                 // If the value is negative, store 0, otherwise store the result of the conversion
5404                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
5405         }
5406
5407         UInt4::UInt4(int xyzw)
5408         {
5409                 constant(xyzw, xyzw, xyzw, xyzw);
5410         }
5411
5412         UInt4::UInt4(int x, int yzw)
5413         {
5414                 constant(x, yzw, yzw, yzw);
5415         }
5416
5417         UInt4::UInt4(int x, int y, int zw)
5418         {
5419                 constant(x, y, zw, zw);
5420         }
5421
5422         UInt4::UInt4(int x, int y, int z, int w)
5423         {
5424                 constant(x, y, z, w);
5425         }
5426
5427         void UInt4::constant(int x, int y, int z, int w)
5428         {
5429                 int64_t constantVector[4] = {x, y, z, w};
5430                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5431         }
5432
5433         UInt4::UInt4(RValue<UInt4> rhs)
5434         {
5435                 storeValue(rhs.value);
5436         }
5437
5438         UInt4::UInt4(const UInt4 &rhs)
5439         {
5440                 Value *value = rhs.loadValue();
5441                 storeValue(value);
5442         }
5443
5444         UInt4::UInt4(const Reference<UInt4> &rhs)
5445         {
5446                 Value *value = rhs.loadValue();
5447                 storeValue(value);
5448         }
5449
5450         UInt4::UInt4(RValue<Int4> rhs)
5451         {
5452                 storeValue(rhs.value);
5453         }
5454
5455         UInt4::UInt4(const Int4 &rhs)
5456         {
5457                 Value *value = rhs.loadValue();
5458                 storeValue(value);
5459         }
5460
5461         UInt4::UInt4(const Reference<Int4> &rhs)
5462         {
5463                 Value *value = rhs.loadValue();
5464                 storeValue(value);
5465         }
5466
5467         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5468         {
5469                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5470                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5471
5472                 storeValue(packed);
5473         }
5474
5475         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5476         {
5477                 storeValue(rhs.value);
5478
5479                 return rhs;
5480         }
5481
5482         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5483         {
5484                 Value *value = rhs.loadValue();
5485                 storeValue(value);
5486
5487                 return RValue<UInt4>(value);
5488         }
5489
5490         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5491         {
5492                 Value *value = rhs.loadValue();
5493                 storeValue(value);
5494
5495                 return RValue<UInt4>(value);
5496         }
5497
5498         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5499         {
5500                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5501         }
5502
5503         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5504         {
5505                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5506         }
5507
5508         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5509         {
5510                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5511         }
5512
5513         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5514         {
5515                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5516         }
5517
5518         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5519         {
5520                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5521         }
5522
5523         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5524         {
5525                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5526         }
5527
5528         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5529         {
5530                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5531         }
5532
5533         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5534         {
5535                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5536         }
5537
5538         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5539         {
5540                 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5541         }
5542
5543         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5544         {
5545                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5546         }
5547
5548         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5549         {
5550                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5551         }
5552
5553         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5554         {
5555                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5556         }
5557
5558         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5559         {
5560                 return lhs = lhs + rhs;
5561         }
5562
5563         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5564         {
5565                 return lhs = lhs - rhs;
5566         }
5567
5568         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5569         {
5570                 return lhs = lhs * rhs;
5571         }
5572
5573 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5574 //      {
5575 //              return lhs = lhs / rhs;
5576 //      }
5577
5578 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5579 //      {
5580 //              return lhs = lhs % rhs;
5581 //      }
5582
5583         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5584         {
5585                 return lhs = lhs & rhs;
5586         }
5587
5588         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5589         {
5590                 return lhs = lhs | rhs;
5591         }
5592
5593         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5594         {
5595                 return lhs = lhs ^ rhs;
5596         }
5597
5598         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5599         {
5600                 return lhs = lhs << rhs;
5601         }
5602
5603         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5604         {
5605                 return lhs = lhs >> rhs;
5606         }
5607
5608         RValue<UInt4> operator+(RValue<UInt4> val)
5609         {
5610                 return val;
5611         }
5612
5613         RValue<UInt4> operator-(RValue<UInt4> val)
5614         {
5615                 return RValue<UInt4>(Nucleus::createNeg(val.value));
5616         }
5617
5618         RValue<UInt4> operator~(RValue<UInt4> val)
5619         {
5620                 return RValue<UInt4>(Nucleus::createNot(val.value));
5621         }
5622
5623         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5624         {
5625                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
5626         }
5627
5628         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5629         {
5630                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
5631         }
5632
5633         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5634         {
5635                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
5636         }
5637
5638         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5639         {
5640                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
5641         }
5642
5643         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5644         {
5645                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
5646         }
5647
5648         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5649         {
5650                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
5651         }
5652
5653         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5654         {
5655                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5656                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
5657                 ::basicBlock->appendInst(cmp);
5658
5659                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5660                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5661                 ::basicBlock->appendInst(select);
5662
5663                 return RValue<UInt4>(V(result));
5664         }
5665
5666         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5667         {
5668                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5669                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
5670                 ::basicBlock->appendInst(cmp);
5671
5672                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5673                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5674                 ::basicBlock->appendInst(select);
5675
5676                 return RValue<UInt4>(V(result));
5677         }
5678
5679         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5680         {
5681                 if(CPUID::SSE4_1)
5682                 {
5683                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5684                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5685                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5686                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5687                         pack->addArg(x.value);
5688                         pack->addArg(y.value);
5689                         ::basicBlock->appendInst(pack);
5690
5691                         return RValue<UShort8>(V(result));
5692                 }
5693                 else
5694                 {
5695                         RValue<Int4> sx = As<Int4>(x);
5696                         RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
5697
5698                         RValue<Int4> sy = As<Int4>(y);
5699                         RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
5700
5701                         return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
5702                 }
5703         }
5704
5705         Type *UInt4::getType()
5706         {
5707                 return T(Ice::IceType_v4i32);
5708         }
5709
5710         Float::Float(RValue<Int> cast)
5711         {
5712                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5713
5714                 storeValue(integer);
5715         }
5716
5717         Float::Float(float x)
5718         {
5719                 storeValue(Nucleus::createConstantFloat(x));
5720         }
5721
5722         Float::Float(RValue<Float> rhs)
5723         {
5724                 storeValue(rhs.value);
5725         }
5726
5727         Float::Float(const Float &rhs)
5728         {
5729                 Value *value = rhs.loadValue();
5730                 storeValue(value);
5731         }
5732
5733         Float::Float(const Reference<Float> &rhs)
5734         {
5735                 Value *value = rhs.loadValue();
5736                 storeValue(value);
5737         }
5738
5739         RValue<Float> Float::operator=(RValue<Float> rhs)
5740         {
5741                 storeValue(rhs.value);
5742
5743                 return rhs;
5744         }
5745
5746         RValue<Float> Float::operator=(const Float &rhs)
5747         {
5748                 Value *value = rhs.loadValue();
5749                 storeValue(value);
5750
5751                 return RValue<Float>(value);
5752         }
5753
5754         RValue<Float> Float::operator=(const Reference<Float> &rhs)
5755         {
5756                 Value *value = rhs.loadValue();
5757                 storeValue(value);
5758
5759                 return RValue<Float>(value);
5760         }
5761
5762         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5763         {
5764                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5765         }
5766
5767         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5768         {
5769                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5770         }
5771
5772         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5773         {
5774                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5775         }
5776
5777         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5778         {
5779                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
5780         }
5781
5782         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5783         {
5784                 return lhs = lhs + rhs;
5785         }
5786
5787         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5788         {
5789                 return lhs = lhs - rhs;
5790         }
5791
5792         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5793         {
5794                 return lhs = lhs * rhs;
5795         }
5796
5797         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5798         {
5799                 return lhs = lhs / rhs;
5800         }
5801
5802         RValue<Float> operator+(RValue<Float> val)
5803         {
5804                 return val;
5805         }
5806
5807         RValue<Float> operator-(RValue<Float> val)
5808         {
5809                 return RValue<Float>(Nucleus::createFNeg(val.value));
5810         }
5811
5812         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5813         {
5814                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5815         }
5816
5817         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5818         {
5819                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5820         }
5821
5822         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5823         {
5824                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5825         }
5826
5827         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5828         {
5829                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5830         }
5831
5832         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5833         {
5834                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5835         }
5836
5837         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5838         {
5839                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
5840         }
5841
5842         RValue<Float> Abs(RValue<Float> x)
5843         {
5844                 return IfThenElse(x > 0.0f, x, -x);
5845         }
5846
5847         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5848         {
5849                 return IfThenElse(x > y, x, y);
5850         }
5851
5852         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5853         {
5854                 return IfThenElse(x < y, x, y);
5855         }
5856
5857         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5858         {
5859                 return 1.0f / x;
5860         }
5861
5862         RValue<Float> RcpSqrt_pp(RValue<Float> x)
5863         {
5864                 return Rcp_pp(Sqrt(x));
5865         }
5866
5867         RValue<Float> Sqrt(RValue<Float> x)
5868         {
5869                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
5870                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5871                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5872                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5873                 sqrt->addArg(x.value);
5874                 ::basicBlock->appendInst(sqrt);
5875
5876                 return RValue<Float>(V(result));
5877         }
5878
5879         RValue<Float> Round(RValue<Float> x)
5880         {
5881                 return Float4(Round(Float4(x))).x;
5882         }
5883
5884         RValue<Float> Trunc(RValue<Float> x)
5885         {
5886                 return Float4(Trunc(Float4(x))).x;
5887         }
5888
5889         RValue<Float> Frac(RValue<Float> x)
5890         {
5891                 return Float4(Frac(Float4(x))).x;
5892         }
5893
5894         RValue<Float> Floor(RValue<Float> x)
5895         {
5896                 return Float4(Floor(Float4(x))).x;
5897         }
5898
5899         RValue<Float> Ceil(RValue<Float> x)
5900         {
5901                 return Float4(Ceil(Float4(x))).x;
5902         }
5903
5904         Type *Float::getType()
5905         {
5906                 return T(Ice::IceType_f32);
5907         }
5908
5909         Float2::Float2(RValue<Float4> cast)
5910         {
5911                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5912         }
5913
5914         Type *Float2::getType()
5915         {
5916                 return T(Type_v2f32);
5917         }
5918
5919         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
5920         {
5921                 Value *a = Int4(cast).loadValue();
5922                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5923
5924                 storeValue(xyzw);
5925         }
5926
5927         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
5928         {
5929                 Value *a = Int4(cast).loadValue();
5930                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5931
5932                 storeValue(xyzw);
5933         }
5934
5935         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
5936         {
5937                 Int4 c(cast);
5938                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5939         }
5940
5941         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
5942         {
5943                 Int4 c(cast);
5944                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5945         }
5946
5947         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
5948         {
5949                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
5950
5951                 storeValue(xyzw);
5952         }
5953
5954         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
5955         {
5956                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
5957                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
5958
5959                 storeValue(result.value);
5960         }
5961
5962         Float4::Float4() : FloatXYZW(this)
5963         {
5964         }
5965
5966         Float4::Float4(float xyzw) : FloatXYZW(this)
5967         {
5968                 constant(xyzw, xyzw, xyzw, xyzw);
5969         }
5970
5971         Float4::Float4(float x, float yzw) : FloatXYZW(this)
5972         {
5973                 constant(x, yzw, yzw, yzw);
5974         }
5975
5976         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
5977         {
5978                 constant(x, y, zw, zw);
5979         }
5980
5981         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
5982         {
5983                 constant(x, y, z, w);
5984         }
5985
5986         void Float4::constant(float x, float y, float z, float w)
5987         {
5988                 double constantVector[4] = {x, y, z, w};
5989                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5990         }
5991
5992         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
5993         {
5994                 storeValue(rhs.value);
5995         }
5996
5997         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
5998         {
5999                 Value *value = rhs.loadValue();
6000                 storeValue(value);
6001         }
6002
6003         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
6004         {
6005                 Value *value = rhs.loadValue();
6006                 storeValue(value);
6007         }
6008
6009         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
6010         {
6011                 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
6012
6013                 int swizzle[4] = {0, 0, 0, 0};
6014                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
6015
6016                 storeValue(replicate);
6017         }
6018
6019         Float4::Float4(const Float &rhs) : FloatXYZW(this)
6020         {
6021                 *this = RValue<Float>(rhs.loadValue());
6022         }
6023
6024         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
6025         {
6026                 *this = RValue<Float>(rhs.loadValue());
6027         }
6028
6029         RValue<Float4> Float4::operator=(float x)
6030         {
6031                 return *this = Float4(x, x, x, x);
6032         }
6033
6034         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6035         {
6036                 storeValue(rhs.value);
6037
6038                 return rhs;
6039         }
6040
6041         RValue<Float4> Float4::operator=(const Float4 &rhs)
6042         {
6043                 Value *value = rhs.loadValue();
6044                 storeValue(value);
6045
6046                 return RValue<Float4>(value);
6047         }
6048
6049         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6050         {
6051                 Value *value = rhs.loadValue();
6052                 storeValue(value);
6053
6054                 return RValue<Float4>(value);
6055         }
6056
6057         RValue<Float4> Float4::operator=(RValue<Float> rhs)
6058         {
6059                 return *this = Float4(rhs);
6060         }
6061
6062         RValue<Float4> Float4::operator=(const Float &rhs)
6063         {
6064                 return *this = Float4(rhs);
6065         }
6066
6067         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6068         {
6069                 return *this = Float4(rhs);
6070         }
6071
6072         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6073         {
6074                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6075         }
6076
6077         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6078         {
6079                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6080         }
6081
6082         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6083         {
6084                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6085         }
6086
6087         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6088         {
6089                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6090         }
6091
6092         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6093         {
6094                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6095         }
6096
6097         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6098         {
6099                 return lhs = lhs + rhs;
6100         }
6101
6102         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6103         {
6104                 return lhs = lhs - rhs;
6105         }
6106
6107         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6108         {
6109                 return lhs = lhs * rhs;
6110         }
6111
6112         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6113         {
6114                 return lhs = lhs / rhs;
6115         }
6116
6117         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6118         {
6119                 return lhs = lhs % rhs;
6120         }
6121
6122         RValue<Float4> operator+(RValue<Float4> val)
6123         {
6124                 return val;
6125         }
6126
6127         RValue<Float4> operator-(RValue<Float4> val)
6128         {
6129                 return RValue<Float4>(Nucleus::createFNeg(val.value));
6130         }
6131
6132         RValue<Float4> Abs(RValue<Float4> x)
6133         {
6134                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6135                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6136                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6137
6138                 return As<Float4>(result);
6139         }
6140
6141         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6142         {
6143                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6144                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
6145                 ::basicBlock->appendInst(cmp);
6146
6147                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6148                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6149                 ::basicBlock->appendInst(select);
6150
6151                 return RValue<Float4>(V(result));
6152         }
6153
6154         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6155         {
6156                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6157                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
6158                 ::basicBlock->appendInst(cmp);
6159
6160                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6161                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6162                 ::basicBlock->appendInst(select);
6163
6164                 return RValue<Float4>(V(result));
6165         }
6166
6167         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6168         {
6169                 return Float4(1.0f) / x;
6170         }
6171
6172         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6173         {
6174                 return Rcp_pp(Sqrt(x));
6175         }
6176
6177         RValue<Float4> Sqrt(RValue<Float4> x)
6178         {
6179                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6180                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6181                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6182                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6183                 sqrt->addArg(x.value);
6184                 ::basicBlock->appendInst(sqrt);
6185
6186                 return RValue<Float4>(V(result));
6187         }
6188
6189         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6190         {
6191                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6192         }
6193
6194         RValue<Float> Extract(RValue<Float4> x, int i)
6195         {
6196                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6197         }
6198
6199         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6200         {
6201                 return RValue<Float4>(createSwizzle4(x.value, select));
6202         }
6203
6204         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6205         {
6206                 int shuffle[4] =
6207                 {
6208                         ((imm >> 0) & 0x03) + 0,
6209                         ((imm >> 2) & 0x03) + 0,
6210                         ((imm >> 4) & 0x03) + 4,
6211                         ((imm >> 6) & 0x03) + 4,
6212                 };
6213
6214                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6215         }
6216
6217         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6218         {
6219                 int shuffle[4] = {0, 4, 1, 5};
6220                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6221         }
6222
6223         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6224         {
6225                 int shuffle[4] = {2, 6, 3, 7};
6226                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6227         }
6228
6229         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6230         {
6231                 Value *vector = lhs.loadValue();
6232                 Value *result = createMask4(vector, rhs.value, select);
6233                 lhs.storeValue(result);
6234
6235                 return RValue<Float4>(result);
6236         }
6237
6238         RValue<Int> SignMask(RValue<Float4> x)
6239         {
6240                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6241                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6242                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6243                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6244                 movmsk->addArg(x.value);
6245                 ::basicBlock->appendInst(movmsk);
6246
6247                 return RValue<Int>(V(result));
6248         }
6249
6250         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6251         {
6252                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6253         }
6254
6255         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6256         {
6257                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6258         }
6259
6260         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6261         {
6262                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6263         }
6264
6265         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6266         {
6267                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6268         }
6269
6270         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6271         {
6272                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6273         }
6274
6275         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6276         {
6277                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6278         }
6279
6280         RValue<Float4> Round(RValue<Float4> x)
6281         {
6282                 if(emulateIntrinsics)
6283                 {
6284                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
6285                         return (x + Float4(0x00C00000)) - Float4(0x00C00000);
6286                 }
6287                 else if(CPUID::SSE4_1)
6288                 {
6289                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6290                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6291                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6292                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6293                         round->addArg(x.value);
6294                         round->addArg(::context->getConstantInt32(0));
6295                         ::basicBlock->appendInst(round);
6296
6297                         return RValue<Float4>(V(result));
6298                 }
6299                 else
6300                 {
6301                         return Float4(RoundInt(x));
6302                 }
6303         }
6304
6305         RValue<Float4> Trunc(RValue<Float4> x)
6306         {
6307                 if(CPUID::SSE4_1)
6308                 {
6309                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6310                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6311                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6312                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6313                         round->addArg(x.value);
6314                         round->addArg(::context->getConstantInt32(3));
6315                         ::basicBlock->appendInst(round);
6316
6317                         return RValue<Float4>(V(result));
6318                 }
6319                 else
6320                 {
6321                         return Float4(Int4(x));
6322                 }
6323         }
6324
6325         RValue<Float4> Frac(RValue<Float4> x)
6326         {
6327                 if(CPUID::SSE4_1)
6328                 {
6329                         return x - Floor(x);
6330                 }
6331                 else
6332                 {
6333                         Float4 frc = x - Float4(Int4(x));   // Signed fractional part
6334
6335                         return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
6336                 }
6337         }
6338
6339         RValue<Float4> Floor(RValue<Float4> x)
6340         {
6341                 if(CPUID::SSE4_1)
6342                 {
6343                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6344                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6345                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6346                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6347                         round->addArg(x.value);
6348                         round->addArg(::context->getConstantInt32(1));
6349                         ::basicBlock->appendInst(round);
6350
6351                         return RValue<Float4>(V(result));
6352                 }
6353                 else
6354                 {
6355                         return x - Frac(x);
6356                 }
6357         }
6358
6359         RValue<Float4> Ceil(RValue<Float4> x)
6360         {
6361                 if(CPUID::SSE4_1)
6362                 {
6363                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6364                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6365                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6366                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6367                         round->addArg(x.value);
6368                         round->addArg(::context->getConstantInt32(2));
6369                         ::basicBlock->appendInst(round);
6370
6371                         return RValue<Float4>(V(result));
6372                 }
6373                 else
6374                 {
6375                         return -Floor(-x);
6376                 }
6377         }
6378
6379         Type *Float4::getType()
6380         {
6381                 return T(Ice::IceType_v4f32);
6382         }
6383
6384         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6385         {
6386                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6387         }
6388
6389         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6390         {
6391                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
6392         }
6393
6394         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6395         {
6396                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
6397         }
6398
6399         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6400         {
6401                 return lhs = lhs + offset;
6402         }
6403
6404         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6405         {
6406                 return lhs = lhs + offset;
6407         }
6408
6409         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6410         {
6411                 return lhs = lhs + offset;
6412         }
6413
6414         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6415         {
6416                 return lhs + -offset;
6417         }
6418
6419         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6420         {
6421                 return lhs + -offset;
6422         }
6423
6424         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6425         {
6426                 return lhs + -offset;
6427         }
6428
6429         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6430         {
6431                 return lhs = lhs - offset;
6432         }
6433
6434         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6435         {
6436                 return lhs = lhs - offset;
6437         }
6438
6439         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6440         {
6441                 return lhs = lhs - offset;
6442         }
6443
6444         void Return()
6445         {
6446                 Nucleus::createRetVoid();
6447                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6448                 Nucleus::createUnreachable();
6449         }
6450
6451         void Return(RValue<Int> ret)
6452         {
6453                 Nucleus::createRet(ret.value);
6454                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6455                 Nucleus::createUnreachable();
6456         }
6457
6458         bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6459         {
6460                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6461                 Nucleus::setInsertBlock(bodyBB);
6462
6463                 return true;
6464         }
6465
6466         RValue<Long> Ticks()
6467         {
6468                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
6469         }
6470 }