OSDN Git Service

Emulate MultiplyHigh intrinsics.
[android-x86/external-swiftshader.git] / src / Reactor / SubzeroReactor.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Nucleus.hpp"
16
17 #include "Reactor.hpp"
18 #include "Routine.hpp"
19
20 #include "Optimizer.hpp"
21
22 #include "src/IceTypes.h"
23 #include "src/IceCfg.h"
24 #include "src/IceELFStreamer.h"
25 #include "src/IceGlobalContext.h"
26 #include "src/IceCfgNode.h"
27 #include "src/IceELFObjectWriter.h"
28 #include "src/IceGlobalInits.h"
29
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/raw_os_ostream.h"
32
33 #if defined(_WIN32)
34 #ifndef WIN32_LEAN_AND_MEAN
35 #define WIN32_LEAN_AND_MEAN
36 #endif // !WIN32_LEAN_AND_MEAN
37 #ifndef NOMINMAX
38 #define NOMINMAX
39 #endif // !NOMINMAX
40 #include <Windows.h>
41 #else
42 #include <sys/mman.h>
43 #if !defined(MAP_ANONYMOUS)
44 #define MAP_ANONYMOUS MAP_ANON
45 #endif
46 #endif
47
48 #include <mutex>
49 #include <limits>
50 #include <iostream>
51 #include <cassert>
52
namespace
{
        // File-local code generation state shared by the Nucleus member functions below.
        // Nucleus::Nucleus() locks codegenMutex and ~Nucleus() unlocks it, so this state
        // belongs to at most one live Nucleus at a time.

        // Subzero global context; created in Nucleus::Nucleus().
        Ice::GlobalContext *context = nullptr;
        // Control flow graph of the function being built; created in Nucleus::createFunction().
        Ice::Cfg *function = nullptr;
        // Block that newly created instructions are appended to (see setInsertBlock()).
        Ice::CfgNode *basicBlock = nullptr;
        // Allocator scope tied to 'function'; created in Nucleus::createFunction().
        Ice::CfgLocalAllocatorScope *allocator = nullptr;
        // Routine under construction; handed to the caller and cleared by acquireRoutine().
        sw::Routine *routine = nullptr;

        // Serializes Nucleus lifetimes.
        std::mutex codegenMutex;

        // Only assigned on the write-to-file path in Nucleus::Nucleus().
        Ice::ELFFileStreamer *elfFile = nullptr;
        Ice::Fdstream *out = nullptr;
}
66
namespace
{
        // Provide the GCC/Clang-style architecture macros when only the _M_* forms
        // (e.g. from MSVC) are defined, so the checks below need a single spelling.
        #if !defined(__i386__) && defined(_M_IX86)
                #define __i386__ 1
        #endif

        #if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
                #define __x86_64__ 1
        #endif

        // Host CPU feature flags, evaluated once during static initialization.
        class CPUID
        {
        public:
                // True when compiled for ARM (__arm__), false for x86/x86-64.
                const static bool ARM;
                // True when the CPUID query below reports the SSE4.1 feature bit.
                const static bool SSE4_1;

        private:
                // Executes CPUID leaf 'info' and stores EAX, EBX, ECX, EDX in registers[0..3].
                // On non-x86 targets all four entries are set to zero.
                static void cpuid(int registers[4], int info)
                {
                        #if defined(__i386__) || defined(__x86_64__)
                                #if defined(_WIN32)
                                        __cpuid(registers, info);
                                #else
                                        __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
                                #endif
                        #else
                                registers[0] = 0;
                                registers[1] = 0;
                                registers[2] = 0;
                                registers[3] = 0;
                        #endif
                }

                // Compile-time architecture check; any target other than ARM or x86 is a build error.
                static bool detectARM()
                {
                        #if defined(__arm__)
                                return true;
                        #elif defined(__i386__) || defined(__x86_64__)
                                return false;
                        #else
                                #error "Unknown architecture"
                        #endif
                }

                // Queries CPUID leaf 1 and tests bit 19 of registers[2] (ECX); always false off x86.
                static bool detectSSE4_1()
                {
                        #if defined(__i386__) || defined(__x86_64__)
                                int registers[4];
                                cpuid(registers, 1);
                                return (registers[2] & 0x00080000) != 0;
                        #else
                                return false;
                        #endif
                }
        };

        const bool CPUID::ARM = CPUID::detectARM();
        const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
        // Both emulation switches are currently enabled exactly when targeting ARM.
        const bool emulateIntrinsics = CPUID::ARM;
        const bool emulateMismatchedBitCast = CPUID::ARM;
}
128
129 namespace sw
130 {
        // Reactor vector types that have no direct Ice::Type counterpart. Each one is
        // encoded as an Ice vector type OR'ed with an element-count tag stored above
        // bit EmulatedShift; T(Type*) masks the tag off again to recover the Ice type.
        enum EmulatedType
        {
                EmulatedShift = 16,
                EmulatedV2 = 2 << EmulatedShift,
                EmulatedV4 = 4 << EmulatedShift,
                EmulatedV8 = 8 << EmulatedShift,
                // Mask covering every tag bit.
                EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,

                // Tagged types; typeSize() reports 4 or 8 bytes for these.
                Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
                Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
                Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
                Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
                Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
                Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
        };
146
        // Reactor-facing handle types. They add no members to their Subzero bases; the
        // V() and B() helpers below produce them from Ice pointers via reinterpret_cast.
        class Value : public Ice::Operand {};
        class SwitchCases : public Ice::InstSwitch {};
        class BasicBlock : public Ice::CfgNode {};
150
151         Ice::Type T(Type *t)
152         {
153                 static_assert(static_cast<unsigned int>(Ice::IceType_NUM) < static_cast<unsigned int>(EmulatedBits), "Ice::Type overlaps with our emulated types!");
154                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
155         }
156
157         Type *T(Ice::Type t)
158         {
159                 return reinterpret_cast<Type*>(t);
160         }
161
162         Type *T(EmulatedType t)
163         {
164                 return reinterpret_cast<Type*>(t);
165         }
166
167         Value *V(Ice::Operand *v)
168         {
169                 return reinterpret_cast<Value*>(v);
170         }
171
172         BasicBlock *B(Ice::CfgNode *b)
173         {
174                 return reinterpret_cast<BasicBlock*>(b);
175         }
176
177         static size_t typeSize(Type *type)
178         {
179                 if(reinterpret_cast<std::intptr_t>(type) & EmulatedBits)
180                 {
181                         switch(reinterpret_cast<std::intptr_t>(type))
182                         {
183                         case Type_v2i32: return 8;
184                         case Type_v4i16: return 8;
185                         case Type_v2i16: return 4;
186                         case Type_v8i8:  return 8;
187                         case Type_v4i8:  return 4;
188                         case Type_v2f32: return 8;
189                         default: assert(false);
190                         }
191                 }
192
193                 return Ice::typeWidthInBytes(T(type));
194         }
195
        // Ordered list of optimization passes: InstructionCombining first, then Disabled.
        // The remaining eight entries are value-initialized.
        Optimization optimization[10] = {InstructionCombining, Disabled};

        // ELF header and section header layouts matching the host pointer size.
        using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
        using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
200
201         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
202         {
203                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
204         }
205
206         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
207         {
208                 return &sectionHeader(elfHeader)[index];
209         }
210
        // Applies one 32-bit REL relocation to the in-memory ELF image.
        //
        // elfHeader        start of the ELF image; all offsets are resolved against it.
        // relocation       the entry to apply.
        // relocationTable  header of the section holding 'relocation'; sh_info selects
        //                  the section being patched and sh_link the symbol table.
        //
        // Returns the resolved symbol address. Returns nullptr when the relocation has
        // no symbol, and also when the symbol cannot be resolved or the relocation type
        // is unsupported (in which case nothing is patched).
        static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
        {
                const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);

                // Location inside the target section that the relocation rewrites.
                intptr_t address = (intptr_t)elfHeader + target->sh_offset;
                int32_t *patchSite = (int*)(address + relocation.r_offset);
                uint32_t index = relocation.getSymbol();
                int table = relocationTable.sh_link;
                void *symbolValue = nullptr;

                if(index != SHN_UNDEF)
                {
                        if(table == SHN_UNDEF) return nullptr;
                        const SectionHeader *symbolTable = elfSection(elfHeader, table);

                        uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
                        if(index >= symtab_entries)
                        {
                                assert(index < symtab_entries && "Symbol Index out of range");
                                return nullptr;
                        }

                        intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
                        Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
                        uint16_t section = symbol.st_shndx;

                        // Only symbols defined in a regular section of this image can be resolved.
                        if(section != SHN_UNDEF && section < SHN_LORESERVE)
                        {
                                const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
                                symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
                        }
                        else
                        {
                                return nullptr;
                        }
                }

                if(CPUID::ARM)
                {
                        switch(relocation.getType())
                        {
                        case R_ARM_NONE:
                                // No relocation
                                break;
                        case R_ARM_MOVW_ABS_NC:
                                {
                                        // Low 16 bits of the address: top nibble goes to bits 16-19,
                                        // the remaining 12 bits to bits 0-11 of the instruction word.
                                        uint32_t thumb = 0;   // Calls to Thumb code not supported.
                                        uint32_t lo = (uint32_t)(intptr_t)symbolValue | thumb;
                                        *patchSite = (*patchSite & 0xFFF0F000) | ((lo & 0xF000) << 4) | (lo & 0x0FFF);
                                }
                                break;
                        case R_ARM_MOVT_ABS:
                                {
                                        // High 16 bits of the address, packed into the same two fields.
                                        uint32_t hi = (uint32_t)(intptr_t)(symbolValue) >> 16;
                                        *patchSite = (*patchSite & 0xFFF0F000) | ((hi & 0xF000) << 4) | (hi & 0x0FFF);
                                }
                                break;
                        default:
                                assert(false && "Unsupported relocation type");
                                return nullptr;
                        }
                }
                else
                {
                        switch(relocation.getType())
                        {
                        case R_386_NONE:
                                // No relocation
                                break;
                        case R_386_32:
                                // Absolute address; the value already at the patch site acts as the addend.
                                *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
                                break;
                //      case R_386_PC32:
                //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
                //              break;
                        default:
                                assert(false && "Unsupported relocation type");
                                return nullptr;
                        }
                }


                return symbolValue;
        }
295
        // Applies one 64-bit RELA relocation (x86-64 types only) to the in-memory ELF image.
        //
        // elfHeader        start of the ELF image; all offsets are resolved against it.
        // relocation       the entry to apply, including its explicit addend.
        // relocationTable  header of the section holding 'relocation'; sh_info selects
        //                  the section being patched and sh_link the symbol table.
        //
        // Returns the resolved symbol address. Returns nullptr when the relocation has
        // no symbol, and also when the symbol cannot be resolved or the relocation type
        // is unsupported (in which case nothing is patched).
        static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
        {
                const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);

                // Location inside the target section that the relocation rewrites.
                intptr_t address = (intptr_t)elfHeader + target->sh_offset;
                int32_t *patchSite = (int*)(address + relocation.r_offset);
                uint32_t index = relocation.getSymbol();
                int table = relocationTable.sh_link;
                void *symbolValue = nullptr;

                if(index != SHN_UNDEF)
                {
                        if(table == SHN_UNDEF) return nullptr;
                        const SectionHeader *symbolTable = elfSection(elfHeader, table);

                        uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
                        if(index >= symtab_entries)
                        {
                                assert(index < symtab_entries && "Symbol Index out of range");
                                return nullptr;
                        }

                        intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
                        Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
                        uint16_t section = symbol.st_shndx;

                        // Only symbols defined in a regular section of this image can be resolved.
                        if(section != SHN_UNDEF && section < SHN_LORESERVE)
                        {
                                const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
                                symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
                        }
                        else
                        {
                                return nullptr;
                        }
                }

                // Each case adds both the value already stored at the patch site and r_addend.
                switch(relocation.getType())
                {
                case R_X86_64_NONE:
                        // No relocation
                        break;
                case R_X86_64_64:
                        // Full 64-bit absolute address.
                        *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
                        break;
                case R_X86_64_PC32:
                        // 32-bit offset relative to the patch site itself.
                        *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
                        break;
                case R_X86_64_32S:
                        // Absolute address truncated to 32 bits.
                        *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
                        break;
                default:
                        assert(false && "Unsupported relocation type");
                        return nullptr;
                }

                return symbolValue;
        }
354
        // Prepares an in-memory ELF object for execution: applies every relocation
        // section in place and locates the executable code.
        //
        // elfImage  start of the ELF image; relocations modify it in place.
        // codeSize  receives the size of the executable section; left untouched when
        //           no such section is found.
        //
        // Returns the start of the executable PROGBITS section (the last one, if there
        // are several), or nullptr when the ELF magic does not match or no executable
        // section exists.
        void *loadImage(uint8_t *const elfImage, size_t &codeSize)
        {
                ElfHeader *elfHeader = (ElfHeader*)elfImage;

                if(!elfHeader->checkMagic())
                {
                        return nullptr;
                }

                // Expect ELF bitness to match platform
                assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
                #if defined(__i386__)
                        assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_386);
                #elif defined(__x86_64__)
                        assert(sizeof(void*) == 8 && elfHeader->e_machine == EM_X86_64);
                #elif defined(__arm__)
                        assert(sizeof(void*) == 4 && elfHeader->e_machine == EM_ARM);
                #else
                        #error "Unsupported platform"
                #endif

                SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
                void *entry = nullptr;

                for(int i = 0; i < elfHeader->e_shnum; i++)
                {
                        if(sectionHeader[i].sh_type == SHT_PROGBITS)
                        {
                                // Executable code section: remember where it starts and how big it is.
                                if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
                                {
                                        entry = elfImage + sectionHeader[i].sh_offset;
                                        codeSize = sectionHeader[i].sh_size;
                                }
                        }
                        else if(sectionHeader[i].sh_type == SHT_REL)
                        {
                                assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code

                                // The return value of relocateSymbol is ignored; failures are not reported.
                                for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
                                {
                                        const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
                                        relocateSymbol(elfHeader, relocation, sectionHeader[i]);
                                }
                        }
                        else if(sectionHeader[i].sh_type == SHT_RELA)
                        {
                                assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code

                                // The return value of relocateSymbol is ignored; failures are not reported.
                                for(Elf32_Word index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
                                {
                                        const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
                                        relocateSymbol(elfHeader, relocation, sectionHeader[i]);
                                }
                        }
                }

                return entry;
        }
413
414         template<typename T>
415         struct ExecutableAllocator
416         {
417                 ExecutableAllocator() {};
418                 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
419
420                 using value_type = T;
421                 using size_type = std::size_t;
422
423                 T *allocate(size_type n)
424                 {
425                         #if defined(_WIN32)
426                                 return (T*)VirtualAlloc(NULL, sizeof(T) * n, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
427                         #else
428                                 return (T*)mmap(nullptr, sizeof(T) * n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
429                         #endif
430                 }
431
432                 void deallocate(T *p, size_type n)
433                 {
434                         #if defined(_WIN32)
435                                 VirtualFree(p, 0, MEM_RELEASE);
436                         #else
437                                 munmap(p, sizeof(T) * n);
438                         #endif
439                 }
440         };
441
442         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
443         {
444                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
445                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
446
447         public:
448                 ELFMemoryStreamer() : Routine(), entry(nullptr)
449                 {
450                         position = 0;
451                         buffer.reserve(0x1000);
452                 }
453
454                 ~ELFMemoryStreamer() override
455                 {
456                         #if defined(_WIN32)
457                                 if(buffer.size() != 0)
458                                 {
459                                         DWORD exeProtection;
460                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
461                                 }
462                         #endif
463                 }
464
465                 void write8(uint8_t Value) override
466                 {
467                         if(position == (uint64_t)buffer.size())
468                         {
469                                 buffer.push_back(Value);
470                                 position++;
471                         }
472                         else if(position < (uint64_t)buffer.size())
473                         {
474                                 buffer[position] = Value;
475                                 position++;
476                         }
477                         else assert(false && "UNIMPLEMENTED");
478                 }
479
480                 void writeBytes(llvm::StringRef Bytes) override
481                 {
482                         std::size_t oldSize = buffer.size();
483                         buffer.resize(oldSize + Bytes.size());
484                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
485                         position += Bytes.size();
486                 }
487
488                 uint64_t tell() const override { return position; }
489
490                 void seek(uint64_t Off) override { position = Off; }
491
492                 const void *getEntry() override
493                 {
494                         if(!entry)
495                         {
496                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
497
498                                 size_t codeSize = 0;
499                                 entry = loadImage(&buffer[0], codeSize);
500
501                                 #if defined(_WIN32)
502                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READ, &oldProtection);
503                                         FlushInstructionCache(GetCurrentProcess(), NULL, 0);
504                                 #else
505                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_EXEC);
506                                         __builtin___clear_cache((char*)entry, (char*)entry + codeSize);
507                                 #endif
508                         }
509
510                         return entry;
511                 }
512
513         private:
514                 void *entry;
515                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
516                 std::size_t position;
517
518                 #if defined(_WIN32)
519                 DWORD oldProtection;
520                 #endif
521         };
522
523         Nucleus::Nucleus()
524         {
525                 ::codegenMutex.lock();   // Reactor is currently not thread safe
526
527                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
528                 Ice::ClFlags::getParsedClFlags(Flags);
529
530                 #if defined(__arm__)
531                         Flags.setTargetArch(Ice::Target_ARM32);
532                         Flags.setTargetInstructionSet(Ice::ARM32InstructionSet_HWDivArm);
533                 #else   // x86
534                         Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
535                         Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
536                 #endif
537                 Flags.setOutFileType(Ice::FT_Elf);
538                 Flags.setOptLevel(Ice::Opt_2);
539                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
540                 Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
541                 Flags.setDisableHybridAssembly(true);
542
543                 static llvm::raw_os_ostream cout(std::cout);
544                 static llvm::raw_os_ostream cerr(std::cerr);
545
546                 if(false)   // Write out to a file
547                 {
548                         std::error_code errorCode;
549                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
550                         ::elfFile = new Ice::ELFFileStreamer(*out);
551                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
552                 }
553                 else
554                 {
555                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
556                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
557                         ::routine = elfMemory;
558                 }
559         }
560
561         Nucleus::~Nucleus()
562         {
563                 delete ::routine;
564
565                 delete ::allocator;
566                 delete ::function;
567                 delete ::context;
568
569                 delete ::elfFile;
570                 delete ::out;
571
572                 ::codegenMutex.unlock();
573         }
574
        // Finalizes the function under construction, compiles it and emits it as an ELF
        // object into the global context's stream, then hands the routine to the caller.
        //
        // name              function name; each wide character is narrowed to a char.
        // runOptimizations  not consulted in this implementation; optimize() always runs.
        //
        // Returns the routine created by the constructor. ::routine is cleared, so the
        // destructor no longer deletes it and the caller becomes responsible for it.
        Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
        {
                // Make sure the current block ends in a return instruction.
                if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
                {
                        createRetVoid();
                }

                std::wstring wideName(name);
                std::string asciiName(wideName.begin(), wideName.end());
                ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));

                optimize();

                ::function->translate();
                assert(!::function->hasError());

                auto globals = ::function->getGlobalInits();

                // Merge any globals the function produced into the context's global list.
                if(globals && !globals->empty())
                {
                        ::context->getGlobals()->merge(globals.get());
                }

                // Emit the object file: header, function code, then globals, constants,
                // jump tables and the remaining sections.
                ::context->emitFileHeader();
                ::function->emitIAS();
                auto assembler = ::function->releaseAssembler();
                auto objectWriter = ::context->getObjectWriter();
                assembler->alignFunction();
                objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
                ::context->lowerGlobals("last");
                ::context->lowerConstants();
                ::context->lowerJumpTables();
                objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
                objectWriter->writeNonUserSections();

                Routine *handoffRoutine = ::routine;
                ::routine = nullptr;

                return handoffRoutine;
        }
615
616         void Nucleus::optimize()
617         {
618                 sw::optimize(::function);
619         }
620
621         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
622         {
623                 Ice::Type type = T(t);
624                 int typeSize = Ice::typeWidthInBytes(type);
625                 int totalSize = typeSize * (arraySize ? arraySize : 1);
626
627                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
628                 auto address = ::function->makeVariable(T(getPointerType(t)));
629                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
630                 ::function->getEntryNode()->getInsts().push_front(alloca);
631
632                 return V(address);
633         }
634
635         BasicBlock *Nucleus::createBasicBlock()
636         {
637                 return B(::function->makeNode());
638         }
639
640         BasicBlock *Nucleus::getInsertBlock()
641         {
642                 return B(::basicBlock);
643         }
644
645         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
646         {
647         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
648                 ::basicBlock = basicBlock;
649         }
650
651         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
652         {
653                 uint32_t sequenceNumber = 0;
654                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
655                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
656
657                 for(Type *type : Params)
658                 {
659                         Ice::Variable *arg = ::function->makeVariable(T(type));
660                         ::function->addArg(arg);
661                 }
662
663                 Ice::CfgNode *node = ::function->makeNode();
664                 ::function->setEntryNode(node);
665                 ::basicBlock = node;
666         }
667
668         Value *Nucleus::getArgument(unsigned int index)
669         {
670                 return V(::function->getArgs()[index]);
671         }
672
673         void Nucleus::createRetVoid()
674         {
675                 Ice::InstRet *ret = Ice::InstRet::create(::function);
676                 ::basicBlock->appendInst(ret);
677         }
678
679         void Nucleus::createRet(Value *v)
680         {
681                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
682                 ::basicBlock->appendInst(ret);
683         }
684
685         void Nucleus::createBr(BasicBlock *dest)
686         {
687                 auto br = Ice::InstBr::create(::function, dest);
688                 ::basicBlock->appendInst(br);
689         }
690
691         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
692         {
693                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
694                 ::basicBlock->appendInst(br);
695         }
696
697         static bool isCommutative(Ice::InstArithmetic::OpKind op)
698         {
699                 switch(op)
700                 {
701                 case Ice::InstArithmetic::Add:
702                 case Ice::InstArithmetic::Fadd:
703                 case Ice::InstArithmetic::Mul:
704                 case Ice::InstArithmetic::Fmul:
705                 case Ice::InstArithmetic::And:
706                 case Ice::InstArithmetic::Or:
707                 case Ice::InstArithmetic::Xor:
708                         return true;
709                 default:
710                         return false;
711                 }
712         }
713
714         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
715         {
716                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
717
718                 bool swapOperands = llvm::isa<Ice::Constant>(lhs) && isCommutative(op);
719
720                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
721                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, swapOperands ? rhs : lhs, swapOperands ? lhs : rhs);
722                 ::basicBlock->appendInst(arithmetic);
723
724                 return V(result);
725         }
726
727         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
728         {
729                 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
730         }
731
732         Value *Nucleus::createSub(Value *lhs, Value *rhs)
733         {
734                 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
735         }
736
737         Value *Nucleus::createMul(Value *lhs, Value *rhs)
738         {
739                 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
740         }
741
742         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
743         {
744                 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
745         }
746
747         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
748         {
749                 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
750         }
751
752         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
753         {
754                 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
755         }
756
757         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
758         {
759                 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
760         }
761
762         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
763         {
764                 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
765         }
766
767         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
768         {
769                 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
770         }
771
772         Value *Nucleus::createURem(Value *lhs, Value *rhs)
773         {
774                 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
775         }
776
777         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
778         {
779                 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
780         }
781
782         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
783         {
784                 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
785         }
786
787         Value *Nucleus::createShl(Value *lhs, Value *rhs)
788         {
789                 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
790         }
791
792         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
793         {
794                 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
795         }
796
797         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
798         {
799                 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
800         }
801
802         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
803         {
804                 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
805         }
806
807         Value *Nucleus::createOr(Value *lhs, Value *rhs)
808         {
809                 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
810         }
811
812         Value *Nucleus::createXor(Value *lhs, Value *rhs)
813         {
814                 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
815         }
816
817         Value *Nucleus::createNeg(Value *v)
818         {
819                 return createSub(createNullValue(T(v->getType())), v);
820         }
821
822         Value *Nucleus::createFNeg(Value *v)
823         {
824                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
825                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
826                                       createConstantVector(c, T(v->getType())) :
827                                       V(::context->getConstantFloat(-0.0f));
828
829                 return createFSub(negativeZero, v);
830         }
831
832         Value *Nucleus::createNot(Value *v)
833         {
834                 if(Ice::isScalarIntegerType(v->getType()))
835                 {
836                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
837                 }
838                 else   // Vector
839                 {
840                         int64_t c[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
841                         return createXor(v, createConstantVector(c, T(v->getType())));
842                 }
843         }
844
845         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
846         {
847                 int valueType = (int)reinterpret_cast<intptr_t>(type);
848                 Ice::Variable *result = ::function->makeVariable(T(type));
849
850                 if(valueType & EmulatedBits)
851                 {
852                         if(emulateIntrinsics)
853                         {
854                                 if(typeSize(type) == 4)
855                                 {
856                                         auto pointer = RValue<Pointer<Byte>>(ptr);
857                                         Int x = *Pointer<Int>(pointer +1-1);
858
859                                         Int4 vector;
860                                         vector = Insert(vector, x, 0);
861
862                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
863                                         ::basicBlock->appendInst(bitcast);
864                                 }
865                                 else if(typeSize(type) == 8)
866                                 {
867                                         auto pointer = RValue<Pointer<Byte>>(ptr);
868                                         Int x = *Pointer<Int>(pointer +1-1);
869                                         Int y = *Pointer<Int>(pointer + 4);
870
871                                         Int4 vector;
872                                         vector = Insert(vector, x, 0);
873                                         vector = Insert(vector, y, 1);
874
875                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, result, vector.loadValue());
876                                         ::basicBlock->appendInst(bitcast);
877                                 }
878                                 else assert(false);
879                         }
880                         else
881                         {
882                                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
883                                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
884                                 auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
885                                 load->addArg(ptr);
886                                 load->addArg(::context->getConstantInt32(typeSize(type)));
887                                 ::basicBlock->appendInst(load);
888                         }
889                 }
890                 else
891                 {
892                         auto load = Ice::InstLoad::create(::function, result, ptr, align);
893                         ::basicBlock->appendInst(load);
894                 }
895
896                 return V(result);
897         }
898
899         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
900         {
901                 int valueType = (int)reinterpret_cast<intptr_t>(type);
902
903                 if(valueType & EmulatedBits)
904                 {
905                         if(emulateIntrinsics)
906                         {
907                                 if(typeSize(type) == 4)
908                                 {
909                                         Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
910                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
911                                         ::basicBlock->appendInst(bitcast);
912
913                                         RValue<Int4> v(V(vector));
914
915                                         auto pointer = RValue<Pointer<Byte>>(ptr);
916                                         Int x = Extract(v, 0);
917                                         *Pointer<Int>(pointer) = x;
918                                 }
919                                 else if(typeSize(type) == 8)
920                                 {
921                                         Ice::Variable *vector = ::function->makeVariable(Ice::IceType_v4i32);
922                                         auto bitcast = Ice::InstCast::create(::function, Ice::InstCast::Bitcast, vector, value);
923                                         ::basicBlock->appendInst(bitcast);
924
925                                         RValue<Int4> v(V(vector));
926
927                                         auto pointer = RValue<Pointer<Byte>>(ptr);
928                                         Int x = Extract(v, 0);
929                                         *Pointer<Int>(pointer) = x;
930                                         Int y = Extract(v, 1);
931                                         *Pointer<Int>(pointer + 4) = y;
932                                 }
933                                 else assert(false);
934                         }
935                         else
936                         {
937                                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
938                                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
939                                 auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
940                                 store->addArg(value);
941                                 store->addArg(ptr);
942                                 store->addArg(::context->getConstantInt32(typeSize(type)));
943                                 ::basicBlock->appendInst(store);
944                         }
945                 }
946                 else
947                 {
948                         assert(T(value->getType()) == type);
949
950                         auto store = Ice::InstStore::create(::function, value, ptr, align);
951                         ::basicBlock->appendInst(store);
952                 }
953
954                 return value;
955         }
956
957         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
958         {
959                 assert(index->getType() == Ice::IceType_i32);
960
961                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
962                 {
963                         int32_t offset = constant->getValue() * (int)typeSize(type);
964
965                         if(offset == 0)
966                         {
967                                 return ptr;
968                         }
969
970                         return createAdd(ptr, createConstantInt(offset));
971                 }
972
973                 if(!Ice::isByteSizedType(T(type)))
974                 {
975                         index = createMul(index, createConstantInt((int)typeSize(type)));
976                 }
977
978                 if(sizeof(void*) == 8)
979                 {
980                         if(unsignedIndex)
981                         {
982                                 index = createZExt(index, T(Ice::IceType_i64));
983                         }
984                         else
985                         {
986                                 index = createSExt(index, T(Ice::IceType_i64));
987                         }
988                 }
989
990                 return createAdd(ptr, index);
991         }
992
993         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
994         {
995                 assert(false && "UNIMPLEMENTED"); return nullptr;
996         }
997
998         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
999         {
1000                 if(v->getType() == T(destType))
1001                 {
1002                         return v;
1003                 }
1004
1005                 Ice::Variable *result = ::function->makeVariable(T(destType));
1006                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
1007                 ::basicBlock->appendInst(cast);
1008
1009                 return V(result);
1010         }
1011
1012         Value *Nucleus::createTrunc(Value *v, Type *destType)
1013         {
1014                 return createCast(Ice::InstCast::Trunc, v, destType);
1015         }
1016
1017         Value *Nucleus::createZExt(Value *v, Type *destType)
1018         {
1019                 return createCast(Ice::InstCast::Zext, v, destType);
1020         }
1021
1022         Value *Nucleus::createSExt(Value *v, Type *destType)
1023         {
1024                 return createCast(Ice::InstCast::Sext, v, destType);
1025         }
1026
1027         Value *Nucleus::createFPToSI(Value *v, Type *destType)
1028         {
1029                 return createCast(Ice::InstCast::Fptosi, v, destType);
1030         }
1031
1032         Value *Nucleus::createSIToFP(Value *v, Type *destType)
1033         {
1034                 return createCast(Ice::InstCast::Sitofp, v, destType);
1035         }
1036
1037         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1038         {
1039                 return createCast(Ice::InstCast::Fptrunc, v, destType);
1040         }
1041
1042         Value *Nucleus::createFPExt(Value *v, Type *destType)
1043         {
1044                 return createCast(Ice::InstCast::Fpext, v, destType);
1045         }
1046
1047         Value *Nucleus::createBitCast(Value *v, Type *destType)
1048         {
1049                 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1050                 // support for casting between scalars and wide vectors. For platforms where this is not supported,
1051                 // emulate them by writing to the stack and reading back as the destination type.
1052                 if(emulateMismatchedBitCast)
1053                 {
1054                         if(!Ice::isVectorType(v->getType()) && Ice::isVectorType(T(destType)))
1055                         {
1056                                 Value *address = allocateStackVariable(destType);
1057                                 createStore(v, address, T(v->getType()));
1058                                 return createLoad(address, destType);
1059                         }
1060                         else if(Ice::isVectorType(v->getType()) && !Ice::isVectorType(T(destType)))
1061                         {
1062                                 Value *address = allocateStackVariable(T(v->getType()));
1063                                 createStore(v, address, T(v->getType()));
1064                                 return createLoad(address, destType);
1065                         }
1066                 }
1067
1068                 return createCast(Ice::InstCast::Bitcast, v, destType);
1069         }
1070
1071         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
1072         {
1073                 assert(lhs->getType() == rhs->getType());
1074
1075                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
1076                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
1077                 ::basicBlock->appendInst(cmp);
1078
1079                 return V(result);
1080         }
1081
1082         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1083         {
1084                 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
1085         }
1086
1087         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1088         {
1089                 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
1090         }
1091
1092         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1093         {
1094                 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
1095         }
1096
1097         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1098         {
1099                 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
1100         }
1101
1102         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1103         {
1104                 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
1105         }
1106
1107         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1108         {
1109                 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
1110         }
1111
1112         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1113         {
1114                 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
1115         }
1116
1117         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1118         {
1119                 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
1120         }
1121
1122         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1123         {
1124                 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
1125         }
1126
1127         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1128         {
1129                 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
1130         }
1131
1132         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
1133         {
1134                 assert(lhs->getType() == rhs->getType());
1135                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
1136
1137                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
1138                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
1139                 ::basicBlock->appendInst(cmp);
1140
1141                 return V(result);
1142         }
1143
1144         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1145         {
1146                 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
1147         }
1148
1149         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1150         {
1151                 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
1152         }
1153
1154         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1155         {
1156                 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
1157         }
1158
1159         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1160         {
1161                 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
1162         }
1163
1164         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1165         {
1166                 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1167         }
1168
1169         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1170         {
1171                 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1172         }
1173
1174         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1175         {
1176                 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1177         }
1178
1179         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1180         {
1181                 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1182         }
1183
1184         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1185         {
1186                 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1187         }
1188
1189         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1190         {
1191                 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1192         }
1193
1194         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1195         {
1196                 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1197         }
1198
1199         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1200         {
1201                 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1202         }
1203
1204         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1205         {
1206                 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1207         }
1208
1209         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1210         {
1211                 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1212         }
1213
1214         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1215         {
1216                 auto result = ::function->makeVariable(T(type));
1217                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1218                 ::basicBlock->appendInst(extract);
1219
1220                 return V(result);
1221         }
1222
1223         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1224         {
1225                 auto result = ::function->makeVariable(vector->getType());
1226                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1227                 ::basicBlock->appendInst(insert);
1228
1229                 return V(result);
1230         }
1231
1232         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1233         {
1234                 assert(V1->getType() == V2->getType());
1235
1236                 int size = Ice::typeNumElements(V1->getType());
1237                 auto result = ::function->makeVariable(V1->getType());
1238                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1239
1240                 for(int i = 0; i < size; i++)
1241                 {
1242                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1243                 }
1244
1245                 ::basicBlock->appendInst(shuffle);
1246
1247                 return V(result);
1248         }
1249
1250         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1251         {
1252                 assert(ifTrue->getType() == ifFalse->getType());
1253
1254                 auto result = ::function->makeVariable(ifTrue->getType());
1255                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1256                 ::basicBlock->appendInst(select);
1257
1258                 return V(result);
1259         }
1260
1261         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1262         {
1263                 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1264                 ::basicBlock->appendInst(switchInst);
1265
1266                 return reinterpret_cast<SwitchCases*>(switchInst);
1267         }
1268
1269         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1270         {
1271                 switchCases->addBranch(label, label, branch);
1272         }
1273
1274         void Nucleus::createUnreachable()
1275         {
1276                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1277                 ::basicBlock->appendInst(unreachable);
1278         }
1279
1280         static Value *createSwizzle4(Value *val, unsigned char select)
1281         {
1282                 int swizzle[4] =
1283                 {
1284                         (select >> 0) & 0x03,
1285                         (select >> 2) & 0x03,
1286                         (select >> 4) & 0x03,
1287                         (select >> 6) & 0x03,
1288                 };
1289
1290                 return Nucleus::createShuffleVector(val, val, swizzle);
1291         }
1292
1293         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1294         {
1295                 int64_t mask[4] = {0, 0, 0, 0};
1296
1297                 mask[(select >> 0) & 0x03] = -1;
1298                 mask[(select >> 2) & 0x03] = -1;
1299                 mask[(select >> 4) & 0x03] = -1;
1300                 mask[(select >> 6) & 0x03] = -1;
1301
1302                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1303                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1304
1305                 return result;
1306         }
1307
1308         Type *Nucleus::getPointerType(Type *ElementType)
1309         {
1310                 if(sizeof(void*) == 8)
1311                 {
1312                         return T(Ice::IceType_i64);
1313                 }
1314                 else
1315                 {
1316                         return T(Ice::IceType_i32);
1317                 }
1318         }
1319
1320         Value *Nucleus::createNullValue(Type *Ty)
1321         {
1322                 if(Ice::isVectorType(T(Ty)))
1323                 {
1324                         assert(Ice::typeNumElements(T(Ty)) <= 16);
1325                         int64_t c[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1326                         return createConstantVector(c, Ty);
1327                 }
1328                 else
1329                 {
1330                         return V(::context->getConstantZero(T(Ty)));
1331                 }
1332         }
1333
1334         Value *Nucleus::createConstantLong(int64_t i)
1335         {
1336                 return V(::context->getConstantInt64(i));
1337         }
1338
1339         Value *Nucleus::createConstantInt(int i)
1340         {
1341                 return V(::context->getConstantInt32(i));
1342         }
1343
1344         Value *Nucleus::createConstantInt(unsigned int i)
1345         {
1346                 return V(::context->getConstantInt32(i));
1347         }
1348
1349         Value *Nucleus::createConstantBool(bool b)
1350         {
1351                 return V(::context->getConstantInt1(b));
1352         }
1353
1354         Value *Nucleus::createConstantByte(signed char i)
1355         {
1356                 return V(::context->getConstantInt8(i));
1357         }
1358
1359         Value *Nucleus::createConstantByte(unsigned char i)
1360         {
1361                 return V(::context->getConstantInt8(i));
1362         }
1363
1364         Value *Nucleus::createConstantShort(short i)
1365         {
1366                 return V(::context->getConstantInt16(i));
1367         }
1368
1369         Value *Nucleus::createConstantShort(unsigned short i)
1370         {
1371                 return V(::context->getConstantInt16(i));
1372         }
1373
1374         Value *Nucleus::createConstantFloat(float x)
1375         {
1376                 return V(::context->getConstantFloat(x));
1377         }
1378
1379         Value *Nucleus::createNullPointer(Type *Ty)
1380         {
1381                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1382         }
1383
1384         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1385         {
1386                 const int vectorSize = 16;
1387                 assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
1388                 const int alignment = vectorSize;
1389                 auto globalPool = ::function->getGlobalPool();
1390
1391                 const int64_t *i = constants;
1392                 const double *f = reinterpret_cast<const double*>(constants);
1393                 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1394
1395                 switch((int)reinterpret_cast<intptr_t>(type))
1396                 {
1397                 case Ice::IceType_v4i32:
1398                 case Ice::IceType_v4i1:
1399                         {
1400                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1401                                 static_assert(sizeof(initializer) == vectorSize, "!");
1402                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1403                         }
1404                         break;
1405                 case Ice::IceType_v4f32:
1406                         {
1407                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1408                                 static_assert(sizeof(initializer) == vectorSize, "!");
1409                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1410                         }
1411                         break;
1412                 case Ice::IceType_v8i16:
1413                 case Ice::IceType_v8i1:
1414                         {
1415                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1416                                 static_assert(sizeof(initializer) == vectorSize, "!");
1417                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1418                         }
1419                         break;
1420                 case Ice::IceType_v16i8:
1421                 case Ice::IceType_v16i1:
1422                         {
1423                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1424                                 static_assert(sizeof(initializer) == vectorSize, "!");
1425                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1426                         }
1427                         break;
1428                 case Type_v2i32:
1429                         {
1430                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1431                                 static_assert(sizeof(initializer) == vectorSize, "!");
1432                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1433                         }
1434                         break;
1435                 case Type_v2f32:
1436                         {
1437                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1438                                 static_assert(sizeof(initializer) == vectorSize, "!");
1439                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1440                         }
1441                         break;
1442                 case Type_v4i16:
1443                         {
1444                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1445                                 static_assert(sizeof(initializer) == vectorSize, "!");
1446                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1447                         }
1448                         break;
1449                 case Type_v8i8:
1450                         {
1451                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1452                                 static_assert(sizeof(initializer) == vectorSize, "!");
1453                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1454                         }
1455                         break;
1456                 case Type_v4i8:
1457                         {
1458                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1459                                 static_assert(sizeof(initializer) == vectorSize, "!");
1460                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1461                         }
1462                         break;
1463                 default:
1464                         assert(false && "Unknown constant vector type" && type);
1465                 }
1466
1467                 auto name = Ice::GlobalString::createWithoutString(::context);
1468                 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1469                 variableDeclaration->setName(name);
1470                 variableDeclaration->setAlignment(alignment);
1471                 variableDeclaration->setIsConstant(true);
1472                 variableDeclaration->addInitializer(dataInitializer);
1473
1474                 ::function->addGlobal(variableDeclaration);
1475
1476                 constexpr int32_t offset = 0;
1477                 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1478
1479                 Ice::Variable *result = ::function->makeVariable(T(type));
1480                 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1481                 ::basicBlock->appendInst(load);
1482
1483                 return V(result);
1484         }
1485
1486         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1487         {
1488                 return createConstantVector((const int64_t*)constants, type);
1489         }
1490
1491         Type *Void::getType()
1492         {
1493                 return T(Ice::IceType_void);
1494         }
1495
1496         Bool::Bool(Argument<Bool> argument)
1497         {
1498                 storeValue(argument.value);
1499         }
1500
1501         Bool::Bool(bool x)
1502         {
1503                 storeValue(Nucleus::createConstantBool(x));
1504         }
1505
1506         Bool::Bool(RValue<Bool> rhs)
1507         {
1508                 storeValue(rhs.value);
1509         }
1510
1511         Bool::Bool(const Bool &rhs)
1512         {
1513                 Value *value = rhs.loadValue();
1514                 storeValue(value);
1515         }
1516
1517         Bool::Bool(const Reference<Bool> &rhs)
1518         {
1519                 Value *value = rhs.loadValue();
1520                 storeValue(value);
1521         }
1522
1523         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1524         {
1525                 storeValue(rhs.value);
1526
1527                 return rhs;
1528         }
1529
1530         RValue<Bool> Bool::operator=(const Bool &rhs)
1531         {
1532                 Value *value = rhs.loadValue();
1533                 storeValue(value);
1534
1535                 return RValue<Bool>(value);
1536         }
1537
1538         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1539         {
1540                 Value *value = rhs.loadValue();
1541                 storeValue(value);
1542
1543                 return RValue<Bool>(value);
1544         }
1545
1546         RValue<Bool> operator!(RValue<Bool> val)
1547         {
1548                 return RValue<Bool>(Nucleus::createNot(val.value));
1549         }
1550
1551         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1552         {
1553                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1554         }
1555
1556         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1557         {
1558                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1559         }
1560
1561         Type *Bool::getType()
1562         {
1563                 return T(Ice::IceType_i1);
1564         }
1565
1566         Byte::Byte(Argument<Byte> argument)
1567         {
1568                 storeValue(argument.value);
1569         }
1570
1571         Byte::Byte(RValue<Int> cast)
1572         {
1573                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1574
1575                 storeValue(integer);
1576         }
1577
1578         Byte::Byte(RValue<UInt> cast)
1579         {
1580                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1581
1582                 storeValue(integer);
1583         }
1584
1585         Byte::Byte(RValue<UShort> cast)
1586         {
1587                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1588
1589                 storeValue(integer);
1590         }
1591
1592         Byte::Byte(int x)
1593         {
1594                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1595         }
1596
1597         Byte::Byte(unsigned char x)
1598         {
1599                 storeValue(Nucleus::createConstantByte(x));
1600         }
1601
1602         Byte::Byte(RValue<Byte> rhs)
1603         {
1604                 storeValue(rhs.value);
1605         }
1606
1607         Byte::Byte(const Byte &rhs)
1608         {
1609                 Value *value = rhs.loadValue();
1610                 storeValue(value);
1611         }
1612
1613         Byte::Byte(const Reference<Byte> &rhs)
1614         {
1615                 Value *value = rhs.loadValue();
1616                 storeValue(value);
1617         }
1618
1619         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1620         {
1621                 storeValue(rhs.value);
1622
1623                 return rhs;
1624         }
1625
1626         RValue<Byte> Byte::operator=(const Byte &rhs)
1627         {
1628                 Value *value = rhs.loadValue();
1629                 storeValue(value);
1630
1631                 return RValue<Byte>(value);
1632         }
1633
1634         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1635         {
1636                 Value *value = rhs.loadValue();
1637                 storeValue(value);
1638
1639                 return RValue<Byte>(value);
1640         }
1641
1642         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1643         {
1644                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1645         }
1646
1647         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1648         {
1649                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1650         }
1651
1652         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1653         {
1654                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1655         }
1656
1657         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1658         {
1659                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1660         }
1661
1662         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1663         {
1664                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1665         }
1666
1667         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1668         {
1669                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1670         }
1671
1672         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1673         {
1674                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1675         }
1676
1677         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1678         {
1679                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1680         }
1681
1682         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1683         {
1684                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1685         }
1686
1687         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1688         {
1689                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1690         }
1691
1692         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1693         {
1694                 return lhs = lhs + rhs;
1695         }
1696
1697         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1698         {
1699                 return lhs = lhs - rhs;
1700         }
1701
1702         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1703         {
1704                 return lhs = lhs * rhs;
1705         }
1706
1707         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1708         {
1709                 return lhs = lhs / rhs;
1710         }
1711
1712         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1713         {
1714                 return lhs = lhs % rhs;
1715         }
1716
1717         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1718         {
1719                 return lhs = lhs & rhs;
1720         }
1721
1722         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1723         {
1724                 return lhs = lhs | rhs;
1725         }
1726
1727         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1728         {
1729                 return lhs = lhs ^ rhs;
1730         }
1731
1732         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1733         {
1734                 return lhs = lhs << rhs;
1735         }
1736
1737         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1738         {
1739                 return lhs = lhs >> rhs;
1740         }
1741
1742         RValue<Byte> operator+(RValue<Byte> val)
1743         {
1744                 return val;
1745         }
1746
1747         RValue<Byte> operator-(RValue<Byte> val)
1748         {
1749                 return RValue<Byte>(Nucleus::createNeg(val.value));
1750         }
1751
1752         RValue<Byte> operator~(RValue<Byte> val)
1753         {
1754                 return RValue<Byte>(Nucleus::createNot(val.value));
1755         }
1756
1757         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1758         {
1759                 RValue<Byte> res = val;
1760                 val += Byte(1);
1761                 return res;
1762         }
1763
1764         const Byte &operator++(Byte &val)   // Pre-increment
1765         {
1766                 val += Byte(1);
1767                 return val;
1768         }
1769
1770         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1771         {
1772                 RValue<Byte> res = val;
1773                 val -= Byte(1);
1774                 return res;
1775         }
1776
1777         const Byte &operator--(Byte &val)   // Pre-decrement
1778         {
1779                 val -= Byte(1);
1780                 return val;
1781         }
1782
1783         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1784         {
1785                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1786         }
1787
1788         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1789         {
1790                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1791         }
1792
1793         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1794         {
1795                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1796         }
1797
1798         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1799         {
1800                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1801         }
1802
1803         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1804         {
1805                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1806         }
1807
1808         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1809         {
1810                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1811         }
1812
1813         Type *Byte::getType()
1814         {
1815                 return T(Ice::IceType_i8);
1816         }
1817
1818         SByte::SByte(Argument<SByte> argument)
1819         {
1820                 storeValue(argument.value);
1821         }
1822
1823         SByte::SByte(RValue<Int> cast)
1824         {
1825                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1826
1827                 storeValue(integer);
1828         }
1829
1830         SByte::SByte(RValue<Short> cast)
1831         {
1832                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1833
1834                 storeValue(integer);
1835         }
1836
1837         SByte::SByte(signed char x)
1838         {
1839                 storeValue(Nucleus::createConstantByte(x));
1840         }
1841
1842         SByte::SByte(RValue<SByte> rhs)
1843         {
1844                 storeValue(rhs.value);
1845         }
1846
1847         SByte::SByte(const SByte &rhs)
1848         {
1849                 Value *value = rhs.loadValue();
1850                 storeValue(value);
1851         }
1852
1853         SByte::SByte(const Reference<SByte> &rhs)
1854         {
1855                 Value *value = rhs.loadValue();
1856                 storeValue(value);
1857         }
1858
1859         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1860         {
1861                 storeValue(rhs.value);
1862
1863                 return rhs;
1864         }
1865
1866         RValue<SByte> SByte::operator=(const SByte &rhs)
1867         {
1868                 Value *value = rhs.loadValue();
1869                 storeValue(value);
1870
1871                 return RValue<SByte>(value);
1872         }
1873
1874         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1875         {
1876                 Value *value = rhs.loadValue();
1877                 storeValue(value);
1878
1879                 return RValue<SByte>(value);
1880         }
1881
1882         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1883         {
1884                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1885         }
1886
1887         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1888         {
1889                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1890         }
1891
1892         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1893         {
1894                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1895         }
1896
1897         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1898         {
1899                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1900         }
1901
1902         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1903         {
1904                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1905         }
1906
1907         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1908         {
1909                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1910         }
1911
1912         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1913         {
1914                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1915         }
1916
1917         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1918         {
1919                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1920         }
1921
1922         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1923         {
1924                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1925         }
1926
1927         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1928         {
1929                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1930         }
1931
1932         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1933         {
1934                 return lhs = lhs + rhs;
1935         }
1936
1937         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1938         {
1939                 return lhs = lhs - rhs;
1940         }
1941
1942         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1943         {
1944                 return lhs = lhs * rhs;
1945         }
1946
1947         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1948         {
1949                 return lhs = lhs / rhs;
1950         }
1951
1952         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1953         {
1954                 return lhs = lhs % rhs;
1955         }
1956
1957         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1958         {
1959                 return lhs = lhs & rhs;
1960         }
1961
1962         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1963         {
1964                 return lhs = lhs | rhs;
1965         }
1966
1967         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1968         {
1969                 return lhs = lhs ^ rhs;
1970         }
1971
1972         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1973         {
1974                 return lhs = lhs << rhs;
1975         }
1976
1977         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1978         {
1979                 return lhs = lhs >> rhs;
1980         }
1981
1982         RValue<SByte> operator+(RValue<SByte> val)
1983         {
1984                 return val;
1985         }
1986
1987         RValue<SByte> operator-(RValue<SByte> val)
1988         {
1989                 return RValue<SByte>(Nucleus::createNeg(val.value));
1990         }
1991
1992         RValue<SByte> operator~(RValue<SByte> val)
1993         {
1994                 return RValue<SByte>(Nucleus::createNot(val.value));
1995         }
1996
1997         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1998         {
1999                 RValue<SByte> res = val;
2000                 val += SByte(1);
2001                 return res;
2002         }
2003
2004         const SByte &operator++(SByte &val)   // Pre-increment
2005         {
2006                 val += SByte(1);
2007                 return val;
2008         }
2009
2010         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
2011         {
2012                 RValue<SByte> res = val;
2013                 val -= SByte(1);
2014                 return res;
2015         }
2016
2017         const SByte &operator--(SByte &val)   // Pre-decrement
2018         {
2019                 val -= SByte(1);
2020                 return val;
2021         }
2022
2023         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
2024         {
2025                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2026         }
2027
2028         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
2029         {
2030                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2031         }
2032
2033         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
2034         {
2035                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2036         }
2037
2038         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
2039         {
2040                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2041         }
2042
2043         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
2044         {
2045                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2046         }
2047
2048         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
2049         {
2050                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2051         }
2052
2053         Type *SByte::getType()
2054         {
2055                 return T(Ice::IceType_i8);
2056         }
2057
2058         Short::Short(Argument<Short> argument)
2059         {
2060                 storeValue(argument.value);
2061         }
2062
2063         Short::Short(RValue<Int> cast)
2064         {
2065                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
2066
2067                 storeValue(integer);
2068         }
2069
2070         Short::Short(short x)
2071         {
2072                 storeValue(Nucleus::createConstantShort(x));
2073         }
2074
2075         Short::Short(RValue<Short> rhs)
2076         {
2077                 storeValue(rhs.value);
2078         }
2079
2080         Short::Short(const Short &rhs)
2081         {
2082                 Value *value = rhs.loadValue();
2083                 storeValue(value);
2084         }
2085
2086         Short::Short(const Reference<Short> &rhs)
2087         {
2088                 Value *value = rhs.loadValue();
2089                 storeValue(value);
2090         }
2091
2092         RValue<Short> Short::operator=(RValue<Short> rhs)
2093         {
2094                 storeValue(rhs.value);
2095
2096                 return rhs;
2097         }
2098
2099         RValue<Short> Short::operator=(const Short &rhs)
2100         {
2101                 Value *value = rhs.loadValue();
2102                 storeValue(value);
2103
2104                 return RValue<Short>(value);
2105         }
2106
2107         RValue<Short> Short::operator=(const Reference<Short> &rhs)
2108         {
2109                 Value *value = rhs.loadValue();
2110                 storeValue(value);
2111
2112                 return RValue<Short>(value);
2113         }
2114
2115         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
2116         {
2117                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
2118         }
2119
2120         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
2121         {
2122                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
2123         }
2124
2125         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
2126         {
2127                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
2128         }
2129
2130         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
2131         {
2132                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
2133         }
2134
2135         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
2136         {
2137                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
2138         }
2139
2140         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
2141         {
2142                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
2143         }
2144
2145         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
2146         {
2147                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
2148         }
2149
2150         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
2151         {
2152                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
2153         }
2154
2155         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
2156         {
2157                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
2158         }
2159
2160         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
2161         {
2162                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
2163         }
2164
2165         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
2166         {
2167                 return lhs = lhs + rhs;
2168         }
2169
2170         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2171         {
2172                 return lhs = lhs - rhs;
2173         }
2174
2175         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2176         {
2177                 return lhs = lhs * rhs;
2178         }
2179
2180         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2181         {
2182                 return lhs = lhs / rhs;
2183         }
2184
2185         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2186         {
2187                 return lhs = lhs % rhs;
2188         }
2189
2190         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2191         {
2192                 return lhs = lhs & rhs;
2193         }
2194
2195         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2196         {
2197                 return lhs = lhs | rhs;
2198         }
2199
2200         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2201         {
2202                 return lhs = lhs ^ rhs;
2203         }
2204
2205         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2206         {
2207                 return lhs = lhs << rhs;
2208         }
2209
2210         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2211         {
2212                 return lhs = lhs >> rhs;
2213         }
2214
2215         RValue<Short> operator+(RValue<Short> val)
2216         {
2217                 return val;
2218         }
2219
2220         RValue<Short> operator-(RValue<Short> val)
2221         {
2222                 return RValue<Short>(Nucleus::createNeg(val.value));
2223         }
2224
2225         RValue<Short> operator~(RValue<Short> val)
2226         {
2227                 return RValue<Short>(Nucleus::createNot(val.value));
2228         }
2229
2230         RValue<Short> operator++(Short &val, int)   // Post-increment
2231         {
2232                 RValue<Short> res = val;
2233                 val += Short(1);
2234                 return res;
2235         }
2236
2237         const Short &operator++(Short &val)   // Pre-increment
2238         {
2239                 val += Short(1);
2240                 return val;
2241         }
2242
2243         RValue<Short> operator--(Short &val, int)   // Post-decrement
2244         {
2245                 RValue<Short> res = val;
2246                 val -= Short(1);
2247                 return res;
2248         }
2249
2250         const Short &operator--(Short &val)   // Pre-decrement
2251         {
2252                 val -= Short(1);
2253                 return val;
2254         }
2255
2256         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2257         {
2258                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2259         }
2260
2261         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2262         {
2263                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2264         }
2265
2266         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2267         {
2268                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2269         }
2270
2271         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2272         {
2273                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2274         }
2275
2276         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2277         {
2278                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2279         }
2280
2281         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2282         {
2283                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2284         }
2285
2286         Type *Short::getType()
2287         {
2288                 return T(Ice::IceType_i16);
2289         }
2290
2291         UShort::UShort(Argument<UShort> argument)
2292         {
2293                 storeValue(argument.value);
2294         }
2295
2296         UShort::UShort(RValue<UInt> cast)
2297         {
2298                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2299
2300                 storeValue(integer);
2301         }
2302
2303         UShort::UShort(RValue<Int> cast)
2304         {
2305                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2306
2307                 storeValue(integer);
2308         }
2309
2310         UShort::UShort(unsigned short x)
2311         {
2312                 storeValue(Nucleus::createConstantShort(x));
2313         }
2314
2315         UShort::UShort(RValue<UShort> rhs)
2316         {
2317                 storeValue(rhs.value);
2318         }
2319
2320         UShort::UShort(const UShort &rhs)
2321         {
2322                 Value *value = rhs.loadValue();
2323                 storeValue(value);
2324         }
2325
2326         UShort::UShort(const Reference<UShort> &rhs)
2327         {
2328                 Value *value = rhs.loadValue();
2329                 storeValue(value);
2330         }
2331
2332         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2333         {
2334                 storeValue(rhs.value);
2335
2336                 return rhs;
2337         }
2338
2339         RValue<UShort> UShort::operator=(const UShort &rhs)
2340         {
2341                 Value *value = rhs.loadValue();
2342                 storeValue(value);
2343
2344                 return RValue<UShort>(value);
2345         }
2346
2347         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2348         {
2349                 Value *value = rhs.loadValue();
2350                 storeValue(value);
2351
2352                 return RValue<UShort>(value);
2353         }
2354
2355         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2356         {
2357                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2358         }
2359
2360         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2361         {
2362                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2363         }
2364
2365         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2366         {
2367                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2368         }
2369
2370         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2371         {
2372                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2373         }
2374
2375         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2376         {
2377                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2378         }
2379
2380         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2381         {
2382                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2383         }
2384
2385         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2386         {
2387                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2388         }
2389
2390         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2391         {
2392                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2393         }
2394
2395         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2396         {
2397                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2398         }
2399
2400         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2401         {
2402                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2403         }
2404
2405         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2406         {
2407                 return lhs = lhs + rhs;
2408         }
2409
2410         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2411         {
2412                 return lhs = lhs - rhs;
2413         }
2414
2415         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2416         {
2417                 return lhs = lhs * rhs;
2418         }
2419
2420         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2421         {
2422                 return lhs = lhs / rhs;
2423         }
2424
2425         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2426         {
2427                 return lhs = lhs % rhs;
2428         }
2429
2430         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2431         {
2432                 return lhs = lhs & rhs;
2433         }
2434
2435         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2436         {
2437                 return lhs = lhs | rhs;
2438         }
2439
2440         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2441         {
2442                 return lhs = lhs ^ rhs;
2443         }
2444
2445         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2446         {
2447                 return lhs = lhs << rhs;
2448         }
2449
2450         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2451         {
2452                 return lhs = lhs >> rhs;
2453         }
2454
2455         RValue<UShort> operator+(RValue<UShort> val)
2456         {
2457                 return val;
2458         }
2459
2460         RValue<UShort> operator-(RValue<UShort> val)
2461         {
2462                 return RValue<UShort>(Nucleus::createNeg(val.value));
2463         }
2464
2465         RValue<UShort> operator~(RValue<UShort> val)
2466         {
2467                 return RValue<UShort>(Nucleus::createNot(val.value));
2468         }
2469
2470         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2471         {
2472                 RValue<UShort> res = val;
2473                 val += UShort(1);
2474                 return res;
2475         }
2476
2477         const UShort &operator++(UShort &val)   // Pre-increment
2478         {
2479                 val += UShort(1);
2480                 return val;
2481         }
2482
2483         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2484         {
2485                 RValue<UShort> res = val;
2486                 val -= UShort(1);
2487                 return res;
2488         }
2489
2490         const UShort &operator--(UShort &val)   // Pre-decrement
2491         {
2492                 val -= UShort(1);
2493                 return val;
2494         }
2495
2496         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2497         {
2498                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2499         }
2500
2501         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2502         {
2503                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2504         }
2505
2506         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2507         {
2508                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2509         }
2510
2511         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2512         {
2513                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2514         }
2515
2516         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2517         {
2518                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2519         }
2520
2521         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2522         {
2523                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2524         }
2525
2526         Type *UShort::getType()
2527         {
2528                 return T(Ice::IceType_i16);
2529         }
2530
2531         Byte4::Byte4(RValue<Byte8> cast)
2532         {
2533                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2534         }
2535
2536         Byte4::Byte4(const Reference<Byte4> &rhs)
2537         {
2538                 Value *value = rhs.loadValue();
2539                 storeValue(value);
2540         }
2541
2542         Type *Byte4::getType()
2543         {
2544                 return T(Type_v4i8);
2545         }
2546
2547         Type *SByte4::getType()
2548         {
2549                 return T(Type_v4i8);
2550         }
2551
2552         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2553         {
2554                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2555                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2556         }
2557
2558         Byte8::Byte8(RValue<Byte8> rhs)
2559         {
2560                 storeValue(rhs.value);
2561         }
2562
2563         Byte8::Byte8(const Byte8 &rhs)
2564         {
2565                 Value *value = rhs.loadValue();
2566                 storeValue(value);
2567         }
2568
2569         Byte8::Byte8(const Reference<Byte8> &rhs)
2570         {
2571                 Value *value = rhs.loadValue();
2572                 storeValue(value);
2573         }
2574
2575         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2576         {
2577                 storeValue(rhs.value);
2578
2579                 return rhs;
2580         }
2581
2582         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2583         {
2584                 Value *value = rhs.loadValue();
2585                 storeValue(value);
2586
2587                 return RValue<Byte8>(value);
2588         }
2589
2590         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2591         {
2592                 Value *value = rhs.loadValue();
2593                 storeValue(value);
2594
2595                 return RValue<Byte8>(value);
2596         }
2597
2598         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2599         {
2600                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2601         }
2602
2603         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2604         {
2605                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2606         }
2607
2608 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2609 //      {
2610 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2611 //      }
2612
2613 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2614 //      {
2615 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2616 //      }
2617
2618 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2619 //      {
2620 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2621 //      }
2622
2623         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2624         {
2625                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2626         }
2627
2628         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2629         {
2630                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2631         }
2632
2633         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2634         {
2635                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2636         }
2637
2638 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2639 //      {
2640 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2641 //      }
2642
2643 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2644 //      {
2645 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2646 //      }
2647
2648         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2649         {
2650                 return lhs = lhs + rhs;
2651         }
2652
2653         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2654         {
2655                 return lhs = lhs - rhs;
2656         }
2657
2658 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2659 //      {
2660 //              return lhs = lhs * rhs;
2661 //      }
2662
2663 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2664 //      {
2665 //              return lhs = lhs / rhs;
2666 //      }
2667
2668 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2669 //      {
2670 //              return lhs = lhs % rhs;
2671 //      }
2672
2673         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2674         {
2675                 return lhs = lhs & rhs;
2676         }
2677
2678         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2679         {
2680                 return lhs = lhs | rhs;
2681         }
2682
2683         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2684         {
2685                 return lhs = lhs ^ rhs;
2686         }
2687
2688 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2689 //      {
2690 //              return lhs = lhs << rhs;
2691 //      }
2692
2693 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2694 //      {
2695 //              return lhs = lhs >> rhs;
2696 //      }
2697
2698 //      RValue<Byte8> operator+(RValue<Byte8> val)
2699 //      {
2700 //              return val;
2701 //      }
2702
2703 //      RValue<Byte8> operator-(RValue<Byte8> val)
2704 //      {
2705 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2706 //      }
2707
2708         RValue<Byte8> operator~(RValue<Byte8> val)
2709         {
2710                 return RValue<Byte8>(Nucleus::createNot(val.value));
2711         }
2712
2713         RValue<Byte> Extract(RValue<Byte8> val, int i)
2714         {
2715                 return RValue<Byte>(Nucleus::createExtractElement(val.value, Byte::getType(), i));
2716         }
2717
2718         RValue<Byte8> Insert(RValue<Byte8> val, RValue<Byte> element, int i)
2719         {
2720                 return RValue<Byte8>(Nucleus::createInsertElement(val.value, element.value, i));
2721         }
2722
2723         RValue<Byte> Saturate(RValue<UShort> x)
2724         {
2725                 return Byte(IfThenElse(Int(x) > 0xFF, Int(0xFF), Int(x)));
2726         }
2727
2728         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2729         {
2730                 if(emulateIntrinsics)
2731                 {
2732                         Byte8 result;
2733                         result = Insert(result, Saturate(UShort(Int(Extract(x, 0))) + UShort(Int(Extract(y, 0)))), 0);
2734                         result = Insert(result, Saturate(UShort(Int(Extract(x, 1))) + UShort(Int(Extract(y, 1)))), 1);
2735                         result = Insert(result, Saturate(UShort(Int(Extract(x, 2))) + UShort(Int(Extract(y, 2)))), 2);
2736                         result = Insert(result, Saturate(UShort(Int(Extract(x, 3))) + UShort(Int(Extract(y, 3)))), 3);
2737                         result = Insert(result, Saturate(UShort(Int(Extract(x, 4))) + UShort(Int(Extract(y, 4)))), 4);
2738                         result = Insert(result, Saturate(UShort(Int(Extract(x, 5))) + UShort(Int(Extract(y, 5)))), 5);
2739                         result = Insert(result, Saturate(UShort(Int(Extract(x, 6))) + UShort(Int(Extract(y, 6)))), 6);
2740                         result = Insert(result, Saturate(UShort(Int(Extract(x, 7))) + UShort(Int(Extract(y, 7)))), 7);
2741
2742                         return result;
2743                 }
2744                 else
2745                 {
2746                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2747                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2748                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
2749                         auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2750                         paddusb->addArg(x.value);
2751                         paddusb->addArg(y.value);
2752                         ::basicBlock->appendInst(paddusb);
2753
2754                         return RValue<Byte8>(V(result));
2755                 }
2756         }
2757
2758         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2759         {
2760                 if(emulateIntrinsics)
2761                 {
2762                         Byte8 result;
2763                         result = Insert(result, Saturate(UShort(Int(Extract(x, 0))) - UShort(Int(Extract(y, 0)))), 0);
2764                         result = Insert(result, Saturate(UShort(Int(Extract(x, 1))) - UShort(Int(Extract(y, 1)))), 1);
2765                         result = Insert(result, Saturate(UShort(Int(Extract(x, 2))) - UShort(Int(Extract(y, 2)))), 2);
2766                         result = Insert(result, Saturate(UShort(Int(Extract(x, 3))) - UShort(Int(Extract(y, 3)))), 3);
2767                         result = Insert(result, Saturate(UShort(Int(Extract(x, 4))) - UShort(Int(Extract(y, 4)))), 4);
2768                         result = Insert(result, Saturate(UShort(Int(Extract(x, 5))) - UShort(Int(Extract(y, 5)))), 5);
2769                         result = Insert(result, Saturate(UShort(Int(Extract(x, 6))) - UShort(Int(Extract(y, 6)))), 6);
2770                         result = Insert(result, Saturate(UShort(Int(Extract(x, 7))) - UShort(Int(Extract(y, 7)))), 7);
2771
2772                         return result;
2773                 }
2774                 else
2775                 {
2776                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2777                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2778                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
2779                         auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2780                         psubusw->addArg(x.value);
2781                         psubusw->addArg(y.value);
2782                         ::basicBlock->appendInst(psubusw);
2783
2784                         return RValue<Byte8>(V(result));
2785                 }
2786         }
2787
2788         RValue<Short4> Unpack(RValue<Byte4> x)
2789         {
2790                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2791                 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2792         }
2793
2794         RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2795         {
2796                 return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2797         }
2798
2799         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2800         {
2801                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2802                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2803         }
2804
2805         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2806         {
2807                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2808                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2809                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2810         }
2811
2812         RValue<SByte> Extract(RValue<SByte8> val, int i)
2813         {
2814                 return RValue<SByte>(Nucleus::createExtractElement(val.value, SByte::getType(), i));
2815         }
2816
2817         RValue<SByte8> Insert(RValue<SByte8> val, RValue<SByte> element, int i)
2818         {
2819                 return RValue<SByte8>(Nucleus::createInsertElement(val.value, element.value, i));
2820         }
2821
2822         RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2823         {
2824                 if(emulateIntrinsics)
2825                 {
2826                         SByte8 result;
2827                         result = Insert(result, Extract(lhs, 0) >> SByte(rhs), 0);
2828                         result = Insert(result, Extract(lhs, 1) >> SByte(rhs), 1);
2829                         result = Insert(result, Extract(lhs, 2) >> SByte(rhs), 2);
2830                         result = Insert(result, Extract(lhs, 3) >> SByte(rhs), 3);
2831                         result = Insert(result, Extract(lhs, 4) >> SByte(rhs), 4);
2832                         result = Insert(result, Extract(lhs, 5) >> SByte(rhs), 5);
2833                         result = Insert(result, Extract(lhs, 6) >> SByte(rhs), 6);
2834                         result = Insert(result, Extract(lhs, 7) >> SByte(rhs), 7);
2835
2836                         return result;
2837                 }
2838                 else
2839                 {
2840                         #if defined(__i386__) || defined(__x86_64__)
2841                                 // SSE2 doesn't support byte vector shifts, so shift as shorts and recombine.
2842                                 RValue<Short4> hi = (As<Short4>(lhs) >> rhs) & Short4(0xFF00);
2843                                 RValue<Short4> lo = As<Short4>(As<UShort4>((As<Short4>(lhs) << 8) >> rhs) >> 8);
2844
2845                                 return As<SByte8>(hi | lo);
2846                         #else
2847                                 return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2848                         #endif
2849                 }
2850         }
2851
2852         RValue<Int> SignMask(RValue<Byte8> x)
2853         {
2854                 if(emulateIntrinsics)
2855                 {
2856                         Byte8 xx = As<Byte8>(As<SByte8>(x) >> 7) & Byte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
2857                         return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
2858                 }
2859                 else
2860                 {
2861                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2862                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2863                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
2864                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2865                         movmsk->addArg(x.value);
2866                         ::basicBlock->appendInst(movmsk);
2867
2868                         return RValue<Int>(V(result));
2869                 }
2870         }
2871
2872 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2873 //      {
2874 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2875 //      }
2876
2877         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2878         {
2879                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2880         }
2881
2882         Type *Byte8::getType()
2883         {
2884                 return T(Type_v8i8);
2885         }
2886
2887         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2888         {
2889                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
2890                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2891
2892                 storeValue(Nucleus::createBitCast(vector, getType()));
2893         }
2894
2895         SByte8::SByte8(RValue<SByte8> rhs)
2896         {
2897                 storeValue(rhs.value);
2898         }
2899
2900         SByte8::SByte8(const SByte8 &rhs)
2901         {
2902                 Value *value = rhs.loadValue();
2903                 storeValue(value);
2904         }
2905
2906         SByte8::SByte8(const Reference<SByte8> &rhs)
2907         {
2908                 Value *value = rhs.loadValue();
2909                 storeValue(value);
2910         }
2911
2912         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2913         {
2914                 storeValue(rhs.value);
2915
2916                 return rhs;
2917         }
2918
2919         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2920         {
2921                 Value *value = rhs.loadValue();
2922                 storeValue(value);
2923
2924                 return RValue<SByte8>(value);
2925         }
2926
2927         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2928         {
2929                 Value *value = rhs.loadValue();
2930                 storeValue(value);
2931
2932                 return RValue<SByte8>(value);
2933         }
2934
2935         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2936         {
2937                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2938         }
2939
2940         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2941         {
2942                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2943         }
2944
2945 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2946 //      {
2947 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2948 //      }
2949
2950 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2951 //      {
2952 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2953 //      }
2954
2955 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2956 //      {
2957 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2958 //      }
2959
2960         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2961         {
2962                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2963         }
2964
2965         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2966         {
2967                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2968         }
2969
2970         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2971         {
2972                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2973         }
2974
2975 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2976 //      {
2977 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2978 //      }
2979
2980 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2981 //      {
2982 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2983 //      }
2984
2985         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2986         {
2987                 return lhs = lhs + rhs;
2988         }
2989
2990         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2991         {
2992                 return lhs = lhs - rhs;
2993         }
2994
2995 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2996 //      {
2997 //              return lhs = lhs * rhs;
2998 //      }
2999
3000 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
3001 //      {
3002 //              return lhs = lhs / rhs;
3003 //      }
3004
3005 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
3006 //      {
3007 //              return lhs = lhs % rhs;
3008 //      }
3009
3010         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
3011         {
3012                 return lhs = lhs & rhs;
3013         }
3014
3015         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
3016         {
3017                 return lhs = lhs | rhs;
3018         }
3019
3020         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
3021         {
3022                 return lhs = lhs ^ rhs;
3023         }
3024
3025 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
3026 //      {
3027 //              return lhs = lhs << rhs;
3028 //      }
3029
3030 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
3031 //      {
3032 //              return lhs = lhs >> rhs;
3033 //      }
3034
3035 //      RValue<SByte8> operator+(RValue<SByte8> val)
3036 //      {
3037 //              return val;
3038 //      }
3039
3040 //      RValue<SByte8> operator-(RValue<SByte8> val)
3041 //      {
3042 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
3043 //      }
3044
3045         RValue<SByte8> operator~(RValue<SByte8> val)
3046         {
3047                 return RValue<SByte8>(Nucleus::createNot(val.value));
3048         }
3049
3050         RValue<SByte> Saturate(RValue<Short> x)
3051         {
3052                 return SByte(IfThenElse(Int(x) > 0x7F, Int(0x7F), IfThenElse(Int(x) < -0x80, Int(0x80), Int(x))));
3053         }
3054
3055         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
3056         {
3057                 if(emulateIntrinsics)
3058                 {
3059                         SByte8 result;
3060                         result = Insert(result, Saturate(Short(Int(Extract(x, 0))) + Short(Int(Extract(y, 0)))), 0);
3061                         result = Insert(result, Saturate(Short(Int(Extract(x, 1))) + Short(Int(Extract(y, 1)))), 1);
3062                         result = Insert(result, Saturate(Short(Int(Extract(x, 2))) + Short(Int(Extract(y, 2)))), 2);
3063                         result = Insert(result, Saturate(Short(Int(Extract(x, 3))) + Short(Int(Extract(y, 3)))), 3);
3064                         result = Insert(result, Saturate(Short(Int(Extract(x, 4))) + Short(Int(Extract(y, 4)))), 4);
3065                         result = Insert(result, Saturate(Short(Int(Extract(x, 5))) + Short(Int(Extract(y, 5)))), 5);
3066                         result = Insert(result, Saturate(Short(Int(Extract(x, 6))) + Short(Int(Extract(y, 6)))), 6);
3067                         result = Insert(result, Saturate(Short(Int(Extract(x, 7))) + Short(Int(Extract(y, 7)))), 7);
3068
3069                         return result;
3070                 }
3071                 else
3072                 {
3073                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3074                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3075                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3076                         auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3077                         paddsb->addArg(x.value);
3078                         paddsb->addArg(y.value);
3079                         ::basicBlock->appendInst(paddsb);
3080
3081                         return RValue<SByte8>(V(result));
3082                 }
3083         }
3084
3085         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
3086         {
3087                 if(emulateIntrinsics)
3088                 {
3089                         SByte8 result;
3090                         result = Insert(result, Saturate(Short(Int(Extract(x, 0))) - Short(Int(Extract(y, 0)))), 0);
3091                         result = Insert(result, Saturate(Short(Int(Extract(x, 1))) - Short(Int(Extract(y, 1)))), 1);
3092                         result = Insert(result, Saturate(Short(Int(Extract(x, 2))) - Short(Int(Extract(y, 2)))), 2);
3093                         result = Insert(result, Saturate(Short(Int(Extract(x, 3))) - Short(Int(Extract(y, 3)))), 3);
3094                         result = Insert(result, Saturate(Short(Int(Extract(x, 4))) - Short(Int(Extract(y, 4)))), 4);
3095                         result = Insert(result, Saturate(Short(Int(Extract(x, 5))) - Short(Int(Extract(y, 5)))), 5);
3096                         result = Insert(result, Saturate(Short(Int(Extract(x, 6))) - Short(Int(Extract(y, 6)))), 6);
3097                         result = Insert(result, Saturate(Short(Int(Extract(x, 7))) - Short(Int(Extract(y, 7)))), 7);
3098
3099                         return result;
3100                 }
3101                 else
3102                 {
3103                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3104                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3105                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3106                         auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3107                         psubsb->addArg(x.value);
3108                         psubsb->addArg(y.value);
3109                         ::basicBlock->appendInst(psubsb);
3110
3111                         return RValue<SByte8>(V(result));
3112                 }
3113         }
3114
3115         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
3116         {
3117                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
3118                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3119         }
3120
3121         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
3122         {
3123                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
3124                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3125                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
3126         }
3127
3128         RValue<Int> SignMask(RValue<SByte8> x)
3129         {
3130                 if(emulateIntrinsics)
3131                 {
3132                         SByte8 xx = (x >> 7) & SByte8(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
3133                         return Int(Extract(xx, 0)) | Int(Extract(xx, 1)) | Int(Extract(xx, 2)) | Int(Extract(xx, 3)) | Int(Extract(xx, 4)) | Int(Extract(xx, 5)) | Int(Extract(xx, 6)) | Int(Extract(xx, 7));
3134                 }
3135                 else
3136                 {
3137                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
3138                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3139                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3140                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
3141                         movmsk->addArg(x.value);
3142                         ::basicBlock->appendInst(movmsk);
3143
3144                         return RValue<Int>(V(result));
3145                 }
3146         }
3147
3148         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
3149         {
3150                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3151         }
3152
3153         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
3154         {
3155                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
3156         }
3157
3158         Type *SByte8::getType()
3159         {
3160                 return T(Type_v8i8);
3161         }
3162
3163         Byte16::Byte16(RValue<Byte16> rhs)
3164         {
3165                 storeValue(rhs.value);
3166         }
3167
3168         Byte16::Byte16(const Byte16 &rhs)
3169         {
3170                 Value *value = rhs.loadValue();
3171                 storeValue(value);
3172         }
3173
3174         Byte16::Byte16(const Reference<Byte16> &rhs)
3175         {
3176                 Value *value = rhs.loadValue();
3177                 storeValue(value);
3178         }
3179
3180         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
3181         {
3182                 storeValue(rhs.value);
3183
3184                 return rhs;
3185         }
3186
3187         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
3188         {
3189                 Value *value = rhs.loadValue();
3190                 storeValue(value);
3191
3192                 return RValue<Byte16>(value);
3193         }
3194
3195         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
3196         {
3197                 Value *value = rhs.loadValue();
3198                 storeValue(value);
3199
3200                 return RValue<Byte16>(value);
3201         }
3202
3203         Type *Byte16::getType()
3204         {
3205                 return T(Ice::IceType_v16i8);
3206         }
3207
3208         Type *SByte16::getType()
3209         {
3210                 return T(Ice::IceType_v16i8);
3211         }
3212
3213         Short2::Short2(RValue<Short4> cast)
3214         {
3215                 storeValue(Nucleus::createBitCast(cast.value, getType()));
3216         }
3217
3218         Type *Short2::getType()
3219         {
3220                 return T(Type_v2i16);
3221         }
3222
3223         UShort2::UShort2(RValue<UShort4> cast)
3224         {
3225                 storeValue(Nucleus::createBitCast(cast.value, getType()));
3226         }
3227
3228         Type *UShort2::getType()
3229         {
3230                 return T(Type_v2i16);
3231         }
3232
3233         Short4::Short4(RValue<Int> cast)
3234         {
3235                 Value *vector = loadValue();
3236                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
3237                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
3238                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
3239
3240                 storeValue(swizzle);
3241         }
3242
3243         Short4::Short4(RValue<Int4> cast)
3244         {
3245                 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
3246                 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
3247                 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
3248
3249                 Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
3250                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
3251
3252                 storeValue(short4);
3253         }
3254
3255 //      Short4::Short4(RValue<Float> cast)
3256 //      {
3257 //      }
3258
3259         Short4::Short4(RValue<Float4> cast)
3260         {
3261                 assert(false && "UNIMPLEMENTED");
3262         }
3263
3264         Short4::Short4(short xyzw)
3265         {
3266                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3267                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3268         }
3269
3270         Short4::Short4(short x, short y, short z, short w)
3271         {
3272                 int64_t constantVector[4] = {x, y, z, w};
3273                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3274         }
3275
3276         Short4::Short4(RValue<Short4> rhs)
3277         {
3278                 storeValue(rhs.value);
3279         }
3280
3281         Short4::Short4(const Short4 &rhs)
3282         {
3283                 Value *value = rhs.loadValue();
3284                 storeValue(value);
3285         }
3286
3287         Short4::Short4(const Reference<Short4> &rhs)
3288         {
3289                 Value *value = rhs.loadValue();
3290                 storeValue(value);
3291         }
3292
3293         Short4::Short4(RValue<UShort4> rhs)
3294         {
3295                 storeValue(rhs.value);
3296         }
3297
3298         Short4::Short4(const UShort4 &rhs)
3299         {
3300                 storeValue(rhs.loadValue());
3301         }
3302
3303         Short4::Short4(const Reference<UShort4> &rhs)
3304         {
3305                 storeValue(rhs.loadValue());
3306         }
3307
3308         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
3309         {
3310                 storeValue(rhs.value);
3311
3312                 return rhs;
3313         }
3314
3315         RValue<Short4> Short4::operator=(const Short4 &rhs)
3316         {
3317                 Value *value = rhs.loadValue();
3318                 storeValue(value);
3319
3320                 return RValue<Short4>(value);
3321         }
3322
3323         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3324         {
3325                 Value *value = rhs.loadValue();
3326                 storeValue(value);
3327
3328                 return RValue<Short4>(value);
3329         }
3330
3331         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3332         {
3333                 storeValue(rhs.value);
3334
3335                 return RValue<Short4>(rhs);
3336         }
3337
3338         RValue<Short4> Short4::operator=(const UShort4 &rhs)
3339         {
3340                 Value *value = rhs.loadValue();
3341                 storeValue(value);
3342
3343                 return RValue<Short4>(value);
3344         }
3345
3346         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3347         {
3348                 Value *value = rhs.loadValue();
3349                 storeValue(value);
3350
3351                 return RValue<Short4>(value);
3352         }
3353
3354         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3355         {
3356                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3357         }
3358
3359         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3360         {
3361                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3362         }
3363
3364         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3365         {
3366                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3367         }
3368
3369 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3370 //      {
3371 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3372 //      }
3373
3374 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3375 //      {
3376 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3377 //      }
3378
3379         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3380         {
3381                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3382         }
3383
3384         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3385         {
3386                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3387         }
3388
3389         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3390         {
3391                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3392         }
3393
3394         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3395         {
3396                 if(emulateIntrinsics)
3397                 {
3398                         Short4 result;
3399                         result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
3400                         result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
3401                         result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
3402                         result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
3403
3404                         return result;
3405                 }
3406                 else
3407                 {
3408                         return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3409                 }
3410         }
3411
3412         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3413         {
3414                 if(emulateIntrinsics)
3415                 {
3416                         Short4 result;
3417                         result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
3418                         result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
3419                         result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
3420                         result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
3421
3422                         return result;
3423                 }
3424                 else
3425                 {
3426                         return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3427                 }
3428         }
3429
3430         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3431         {
3432                 return lhs = lhs + rhs;
3433         }
3434
3435         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3436         {
3437                 return lhs = lhs - rhs;
3438         }
3439
3440         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3441         {
3442                 return lhs = lhs * rhs;
3443         }
3444
3445 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3446 //      {
3447 //              return lhs = lhs / rhs;
3448 //      }
3449
3450 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3451 //      {
3452 //              return lhs = lhs % rhs;
3453 //      }
3454
3455         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3456         {
3457                 return lhs = lhs & rhs;
3458         }
3459
3460         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3461         {
3462                 return lhs = lhs | rhs;
3463         }
3464
3465         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3466         {
3467                 return lhs = lhs ^ rhs;
3468         }
3469
3470         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3471         {
3472                 return lhs = lhs << rhs;
3473         }
3474
3475         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3476         {
3477                 return lhs = lhs >> rhs;
3478         }
3479
3480 //      RValue<Short4> operator+(RValue<Short4> val)
3481 //      {
3482 //              return val;
3483 //      }
3484
3485         RValue<Short4> operator-(RValue<Short4> val)
3486         {
3487                 return RValue<Short4>(Nucleus::createNeg(val.value));
3488         }
3489
3490         RValue<Short4> operator~(RValue<Short4> val)
3491         {
3492                 return RValue<Short4>(Nucleus::createNot(val.value));
3493         }
3494
3495         RValue<Short4> RoundShort4(RValue<Float4> cast)
3496         {
3497                 RValue<Int4> int4 = RoundInt(cast);
3498                 return As<Short4>(Pack(int4, int4));
3499         }
3500
3501         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3502         {
3503                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3504                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3505                 ::basicBlock->appendInst(cmp);
3506
3507                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3508                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3509                 ::basicBlock->appendInst(select);
3510
3511                 return RValue<Short4>(V(result));
3512         }
3513
3514         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3515         {
3516                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3517                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3518                 ::basicBlock->appendInst(cmp);
3519
3520                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3521                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3522                 ::basicBlock->appendInst(select);
3523
3524                 return RValue<Short4>(V(result));
3525         }
3526
3527         RValue<Short> Saturate(RValue<Int> x)
3528         {
3529                 return Short(IfThenElse(x > 0x7FFF, Int(0x7FFF), IfThenElse(x < -0x8000, Int(0x8000), x)));
3530         }
3531
3532         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3533         {
3534                 if(emulateIntrinsics)
3535                 {
3536                         Short4 result;
3537                         result = Insert(result, Saturate(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
3538                         result = Insert(result, Saturate(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
3539                         result = Insert(result, Saturate(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
3540                         result = Insert(result, Saturate(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
3541
3542                         return result;
3543                 }
3544                 else
3545                 {
3546                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3547                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3548                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3549                         auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3550                         paddsw->addArg(x.value);
3551                         paddsw->addArg(y.value);
3552                         ::basicBlock->appendInst(paddsw);
3553
3554                         return RValue<Short4>(V(result));
3555                 }
3556         }
3557
3558         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3559         {
3560                 if(emulateIntrinsics)
3561                 {
3562                         Short4 result;
3563                         result = Insert(result, Saturate(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
3564                         result = Insert(result, Saturate(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
3565                         result = Insert(result, Saturate(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
3566                         result = Insert(result, Saturate(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
3567
3568                         return result;
3569                 }
3570                 else
3571                 {
3572                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3573                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3574                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3575                         auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3576                         psubsw->addArg(x.value);
3577                         psubsw->addArg(y.value);
3578                         ::basicBlock->appendInst(psubsw);
3579
3580                         return RValue<Short4>(V(result));
3581                 }
3582         }
3583
3584         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3585         {
3586                 if(emulateIntrinsics)
3587                 {
3588                         Short4 result;
3589                         result = Insert(result, Short((Int(Extract(x, 0)) * Int(Extract(y, 0))) >> 16), 0);
3590                         result = Insert(result, Short((Int(Extract(x, 1)) * Int(Extract(y, 1))) >> 16), 1);
3591                         result = Insert(result, Short((Int(Extract(x, 2)) * Int(Extract(y, 2))) >> 16), 2);
3592                         result = Insert(result, Short((Int(Extract(x, 3)) * Int(Extract(y, 3))) >> 16), 3);
3593
3594                         return result;
3595                 }
3596                 else
3597                 {
3598                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3599                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3600                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3601                         auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3602                         pmulhw->addArg(x.value);
3603                         pmulhw->addArg(y.value);
3604                         ::basicBlock->appendInst(pmulhw);
3605
3606                         return RValue<Short4>(V(result));
3607                 }
3608         }
3609
3610         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3611         {
3612                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3613                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3614                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3615                 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3616                 pmaddwd->addArg(x.value);
3617                 pmaddwd->addArg(y.value);
3618                 ::basicBlock->appendInst(pmaddwd);
3619
3620                 return As<Int2>(V(result));
3621         }
3622
3623         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3624         {
3625                 if(emulateIntrinsics)
3626                 {
3627                         SByte8 result;
3628                         result = Insert(result, Saturate(Extract(x, 0)), 0);
3629                         result = Insert(result, Saturate(Extract(x, 1)), 1);
3630                         result = Insert(result, Saturate(Extract(x, 2)), 2);
3631                         result = Insert(result, Saturate(Extract(x, 3)), 3);
3632                         result = Insert(result, Saturate(Extract(y, 0)), 4);
3633                         result = Insert(result, Saturate(Extract(y, 1)), 5);
3634                         result = Insert(result, Saturate(Extract(y, 2)), 6);
3635                         result = Insert(result, Saturate(Extract(y, 3)), 7);
3636
3637                         return result;
3638                 }
3639                 else
3640                 {
3641                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3642                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3643                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3644                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3645                         pack->addArg(x.value);
3646                         pack->addArg(y.value);
3647                         ::basicBlock->appendInst(pack);
3648
3649                         return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
3650                 }
3651         }
3652
3653         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3654         {
3655                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3656                 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3657         }
3658
3659         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3660         {
3661                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3662                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3663                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3664         }
3665
3666         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3667         {
3668                 // Real type is v8i16
3669                 int shuffle[8] =
3670                 {
3671                         (select >> 0) & 0x03,
3672                         (select >> 2) & 0x03,
3673                         (select >> 4) & 0x03,
3674                         (select >> 6) & 0x03,
3675                         (select >> 0) & 0x03,
3676                         (select >> 2) & 0x03,
3677                         (select >> 4) & 0x03,
3678                         (select >> 6) & 0x03,
3679                 };
3680
3681                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3682         }
3683
3684         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3685         {
3686                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3687         }
3688
3689         RValue<Short> Extract(RValue<Short4> val, int i)
3690         {
3691                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3692         }
3693
3694         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3695         {
3696                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3697         }
3698
3699         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3700         {
3701                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3702         }
3703
3704         Type *Short4::getType()
3705         {
3706                 return T(Type_v4i16);
3707         }
3708
3709         UShort4::UShort4(RValue<Int4> cast)
3710         {
3711                 *this = Short4(cast);
3712         }
3713
3714         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3715         {
3716                 if(saturate)
3717                 {
3718                         if(CPUID::SSE4_1)
3719                         {
3720                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3721                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3722                         }
3723                         else
3724                         {
3725                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3726                         }
3727                 }
3728                 else
3729                 {
3730                         *this = Short4(Int4(cast));
3731                 }
3732         }
3733
3734         UShort4::UShort4(unsigned short xyzw)
3735         {
3736                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3737                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3738         }
3739
3740         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3741         {
3742                 int64_t constantVector[4] = {x, y, z, w};
3743                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3744         }
3745
3746         UShort4::UShort4(RValue<UShort4> rhs)
3747         {
3748                 storeValue(rhs.value);
3749         }
3750
3751         UShort4::UShort4(const UShort4 &rhs)
3752         {
3753                 Value *value = rhs.loadValue();
3754                 storeValue(value);
3755         }
3756
3757         UShort4::UShort4(const Reference<UShort4> &rhs)
3758         {
3759                 Value *value = rhs.loadValue();
3760                 storeValue(value);
3761         }
3762
3763         UShort4::UShort4(RValue<Short4> rhs)
3764         {
3765                 storeValue(rhs.value);
3766         }
3767
3768         UShort4::UShort4(const Short4 &rhs)
3769         {
3770                 Value *value = rhs.loadValue();
3771                 storeValue(value);
3772         }
3773
3774         UShort4::UShort4(const Reference<Short4> &rhs)
3775         {
3776                 Value *value = rhs.loadValue();
3777                 storeValue(value);
3778         }
3779
3780         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3781         {
3782                 storeValue(rhs.value);
3783
3784                 return rhs;
3785         }
3786
3787         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3788         {
3789                 Value *value = rhs.loadValue();
3790                 storeValue(value);
3791
3792                 return RValue<UShort4>(value);
3793         }
3794
3795         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3796         {
3797                 Value *value = rhs.loadValue();
3798                 storeValue(value);
3799
3800                 return RValue<UShort4>(value);
3801         }
3802
3803         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3804         {
3805                 storeValue(rhs.value);
3806
3807                 return RValue<UShort4>(rhs);
3808         }
3809
3810         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3811         {
3812                 Value *value = rhs.loadValue();
3813                 storeValue(value);
3814
3815                 return RValue<UShort4>(value);
3816         }
3817
3818         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3819         {
3820                 Value *value = rhs.loadValue();
3821                 storeValue(value);
3822
3823                 return RValue<UShort4>(value);
3824         }
3825
3826         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3827         {
3828                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3829         }
3830
3831         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3832         {
3833                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3834         }
3835
3836         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3837         {
3838                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3839         }
3840
3841         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3842         {
3843                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3844         }
3845
3846         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3847         {
3848                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3849         }
3850
3851         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3852         {
3853                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3854         }
3855
3856         RValue<UShort> Extract(RValue<UShort4> val, int i)
3857         {
3858                 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
3859         }
3860
3861         RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
3862         {
3863                 return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
3864         }
3865
3866         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3867         {
3868                 if(emulateIntrinsics)
3869                 {
3870                         UShort4 result;
3871                         result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
3872                         result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
3873                         result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
3874                         result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
3875
3876                         return result;
3877                 }
3878                 else
3879                 {
3880                         return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3881                 }
3882         }
3883
3884         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3885         {
3886                 if(emulateIntrinsics)
3887                 {
3888                         UShort4 result;
3889                         result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
3890                         result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
3891                         result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
3892                         result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
3893
3894                         return result;
3895                 }
3896                 else
3897                 {
3898                         return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3899                 }
3900         }
3901
3902         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3903         {
3904                 return lhs = lhs << rhs;
3905         }
3906
3907         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3908         {
3909                 return lhs = lhs >> rhs;
3910         }
3911
3912         RValue<UShort4> operator~(RValue<UShort4> val)
3913         {
3914                 return RValue<UShort4>(Nucleus::createNot(val.value));
3915         }
3916
3917         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3918         {
3919                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3920                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3921                 ::basicBlock->appendInst(cmp);
3922
3923                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3924                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3925                 ::basicBlock->appendInst(select);
3926
3927                 return RValue<UShort4>(V(result));
3928         }
3929
3930         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3931         {
3932                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3933                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3934                 ::basicBlock->appendInst(cmp);
3935
3936                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3937                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3938                 ::basicBlock->appendInst(select);
3939
3940                 return RValue<UShort4>(V(result));
3941         }
3942
3943         RValue<UShort> SaturateUShort(RValue<Int> x)
3944         {
3945                 return UShort(IfThenElse(x > 0xFFFF, Int(0xFFFF), IfThenElse(x < 0, Int(0), x)));
3946         }
3947
3948         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3949         {
3950                 if(emulateIntrinsics)
3951                 {
3952                         UShort4 result;
3953                         result = Insert(result, SaturateUShort(Int(Extract(x, 0)) + Int(Extract(y, 0))), 0);
3954                         result = Insert(result, SaturateUShort(Int(Extract(x, 1)) + Int(Extract(y, 1))), 1);
3955                         result = Insert(result, SaturateUShort(Int(Extract(x, 2)) + Int(Extract(y, 2))), 2);
3956                         result = Insert(result, SaturateUShort(Int(Extract(x, 3)) + Int(Extract(y, 3))), 3);
3957
3958                         return result;
3959                 }
3960                 else
3961                 {
3962                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3963                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3964                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3965                         auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3966                         paddusw->addArg(x.value);
3967                         paddusw->addArg(y.value);
3968                         ::basicBlock->appendInst(paddusw);
3969
3970                         return RValue<UShort4>(V(result));
3971                 }
3972         }
3973
3974         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3975         {
3976                 if(emulateIntrinsics)
3977                 {
3978                         UShort4 result;
3979                         result = Insert(result, SaturateUShort(Int(Extract(x, 0)) - Int(Extract(y, 0))), 0);
3980                         result = Insert(result, SaturateUShort(Int(Extract(x, 1)) - Int(Extract(y, 1))), 1);
3981                         result = Insert(result, SaturateUShort(Int(Extract(x, 2)) - Int(Extract(y, 2))), 2);
3982                         result = Insert(result, SaturateUShort(Int(Extract(x, 3)) - Int(Extract(y, 3))), 3);
3983
3984                         return result;
3985                 }
3986                 else
3987                 {
3988                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3989                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3990                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
3991                         auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3992                         psubusw->addArg(x.value);
3993                         psubusw->addArg(y.value);
3994                         ::basicBlock->appendInst(psubusw);
3995
3996                         return RValue<UShort4>(V(result));
3997                 }
3998         }
3999
4000         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
4001         {
4002                 if(emulateIntrinsics)
4003                 {
4004                         UShort4 result;
4005                         result = Insert(result, UShort((UInt(Extract(x, 0)) * UInt(Extract(y, 0))) >> 16), 0);
4006                         result = Insert(result, UShort((UInt(Extract(x, 1)) * UInt(Extract(y, 1))) >> 16), 1);
4007                         result = Insert(result, UShort((UInt(Extract(x, 2)) * UInt(Extract(y, 2))) >> 16), 2);
4008                         result = Insert(result, UShort((UInt(Extract(x, 3)) * UInt(Extract(y, 3))) >> 16), 3);
4009
4010                         return result;
4011                 }
4012                 else
4013                 {
4014                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
4015                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4016                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4017                         auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4018                         pmulhuw->addArg(x.value);
4019                         pmulhuw->addArg(y.value);
4020                         ::basicBlock->appendInst(pmulhuw);
4021
4022                         return RValue<UShort4>(V(result));
4023                 }
4024         }
4025
4026         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
4027         {
4028                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
4029         }
4030
4031         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
4032         {
4033                 if(emulateIntrinsics)
4034                 {
4035                         Byte8 result;
4036                         result = Insert(result, Saturate(Extract(x, 0)), 0);
4037                         result = Insert(result, Saturate(Extract(x, 1)), 1);
4038                         result = Insert(result, Saturate(Extract(x, 2)), 2);
4039                         result = Insert(result, Saturate(Extract(x, 3)), 3);
4040                         result = Insert(result, Saturate(Extract(y, 0)), 4);
4041                         result = Insert(result, Saturate(Extract(y, 1)), 5);
4042                         result = Insert(result, Saturate(Extract(y, 2)), 6);
4043                         result = Insert(result, Saturate(Extract(y, 3)), 7);
4044
4045                         return result;
4046                 }
4047                 else
4048                 {
4049                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
4050                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4051                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4052                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
4053                         pack->addArg(x.value);
4054                         pack->addArg(y.value);
4055                         ::basicBlock->appendInst(pack);
4056
4057                         return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
4058                 }
4059         }
4060
4061         Type *UShort4::getType()
4062         {
4063                 return T(Type_v4i16);
4064         }
4065
4066         Short8::Short8(short c)
4067         {
4068                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
4069                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4070         }
4071
4072         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
4073         {
4074                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
4075                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4076         }
4077
4078         Short8::Short8(RValue<Short8> rhs)
4079         {
4080                 storeValue(rhs.value);
4081         }
4082
4083         Short8::Short8(const Reference<Short8> &rhs)
4084         {
4085                 Value *value = rhs.loadValue();
4086                 storeValue(value);
4087         }
4088
4089         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
4090         {
4091                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
4092                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4093
4094                 storeValue(packed);
4095         }
4096
4097         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
4098         {
4099                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
4100         }
4101
4102         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
4103         {
4104                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
4105         }
4106
4107         RValue<Short> Extract(RValue<Short8> val, int i)
4108         {
4109                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
4110         }
4111
4112         RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
4113         {
4114                 return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
4115         }
4116
4117         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
4118         {
4119                 if(emulateIntrinsics)
4120                 {
4121                         Short8 result;
4122                         result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
4123                         result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
4124                         result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
4125                         result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
4126                         result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
4127                         result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
4128                         result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
4129                         result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
4130
4131                         return result;
4132                 }
4133                 else
4134                 {
4135                         return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4136                 }
4137         }
4138
4139         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
4140         {
4141                 if(emulateIntrinsics)
4142                 {
4143                         Short8 result;
4144                         result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
4145                         result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
4146                         result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
4147                         result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
4148                         result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
4149                         result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
4150                         result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
4151                         result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
4152
4153                         return result;
4154                 }
4155                 else
4156                 {
4157                         return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4158                 }
4159         }
4160
4161         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
4162         {
4163                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
4164         }
4165
4166         RValue<Int4> Abs(RValue<Int4> x)
4167         {
4168                 auto negative = x >> 31;
4169                 return (x ^ negative) - negative;
4170         }
4171
4172         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
4173         {
4174                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
4175         }
4176
4177         Type *Short8::getType()
4178         {
4179                 return T(Ice::IceType_v8i16);
4180         }
4181
4182         UShort8::UShort8(unsigned short c)
4183         {
4184                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
4185                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4186         }
4187
4188         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
4189         {
4190                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
4191                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4192         }
4193
4194         UShort8::UShort8(RValue<UShort8> rhs)
4195         {
4196                 storeValue(rhs.value);
4197         }
4198
4199         UShort8::UShort8(const Reference<UShort8> &rhs)
4200         {
4201                 Value *value = rhs.loadValue();
4202                 storeValue(value);
4203         }
4204
4205         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
4206         {
4207                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
4208                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4209
4210                 storeValue(packed);
4211         }
4212
4213         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
4214         {
4215                 storeValue(rhs.value);
4216
4217                 return rhs;
4218         }
4219
4220         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
4221         {
4222                 Value *value = rhs.loadValue();
4223                 storeValue(value);
4224
4225                 return RValue<UShort8>(value);
4226         }
4227
4228         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
4229         {
4230                 Value *value = rhs.loadValue();
4231                 storeValue(value);
4232
4233                 return RValue<UShort8>(value);
4234         }
4235
4236         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
4237         {
4238                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
4239         }
4240
4241         RValue<UShort> Extract(RValue<UShort8> val, int i)
4242         {
4243                 return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
4244         }
4245
4246         RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
4247         {
4248                 return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
4249         }
4250
4251         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
4252         {
4253                 if(emulateIntrinsics)
4254                 {
4255                         UShort8 result;
4256                         result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
4257                         result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
4258                         result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
4259                         result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
4260                         result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
4261                         result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
4262                         result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
4263                         result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
4264
4265                         return result;
4266                 }
4267                 else
4268                 {
4269                         return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4270                 }
4271         }
4272
4273         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
4274         {
4275                 if(emulateIntrinsics)
4276                 {
4277                         UShort8 result;
4278                         result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
4279                         result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
4280                         result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
4281                         result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
4282                         result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
4283                         result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
4284                         result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
4285                         result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
4286
4287                         return result;
4288                 }
4289                 else
4290                 {
4291                         return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4292                 }
4293         }
4294
4295         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
4296         {
4297                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
4298         }
4299
4300         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
4301         {
4302                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
4303         }
4304
4305         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
4306         {
4307                 return lhs = lhs + rhs;
4308         }
4309
4310         RValue<UShort8> operator~(RValue<UShort8> val)
4311         {
4312                 return RValue<UShort8>(Nucleus::createNot(val.value));
4313         }
4314
4315         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
4316         {
4317                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4318         }
4319
4320         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
4321         {
4322                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4323         }
4324
4325         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
4326 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
4327 //      {
4328 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
4329 //      }
4330
4331         Type *UShort8::getType()
4332         {
4333                 return T(Ice::IceType_v8i16);
4334         }
4335
4336         Int::Int(Argument<Int> argument)
4337         {
4338                 storeValue(argument.value);
4339         }
4340
4341         Int::Int(RValue<Byte> cast)
4342         {
4343                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4344
4345                 storeValue(integer);
4346         }
4347
4348         Int::Int(RValue<SByte> cast)
4349         {
4350                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4351
4352                 storeValue(integer);
4353         }
4354
4355         Int::Int(RValue<Short> cast)
4356         {
4357                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4358
4359                 storeValue(integer);
4360         }
4361
4362         Int::Int(RValue<UShort> cast)
4363         {
4364                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4365
4366                 storeValue(integer);
4367         }
4368
4369         Int::Int(RValue<Int2> cast)
4370         {
4371                 *this = Extract(cast, 0);
4372         }
4373
4374         Int::Int(RValue<Long> cast)
4375         {
4376                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
4377
4378                 storeValue(integer);
4379         }
4380
4381         Int::Int(RValue<Float> cast)
4382         {
4383                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
4384
4385                 storeValue(integer);
4386         }
4387
4388         Int::Int(int x)
4389         {
4390                 storeValue(Nucleus::createConstantInt(x));
4391         }
4392
4393         Int::Int(RValue<Int> rhs)
4394         {
4395                 storeValue(rhs.value);
4396         }
4397
4398         Int::Int(RValue<UInt> rhs)
4399         {
4400                 storeValue(rhs.value);
4401         }
4402
4403         Int::Int(const Int &rhs)
4404         {
4405                 Value *value = rhs.loadValue();
4406                 storeValue(value);
4407         }
4408
4409         Int::Int(const Reference<Int> &rhs)
4410         {
4411                 Value *value = rhs.loadValue();
4412                 storeValue(value);
4413         }
4414
4415         Int::Int(const UInt &rhs)
4416         {
4417                 Value *value = rhs.loadValue();
4418                 storeValue(value);
4419         }
4420
4421         Int::Int(const Reference<UInt> &rhs)
4422         {
4423                 Value *value = rhs.loadValue();
4424                 storeValue(value);
4425         }
4426
4427         RValue<Int> Int::operator=(int rhs)
4428         {
4429                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
4430         }
4431
4432         RValue<Int> Int::operator=(RValue<Int> rhs)
4433         {
4434                 storeValue(rhs.value);
4435
4436                 return rhs;
4437         }
4438
4439         RValue<Int> Int::operator=(RValue<UInt> rhs)
4440         {
4441                 storeValue(rhs.value);
4442
4443                 return RValue<Int>(rhs);
4444         }
4445
4446         RValue<Int> Int::operator=(const Int &rhs)
4447         {
4448                 Value *value = rhs.loadValue();
4449                 storeValue(value);
4450
4451                 return RValue<Int>(value);
4452         }
4453
4454         RValue<Int> Int::operator=(const Reference<Int> &rhs)
4455         {
4456                 Value *value = rhs.loadValue();
4457                 storeValue(value);
4458
4459                 return RValue<Int>(value);
4460         }
4461
4462         RValue<Int> Int::operator=(const UInt &rhs)
4463         {
4464                 Value *value = rhs.loadValue();
4465                 storeValue(value);
4466
4467                 return RValue<Int>(value);
4468         }
4469
4470         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
4471         {
4472                 Value *value = rhs.loadValue();
4473                 storeValue(value);
4474
4475                 return RValue<Int>(value);
4476         }
4477
4478         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
4479         {
4480                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
4481         }
4482
4483         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
4484         {
4485                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
4486         }
4487
4488         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
4489         {
4490                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
4491         }
4492
4493         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
4494         {
4495                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
4496         }
4497
4498         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
4499         {
4500                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
4501         }
4502
4503         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
4504         {
4505                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
4506         }
4507
4508         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
4509         {
4510                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
4511         }
4512
4513         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
4514         {
4515                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
4516         }
4517
4518         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
4519         {
4520                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
4521         }
4522
4523         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
4524         {
4525                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
4526         }
4527
4528         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
4529         {
4530                 return lhs = lhs + rhs;
4531         }
4532
4533         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
4534         {
4535                 return lhs = lhs - rhs;
4536         }
4537
4538         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
4539         {
4540                 return lhs = lhs * rhs;
4541         }
4542
4543         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
4544         {
4545                 return lhs = lhs / rhs;
4546         }
4547
4548         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
4549         {
4550                 return lhs = lhs % rhs;
4551         }
4552
4553         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
4554         {
4555                 return lhs = lhs & rhs;
4556         }
4557
4558         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
4559         {
4560                 return lhs = lhs | rhs;
4561         }
4562
4563         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
4564         {
4565                 return lhs = lhs ^ rhs;
4566         }
4567
4568         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
4569         {
4570                 return lhs = lhs << rhs;
4571         }
4572
4573         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
4574         {
4575                 return lhs = lhs >> rhs;
4576         }
4577
4578         RValue<Int> operator+(RValue<Int> val)
4579         {
4580                 return val;
4581         }
4582
4583         RValue<Int> operator-(RValue<Int> val)
4584         {
4585                 return RValue<Int>(Nucleus::createNeg(val.value));
4586         }
4587
4588         RValue<Int> operator~(RValue<Int> val)
4589         {
4590                 return RValue<Int>(Nucleus::createNot(val.value));
4591         }
4592
4593         RValue<Int> operator++(Int &val, int)   // Post-increment
4594         {
4595                 RValue<Int> res = val;
4596                 val += 1;
4597                 return res;
4598         }
4599
4600         const Int &operator++(Int &val)   // Pre-increment
4601         {
4602                 val += 1;
4603                 return val;
4604         }
4605
4606         RValue<Int> operator--(Int &val, int)   // Post-decrement
4607         {
4608                 RValue<Int> res = val;
4609                 val -= 1;
4610                 return res;
4611         }
4612
4613         const Int &operator--(Int &val)   // Pre-decrement
4614         {
4615                 val -= 1;
4616                 return val;
4617         }
4618
4619         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4620         {
4621                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4622         }
4623
4624         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4625         {
4626                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4627         }
4628
4629         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4630         {
4631                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4632         }
4633
4634         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4635         {
4636                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4637         }
4638
4639         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4640         {
4641                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4642         }
4643
4644         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4645         {
4646                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4647         }
4648
4649         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4650         {
4651                 return IfThenElse(x > y, x, y);
4652         }
4653
4654         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4655         {
4656                 return IfThenElse(x < y, x, y);
4657         }
4658
4659         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4660         {
4661                 return Min(Max(x, min), max);
4662         }
4663
4664         RValue<Int> RoundInt(RValue<Float> cast)
4665         {
4666                 if(emulateIntrinsics)
4667                 {
4668                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
4669                         return Int((cast + Float(0x00C00000)) - Float(0x00C00000));
4670                 }
4671                 else
4672                 {
4673                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4674                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4675                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
4676                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4677                         nearbyint->addArg(cast.value);
4678                         ::basicBlock->appendInst(nearbyint);
4679
4680                         return RValue<Int>(V(result));
4681                 }
4682         }
4683
4684         Type *Int::getType()
4685         {
4686                 return T(Ice::IceType_i32);
4687         }
4688
4689         Long::Long(RValue<Int> cast)
4690         {
4691                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4692
4693                 storeValue(integer);
4694         }
4695
4696         Long::Long(RValue<UInt> cast)
4697         {
4698                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4699
4700                 storeValue(integer);
4701         }
4702
4703         Long::Long(RValue<Long> rhs)
4704         {
4705                 storeValue(rhs.value);
4706         }
4707
4708         RValue<Long> Long::operator=(int64_t rhs)
4709         {
4710                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4711         }
4712
4713         RValue<Long> Long::operator=(RValue<Long> rhs)
4714         {
4715                 storeValue(rhs.value);
4716
4717                 return rhs;
4718         }
4719
4720         RValue<Long> Long::operator=(const Long &rhs)
4721         {
4722                 Value *value = rhs.loadValue();
4723                 storeValue(value);
4724
4725                 return RValue<Long>(value);
4726         }
4727
4728         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4729         {
4730                 Value *value = rhs.loadValue();
4731                 storeValue(value);
4732
4733                 return RValue<Long>(value);
4734         }
4735
4736         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4737         {
4738                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4739         }
4740
4741         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4742         {
4743                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4744         }
4745
4746         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4747         {
4748                 return lhs = lhs + rhs;
4749         }
4750
4751         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4752         {
4753                 return lhs = lhs - rhs;
4754         }
4755
4756         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4757         {
4758                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4759         }
4760
4761         Type *Long::getType()
4762         {
4763                 return T(Ice::IceType_i64);
4764         }
4765
4766         UInt::UInt(Argument<UInt> argument)
4767         {
4768                 storeValue(argument.value);
4769         }
4770
4771         UInt::UInt(RValue<UShort> cast)
4772         {
4773                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4774
4775                 storeValue(integer);
4776         }
4777
4778         UInt::UInt(RValue<Long> cast)
4779         {
4780                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4781
4782                 storeValue(integer);
4783         }
4784
4785         UInt::UInt(RValue<Float> cast)
4786         {
4787                 // Smallest positive value representable in UInt, but not in Int
4788                 const unsigned int ustart = 0x80000000u;
4789                 const float ustartf = float(ustart);
4790
4791                 // If the value is negative, store 0, otherwise store the result of the conversion
4792                 storeValue((~(As<Int>(cast) >> 31) &
4793                 // Check if the value can be represented as an Int
4794                         IfThenElse(cast >= ustartf,
4795                 // If the value is too large, subtract ustart and re-add it after conversion.
4796                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4797                 // Otherwise, just convert normally
4798                                 Int(cast))).value);
4799         }
4800
4801         UInt::UInt(int x)
4802         {
4803                 storeValue(Nucleus::createConstantInt(x));
4804         }
4805
4806         UInt::UInt(unsigned int x)
4807         {
4808                 storeValue(Nucleus::createConstantInt(x));
4809         }
4810
4811         UInt::UInt(RValue<UInt> rhs)
4812         {
4813                 storeValue(rhs.value);
4814         }
4815
4816         UInt::UInt(RValue<Int> rhs)
4817         {
4818                 storeValue(rhs.value);
4819         }
4820
4821         UInt::UInt(const UInt &rhs)
4822         {
4823                 Value *value = rhs.loadValue();
4824                 storeValue(value);
4825         }
4826
4827         UInt::UInt(const Reference<UInt> &rhs)
4828         {
4829                 Value *value = rhs.loadValue();
4830                 storeValue(value);
4831         }
4832
4833         UInt::UInt(const Int &rhs)
4834         {
4835                 Value *value = rhs.loadValue();
4836                 storeValue(value);
4837         }
4838
4839         UInt::UInt(const Reference<Int> &rhs)
4840         {
4841                 Value *value = rhs.loadValue();
4842                 storeValue(value);
4843         }
4844
4845         RValue<UInt> UInt::operator=(unsigned int rhs)
4846         {
4847                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4848         }
4849
4850         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4851         {
4852                 storeValue(rhs.value);
4853
4854                 return rhs;
4855         }
4856
4857         RValue<UInt> UInt::operator=(RValue<Int> rhs)
4858         {
4859                 storeValue(rhs.value);
4860
4861                 return RValue<UInt>(rhs);
4862         }
4863
4864         RValue<UInt> UInt::operator=(const UInt &rhs)
4865         {
4866                 Value *value = rhs.loadValue();
4867                 storeValue(value);
4868
4869                 return RValue<UInt>(value);
4870         }
4871
4872         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4873         {
4874                 Value *value = rhs.loadValue();
4875                 storeValue(value);
4876
4877                 return RValue<UInt>(value);
4878         }
4879
4880         RValue<UInt> UInt::operator=(const Int &rhs)
4881         {
4882                 Value *value = rhs.loadValue();
4883                 storeValue(value);
4884
4885                 return RValue<UInt>(value);
4886         }
4887
4888         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4889         {
4890                 Value *value = rhs.loadValue();
4891                 storeValue(value);
4892
4893                 return RValue<UInt>(value);
4894         }
4895
4896         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4897         {
4898                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4899         }
4900
4901         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4902         {
4903                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4904         }
4905
4906         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4907         {
4908                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4909         }
4910
4911         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4912         {
4913                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4914         }
4915
4916         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4917         {
4918                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4919         }
4920
4921         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4922         {
4923                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4924         }
4925
4926         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4927         {
4928                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4929         }
4930
4931         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4932         {
4933                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4934         }
4935
4936         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4937         {
4938                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4939         }
4940
4941         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4942         {
4943                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4944         }
4945
4946         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4947         {
4948                 return lhs = lhs + rhs;
4949         }
4950
4951         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4952         {
4953                 return lhs = lhs - rhs;
4954         }
4955
4956         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4957         {
4958                 return lhs = lhs * rhs;
4959         }
4960
4961         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4962         {
4963                 return lhs = lhs / rhs;
4964         }
4965
4966         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4967         {
4968                 return lhs = lhs % rhs;
4969         }
4970
4971         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4972         {
4973                 return lhs = lhs & rhs;
4974         }
4975
4976         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4977         {
4978                 return lhs = lhs | rhs;
4979         }
4980
4981         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4982         {
4983                 return lhs = lhs ^ rhs;
4984         }
4985
4986         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4987         {
4988                 return lhs = lhs << rhs;
4989         }
4990
4991         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4992         {
4993                 return lhs = lhs >> rhs;
4994         }
4995
4996         RValue<UInt> operator+(RValue<UInt> val)
4997         {
4998                 return val;
4999         }
5000
5001         RValue<UInt> operator-(RValue<UInt> val)
5002         {
5003                 return RValue<UInt>(Nucleus::createNeg(val.value));
5004         }
5005
5006         RValue<UInt> operator~(RValue<UInt> val)
5007         {
5008                 return RValue<UInt>(Nucleus::createNot(val.value));
5009         }
5010
5011         RValue<UInt> operator++(UInt &val, int)   // Post-increment
5012         {
5013                 RValue<UInt> res = val;
5014                 val += 1;
5015                 return res;
5016         }
5017
5018         const UInt &operator++(UInt &val)   // Pre-increment
5019         {
5020                 val += 1;
5021                 return val;
5022         }
5023
5024         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
5025         {
5026                 RValue<UInt> res = val;
5027                 val -= 1;
5028                 return res;
5029         }
5030
5031         const UInt &operator--(UInt &val)   // Pre-decrement
5032         {
5033                 val -= 1;
5034                 return val;
5035         }
5036
5037         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
5038         {
5039                 return IfThenElse(x > y, x, y);
5040         }
5041
5042         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
5043         {
5044                 return IfThenElse(x < y, x, y);
5045         }
5046
5047         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
5048         {
5049                 return Min(Max(x, min), max);
5050         }
5051
5052         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
5053         {
5054                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
5055         }
5056
5057         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
5058         {
5059                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
5060         }
5061
5062         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
5063         {
5064                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
5065         }
5066
5067         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
5068         {
5069                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
5070         }
5071
5072         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
5073         {
5074                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
5075         }
5076
5077         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
5078         {
5079                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
5080         }
5081
5082 //      RValue<UInt> RoundUInt(RValue<Float> cast)
5083 //      {
5084 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
5085 //      }
5086
5087         Type *UInt::getType()
5088         {
5089                 return T(Ice::IceType_i32);
5090         }
5091
5092 //      Int2::Int2(RValue<Int> cast)
5093 //      {
5094 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
5095 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
5096 //
5097 //              Constant *shuffle[2];
5098 //              shuffle[0] = Nucleus::createConstantInt(0);
5099 //              shuffle[1] = Nucleus::createConstantInt(0);
5100 //
5101 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
5102 //
5103 //              storeValue(replicate);
5104 //      }
5105
5106         Int2::Int2(RValue<Int4> cast)
5107         {
5108                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5109         }
5110
5111         Int2::Int2(int x, int y)
5112         {
5113                 int64_t constantVector[2] = {x, y};
5114                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5115         }
5116
5117         Int2::Int2(RValue<Int2> rhs)
5118         {
5119                 storeValue(rhs.value);
5120         }
5121
5122         Int2::Int2(const Int2 &rhs)
5123         {
5124                 Value *value = rhs.loadValue();
5125                 storeValue(value);
5126         }
5127
5128         Int2::Int2(const Reference<Int2> &rhs)
5129         {
5130                 Value *value = rhs.loadValue();
5131                 storeValue(value);
5132         }
5133
5134         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
5135         {
5136                 int shuffle[4] = {0, 4, 1, 5};
5137                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
5138
5139                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
5140         }
5141
5142         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
5143         {
5144                 storeValue(rhs.value);
5145
5146                 return rhs;
5147         }
5148
5149         RValue<Int2> Int2::operator=(const Int2 &rhs)
5150         {
5151                 Value *value = rhs.loadValue();
5152                 storeValue(value);
5153
5154                 return RValue<Int2>(value);
5155         }
5156
5157         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
5158         {
5159                 Value *value = rhs.loadValue();
5160                 storeValue(value);
5161
5162                 return RValue<Int2>(value);
5163         }
5164
5165         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
5166         {
5167                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
5168         }
5169
5170         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
5171         {
5172                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
5173         }
5174
5175 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
5176 //      {
5177 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
5178 //      }
5179
5180 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
5181 //      {
5182 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
5183 //      }
5184
5185 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
5186 //      {
5187 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
5188 //      }
5189
5190         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
5191         {
5192                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
5193         }
5194
5195         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
5196         {
5197                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
5198         }
5199
5200         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
5201         {
5202                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
5203         }
5204
5205         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
5206         {
5207                 if(emulateIntrinsics)
5208                 {
5209                         Int2 result;
5210                         result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
5211                         result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
5212
5213                         return result;
5214                 }
5215                 else
5216                 {
5217                         return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5218                 }
5219         }
5220
5221         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
5222         {
5223                 if(emulateIntrinsics)
5224                 {
5225                         Int2 result;
5226                         result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
5227                         result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
5228
5229                         return result;
5230                 }
5231                 else
5232                 {
5233                         return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5234                 }
5235         }
5236
5237         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
5238         {
5239                 return lhs = lhs + rhs;
5240         }
5241
5242         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
5243         {
5244                 return lhs = lhs - rhs;
5245         }
5246
5247 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
5248 //      {
5249 //              return lhs = lhs * rhs;
5250 //      }
5251
5252 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
5253 //      {
5254 //              return lhs = lhs / rhs;
5255 //      }
5256
5257 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
5258 //      {
5259 //              return lhs = lhs % rhs;
5260 //      }
5261
5262         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
5263         {
5264                 return lhs = lhs & rhs;
5265         }
5266
5267         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
5268         {
5269                 return lhs = lhs | rhs;
5270         }
5271
5272         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
5273         {
5274                 return lhs = lhs ^ rhs;
5275         }
5276
5277         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
5278         {
5279                 return lhs = lhs << rhs;
5280         }
5281
5282         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
5283         {
5284                 return lhs = lhs >> rhs;
5285         }
5286
5287 //      RValue<Int2> operator+(RValue<Int2> val)
5288 //      {
5289 //              return val;
5290 //      }
5291
5292 //      RValue<Int2> operator-(RValue<Int2> val)
5293 //      {
5294 //              return RValue<Int2>(Nucleus::createNeg(val.value));
5295 //      }
5296
5297         RValue<Int2> operator~(RValue<Int2> val)
5298         {
5299                 return RValue<Int2>(Nucleus::createNot(val.value));
5300         }
5301
5302         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
5303         {
5304                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5305                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5306         }
5307
5308         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
5309         {
5310                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5311                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5312                 return As<Short4>(Swizzle(lowHigh, 0xEE));
5313         }
5314
5315         RValue<Int> Extract(RValue<Int2> val, int i)
5316         {
5317                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
5318         }
5319
5320         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
5321         {
5322                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
5323         }
5324
5325         Type *Int2::getType()
5326         {
5327                 return T(Type_v2i32);
5328         }
5329
5330         UInt2::UInt2(unsigned int x, unsigned int y)
5331         {
5332                 int64_t constantVector[2] = {x, y};
5333                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5334         }
5335
5336         UInt2::UInt2(RValue<UInt2> rhs)
5337         {
5338                 storeValue(rhs.value);
5339         }
5340
5341         UInt2::UInt2(const UInt2 &rhs)
5342         {
5343                 Value *value = rhs.loadValue();
5344                 storeValue(value);
5345         }
5346
5347         UInt2::UInt2(const Reference<UInt2> &rhs)
5348         {
5349                 Value *value = rhs.loadValue();
5350                 storeValue(value);
5351         }
5352
5353         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
5354         {
5355                 storeValue(rhs.value);
5356
5357                 return rhs;
5358         }
5359
5360         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
5361         {
5362                 Value *value = rhs.loadValue();
5363                 storeValue(value);
5364
5365                 return RValue<UInt2>(value);
5366         }
5367
5368         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
5369         {
5370                 Value *value = rhs.loadValue();
5371                 storeValue(value);
5372
5373                 return RValue<UInt2>(value);
5374         }
5375
5376         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5377         {
5378                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5379         }
5380
5381         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5382         {
5383                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5384         }
5385
5386 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5387 //      {
5388 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5389 //      }
5390
5391 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5392 //      {
5393 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5394 //      }
5395
5396 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5397 //      {
5398 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5399 //      }
5400
5401         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5402         {
5403                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5404         }
5405
5406         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5407         {
5408                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5409         }
5410
5411         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5412         {
5413                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5414         }
5415
5416         RValue<UInt> Extract(RValue<UInt2> val, int i)
5417         {
5418                 return RValue<UInt>(Nucleus::createExtractElement(val.value, UInt::getType(), i));
5419         }
5420
5421         RValue<UInt2> Insert(RValue<UInt2> val, RValue<UInt> element, int i)
5422         {
5423                 return RValue<UInt2>(Nucleus::createInsertElement(val.value, element.value, i));
5424         }
5425
5426         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5427         {
5428                 if(emulateIntrinsics)
5429                 {
5430                         UInt2 result;
5431                         result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
5432                         result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
5433
5434                         return result;
5435                 }
5436                 else
5437                 {
5438                         return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5439                 }
5440         }
5441
5442         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5443         {
5444                 if(emulateIntrinsics)
5445                 {
5446                         UInt2 result;
5447                         result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
5448                         result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
5449
5450                         return result;
5451                 }
5452                 else
5453                 {
5454                         return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5455                 }
5456         }
5457
5458         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
5459         {
5460                 return lhs = lhs + rhs;
5461         }
5462
5463         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
5464         {
5465                 return lhs = lhs - rhs;
5466         }
5467
5468 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
5469 //      {
5470 //              return lhs = lhs * rhs;
5471 //      }
5472
5473 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
5474 //      {
5475 //              return lhs = lhs / rhs;
5476 //      }
5477
5478 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
5479 //      {
5480 //              return lhs = lhs % rhs;
5481 //      }
5482
5483         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
5484         {
5485                 return lhs = lhs & rhs;
5486         }
5487
5488         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
5489         {
5490                 return lhs = lhs | rhs;
5491         }
5492
5493         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
5494         {
5495                 return lhs = lhs ^ rhs;
5496         }
5497
5498         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
5499         {
5500                 return lhs = lhs << rhs;
5501         }
5502
5503         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
5504         {
5505                 return lhs = lhs >> rhs;
5506         }
5507
5508 //      RValue<UInt2> operator+(RValue<UInt2> val)
5509 //      {
5510 //              return val;
5511 //      }
5512
5513 //      RValue<UInt2> operator-(RValue<UInt2> val)
5514 //      {
5515 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
5516 //      }
5517
5518         RValue<UInt2> operator~(RValue<UInt2> val)
5519         {
5520                 return RValue<UInt2>(Nucleus::createNot(val.value));
5521         }
5522
5523         Type *UInt2::getType()
5524         {
5525                 return T(Type_v2i32);
5526         }
5527
5528         Int4::Int4(RValue<Byte4> cast)
5529         {
5530                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5531                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
5532
5533                 Value *e;
5534                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
5535                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5536                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
5537
5538                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5539                 Value *d = Nucleus::createBitCast(c, Short8::getType());
5540                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
5541
5542                 Value *f = Nucleus::createBitCast(e, Int4::getType());
5543                 storeValue(f);
5544         }
5545
5546         Int4::Int4(RValue<SByte4> cast)
5547         {
5548                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5549                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
5550
5551                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
5552                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5553                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
5554
5555                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5556                 Value *d = Nucleus::createBitCast(c, Short8::getType());
5557                 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
5558
5559                 *this = As<Int4>(e) >> 24;
5560         }
5561
5562         Int4::Int4(RValue<Float4> cast)
5563         {
5564                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5565
5566                 storeValue(xyzw);
5567         }
5568
5569         Int4::Int4(RValue<Short4> cast)
5570         {
5571                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5572                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
5573
5574                 *this = As<Int4>(c) >> 16;
5575         }
5576
5577         Int4::Int4(RValue<UShort4> cast)
5578         {
5579                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5580                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
5581                 Value *d = Nucleus::createBitCast(c, Int4::getType());
5582                 storeValue(d);
5583         }
5584
5585         Int4::Int4(int xyzw)
5586         {
5587                 constant(xyzw, xyzw, xyzw, xyzw);
5588         }
5589
5590         Int4::Int4(int x, int yzw)
5591         {
5592                 constant(x, yzw, yzw, yzw);
5593         }
5594
5595         Int4::Int4(int x, int y, int zw)
5596         {
5597                 constant(x, y, zw, zw);
5598         }
5599
5600         Int4::Int4(int x, int y, int z, int w)
5601         {
5602                 constant(x, y, z, w);
5603         }
5604
5605         void Int4::constant(int x, int y, int z, int w)
5606         {
5607                 int64_t constantVector[4] = {x, y, z, w};
5608                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5609         }
5610
5611         Int4::Int4(RValue<Int4> rhs)
5612         {
5613                 storeValue(rhs.value);
5614         }
5615
5616         Int4::Int4(const Int4 &rhs)
5617         {
5618                 Value *value = rhs.loadValue();
5619                 storeValue(value);
5620         }
5621
5622         Int4::Int4(const Reference<Int4> &rhs)
5623         {
5624                 Value *value = rhs.loadValue();
5625                 storeValue(value);
5626         }
5627
5628         Int4::Int4(RValue<UInt4> rhs)
5629         {
5630                 storeValue(rhs.value);
5631         }
5632
5633         Int4::Int4(const UInt4 &rhs)
5634         {
5635                 Value *value = rhs.loadValue();
5636                 storeValue(value);
5637         }
5638
5639         Int4::Int4(const Reference<UInt4> &rhs)
5640         {
5641                 Value *value = rhs.loadValue();
5642                 storeValue(value);
5643         }
5644
5645         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5646         {
5647                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5648                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5649
5650                 storeValue(packed);
5651         }
5652
5653         Int4::Int4(RValue<Int> rhs)
5654         {
5655                 Value *vector = Nucleus::createBitCast(rhs.value, Int4::getType());
5656
5657                 int swizzle[4] = {0, 0, 0, 0};
5658                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
5659
5660                 storeValue(replicate);
5661         }
5662
5663         Int4::Int4(const Int &rhs)
5664         {
5665                 *this = RValue<Int>(rhs.loadValue());
5666         }
5667
5668         Int4::Int4(const Reference<Int> &rhs)
5669         {
5670                 *this = RValue<Int>(rhs.loadValue());
5671         }
5672
5673         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5674         {
5675                 storeValue(rhs.value);
5676
5677                 return rhs;
5678         }
5679
5680         RValue<Int4> Int4::operator=(const Int4 &rhs)
5681         {
5682                 Value *value = rhs.loadValue();
5683                 storeValue(value);
5684
5685                 return RValue<Int4>(value);
5686         }
5687
5688         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5689         {
5690                 Value *value = rhs.loadValue();
5691                 storeValue(value);
5692
5693                 return RValue<Int4>(value);
5694         }
5695
5696         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5697         {
5698                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5699         }
5700
5701         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5702         {
5703                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5704         }
5705
5706         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5707         {
5708                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5709         }
5710
5711         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5712         {
5713                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5714         }
5715
5716         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5717         {
5718                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5719         }
5720
5721         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5722         {
5723                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5724         }
5725
5726         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5727         {
5728                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5729         }
5730
5731         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5732         {
5733                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5734         }
5735
5736         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5737         {
5738                 if(emulateIntrinsics)
5739                 {
5740                         Int4 result;
5741                         result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
5742                         result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
5743                         result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
5744                         result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
5745
5746                         return result;
5747                 }
5748                 else
5749                 {
5750                         return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5751                 }
5752         }
5753
5754         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5755         {
5756                 if(emulateIntrinsics)
5757                 {
5758                         Int4 result;
5759                         result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
5760                         result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
5761                         result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
5762                         result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
5763
5764                         return result;
5765                 }
5766                 else
5767                 {
5768                         return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5769                 }
5770         }
5771
5772         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5773         {
5774                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5775         }
5776
5777         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5778         {
5779                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5780         }
5781
5782         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5783         {
5784                 return lhs = lhs + rhs;
5785         }
5786
5787         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5788         {
5789                 return lhs = lhs - rhs;
5790         }
5791
5792         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5793         {
5794                 return lhs = lhs * rhs;
5795         }
5796
5797 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5798 //      {
5799 //              return lhs = lhs / rhs;
5800 //      }
5801
5802 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5803 //      {
5804 //              return lhs = lhs % rhs;
5805 //      }
5806
5807         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5808         {
5809                 return lhs = lhs & rhs;
5810         }
5811
5812         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5813         {
5814                 return lhs = lhs | rhs;
5815         }
5816
5817         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5818         {
5819                 return lhs = lhs ^ rhs;
5820         }
5821
5822         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5823         {
5824                 return lhs = lhs << rhs;
5825         }
5826
5827         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5828         {
5829                 return lhs = lhs >> rhs;
5830         }
5831
5832         RValue<Int4> operator+(RValue<Int4> val)
5833         {
5834                 return val;
5835         }
5836
5837         RValue<Int4> operator-(RValue<Int4> val)
5838         {
5839                 return RValue<Int4>(Nucleus::createNeg(val.value));
5840         }
5841
5842         RValue<Int4> operator~(RValue<Int4> val)
5843         {
5844                 return RValue<Int4>(Nucleus::createNot(val.value));
5845         }
5846
5847         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5848         {
5849                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5850         }
5851
5852         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5853         {
5854                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5855         }
5856
5857         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5858         {
5859                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5860         }
5861
5862         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5863         {
5864                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5865         }
5866
5867         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5868         {
5869                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5870         }
5871
5872         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5873         {
5874                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5875         }
5876
5877         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5878         {
5879                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5880                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5881                 ::basicBlock->appendInst(cmp);
5882
5883                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5884                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5885                 ::basicBlock->appendInst(select);
5886
5887                 return RValue<Int4>(V(result));
5888         }
5889
5890         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5891         {
5892                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5893                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5894                 ::basicBlock->appendInst(cmp);
5895
5896                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5897                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5898                 ::basicBlock->appendInst(select);
5899
5900                 return RValue<Int4>(V(result));
5901         }
5902
5903         RValue<Int4> RoundInt(RValue<Float4> cast)
5904         {
5905                 if(emulateIntrinsics)
5906                 {
5907                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
5908                         return Int4((cast + Float4(0x00C00000)) - Float4(0x00C00000));
5909                 }
5910                 else
5911                 {
5912                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5913                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5914                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5915                         auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5916                         nearbyint->addArg(cast.value);
5917                         ::basicBlock->appendInst(nearbyint);
5918
5919                         return RValue<Int4>(V(result));
5920                 }
5921         }
5922
5923         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5924         {
5925                 if(emulateIntrinsics)
5926                 {
5927                         Short8 result;
5928                         result = Insert(result, Saturate(Extract(x, 0)), 0);
5929                         result = Insert(result, Saturate(Extract(x, 1)), 1);
5930                         result = Insert(result, Saturate(Extract(x, 2)), 2);
5931                         result = Insert(result, Saturate(Extract(x, 3)), 3);
5932                         result = Insert(result, Saturate(Extract(y, 0)), 4);
5933                         result = Insert(result, Saturate(Extract(y, 1)), 5);
5934                         result = Insert(result, Saturate(Extract(y, 2)), 6);
5935                         result = Insert(result, Saturate(Extract(y, 3)), 7);
5936
5937                         return result;
5938                 }
5939                 else
5940                 {
5941                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5942                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5943                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5944                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5945                         pack->addArg(x.value);
5946                         pack->addArg(y.value);
5947                         ::basicBlock->appendInst(pack);
5948
5949                         return RValue<Short8>(V(result));
5950                 }
5951         }
5952
5953         RValue<Int> Extract(RValue<Int4> x, int i)
5954         {
5955                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5956         }
5957
5958         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5959         {
5960                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5961         }
5962
5963         RValue<Int> SignMask(RValue<Int4> x)
5964         {
5965                 if(emulateIntrinsics)
5966                 {
5967                         Int4 xx = (x >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
5968                         return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
5969                 }
5970                 else
5971                 {
5972                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5973                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5974                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5975                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5976                         movmsk->addArg(x.value);
5977                         ::basicBlock->appendInst(movmsk);
5978
5979                         return RValue<Int>(V(result));
5980                 }
5981         }
5982
5983         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5984         {
5985                 return RValue<Int4>(createSwizzle4(x.value, select));
5986         }
5987
5988         Type *Int4::getType()
5989         {
5990                 return T(Ice::IceType_v4i32);
5991         }
5992
5993         UInt4::UInt4(RValue<Float4> cast)
5994         {
5995                 // Smallest positive value representable in UInt, but not in Int
5996                 const unsigned int ustart = 0x80000000u;
5997                 const float ustartf = float(ustart);
5998
5999                 // Check if the value can be represented as an Int
6000                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
6001                 // If the value is too large, subtract ustart and re-add it after conversion.
6002                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
6003                 // Otherwise, just convert normally
6004                           (~uiValue & Int4(cast));
6005                 // If the value is negative, store 0, otherwise store the result of the conversion
6006                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
6007         }
6008
6009         UInt4::UInt4(int xyzw)
6010         {
6011                 constant(xyzw, xyzw, xyzw, xyzw);
6012         }
6013
6014         UInt4::UInt4(int x, int yzw)
6015         {
6016                 constant(x, yzw, yzw, yzw);
6017         }
6018
6019         UInt4::UInt4(int x, int y, int zw)
6020         {
6021                 constant(x, y, zw, zw);
6022         }
6023
6024         UInt4::UInt4(int x, int y, int z, int w)
6025         {
6026                 constant(x, y, z, w);
6027         }
6028
6029         void UInt4::constant(int x, int y, int z, int w)
6030         {
6031                 int64_t constantVector[4] = {x, y, z, w};
6032                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
6033         }
6034
6035         UInt4::UInt4(RValue<UInt4> rhs)
6036         {
6037                 storeValue(rhs.value);
6038         }
6039
6040         UInt4::UInt4(const UInt4 &rhs)
6041         {
6042                 Value *value = rhs.loadValue();
6043                 storeValue(value);
6044         }
6045
6046         UInt4::UInt4(const Reference<UInt4> &rhs)
6047         {
6048                 Value *value = rhs.loadValue();
6049                 storeValue(value);
6050         }
6051
6052         UInt4::UInt4(RValue<Int4> rhs)
6053         {
6054                 storeValue(rhs.value);
6055         }
6056
6057         UInt4::UInt4(const Int4 &rhs)
6058         {
6059                 Value *value = rhs.loadValue();
6060                 storeValue(value);
6061         }
6062
6063         UInt4::UInt4(const Reference<Int4> &rhs)
6064         {
6065                 Value *value = rhs.loadValue();
6066                 storeValue(value);
6067         }
6068
6069         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
6070         {
6071                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
6072                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
6073
6074                 storeValue(packed);
6075         }
6076
6077         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
6078         {
6079                 storeValue(rhs.value);
6080
6081                 return rhs;
6082         }
6083
6084         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
6085         {
6086                 Value *value = rhs.loadValue();
6087                 storeValue(value);
6088
6089                 return RValue<UInt4>(value);
6090         }
6091
6092         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
6093         {
6094                 Value *value = rhs.loadValue();
6095                 storeValue(value);
6096
6097                 return RValue<UInt4>(value);
6098         }
6099
6100         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
6101         {
6102                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
6103         }
6104
6105         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
6106         {
6107                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
6108         }
6109
6110         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
6111         {
6112                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
6113         }
6114
6115         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
6116         {
6117                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
6118         }
6119
6120         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
6121         {
6122                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
6123         }
6124
6125         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
6126         {
6127                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
6128         }
6129
6130         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
6131         {
6132                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
6133         }
6134
6135         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
6136         {
6137                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
6138         }
6139
6140         RValue<UInt> Extract(RValue<UInt4> x, int i)
6141         {
6142                 return RValue<UInt>(Nucleus::createExtractElement(x.value, UInt::getType(), i));
6143         }
6144
6145         RValue<UInt4> Insert(RValue<UInt4> x, RValue<UInt> element, int i)
6146         {
6147                 return RValue<UInt4>(Nucleus::createInsertElement(x.value, element.value, i));
6148         }
6149
6150         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
6151         {
6152                 if(emulateIntrinsics)
6153                 {
6154                         UInt4 result;
6155                         result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
6156                         result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
6157                         result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
6158                         result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
6159
6160                         return result;
6161                 }
6162                 else
6163                 {
6164                         return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
6165                 }
6166         }
6167
6168         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
6169         {
6170                 if(emulateIntrinsics)
6171                 {
6172                         UInt4 result;
6173                         result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
6174                         result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
6175                         result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
6176                         result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
6177
6178                         return result;
6179                 }
6180                 else
6181                 {
6182                         return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
6183                 }
6184         }
6185
6186         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
6187         {
6188                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
6189         }
6190
6191         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
6192         {
6193                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
6194         }
6195
6196         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
6197         {
6198                 return lhs = lhs + rhs;
6199         }
6200
6201         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
6202         {
6203                 return lhs = lhs - rhs;
6204         }
6205
6206         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
6207         {
6208                 return lhs = lhs * rhs;
6209         }
6210
6211 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
6212 //      {
6213 //              return lhs = lhs / rhs;
6214 //      }
6215
6216 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
6217 //      {
6218 //              return lhs = lhs % rhs;
6219 //      }
6220
6221         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
6222         {
6223                 return lhs = lhs & rhs;
6224         }
6225
6226         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
6227         {
6228                 return lhs = lhs | rhs;
6229         }
6230
6231         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
6232         {
6233                 return lhs = lhs ^ rhs;
6234         }
6235
6236         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
6237         {
6238                 return lhs = lhs << rhs;
6239         }
6240
6241         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
6242         {
6243                 return lhs = lhs >> rhs;
6244         }
6245
6246         RValue<UInt4> operator+(RValue<UInt4> val)
6247         {
6248                 return val;
6249         }
6250
6251         RValue<UInt4> operator-(RValue<UInt4> val)
6252         {
6253                 return RValue<UInt4>(Nucleus::createNeg(val.value));
6254         }
6255
6256         RValue<UInt4> operator~(RValue<UInt4> val)
6257         {
6258                 return RValue<UInt4>(Nucleus::createNot(val.value));
6259         }
6260
6261         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
6262         {
6263                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
6264         }
6265
6266         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
6267         {
6268                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
6269         }
6270
6271         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
6272         {
6273                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
6274         }
6275
6276         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
6277         {
6278                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
6279         }
6280
6281         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
6282         {
6283                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
6284         }
6285
6286         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
6287         {
6288                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
6289         }
6290
6291         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
6292         {
6293                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6294                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
6295                 ::basicBlock->appendInst(cmp);
6296
6297                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
6298                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6299                 ::basicBlock->appendInst(select);
6300
6301                 return RValue<UInt4>(V(result));
6302         }
6303
6304         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
6305         {
6306                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6307                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
6308                 ::basicBlock->appendInst(cmp);
6309
6310                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
6311                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6312                 ::basicBlock->appendInst(select);
6313
6314                 return RValue<UInt4>(V(result));
6315         }
6316
6317         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
6318         {
6319                 if(CPUID::SSE4_1)
6320                 {
6321                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
6322                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6323                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6324                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6325                         pack->addArg(x.value);
6326                         pack->addArg(y.value);
6327                         ::basicBlock->appendInst(pack);
6328
6329                         return RValue<UShort8>(V(result));
6330                 }
6331                 else
6332                 {
6333                         RValue<Int4> sx = As<Int4>(x);
6334                         RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
6335
6336                         RValue<Int4> sy = As<Int4>(y);
6337                         RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
6338
6339                         return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
6340                 }
6341         }
6342
6343         Type *UInt4::getType()
6344         {
6345                 return T(Ice::IceType_v4i32);
6346         }
6347
6348         Float::Float(RValue<Int> cast)
6349         {
6350                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
6351
6352                 storeValue(integer);
6353         }
6354
6355         Float::Float(RValue<UInt> cast)
6356         {
6357                 RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
6358                                        As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
6359
6360                 storeValue(result.value);
6361         }
6362
6363         Float::Float(float x)
6364         {
6365                 storeValue(Nucleus::createConstantFloat(x));
6366         }
6367
6368         Float::Float(RValue<Float> rhs)
6369         {
6370                 storeValue(rhs.value);
6371         }
6372
6373         Float::Float(const Float &rhs)
6374         {
6375                 Value *value = rhs.loadValue();
6376                 storeValue(value);
6377         }
6378
6379         Float::Float(const Reference<Float> &rhs)
6380         {
6381                 Value *value = rhs.loadValue();
6382                 storeValue(value);
6383         }
6384
6385         RValue<Float> Float::operator=(RValue<Float> rhs)
6386         {
6387                 storeValue(rhs.value);
6388
6389                 return rhs;
6390         }
6391
6392         RValue<Float> Float::operator=(const Float &rhs)
6393         {
6394                 Value *value = rhs.loadValue();
6395                 storeValue(value);
6396
6397                 return RValue<Float>(value);
6398         }
6399
6400         RValue<Float> Float::operator=(const Reference<Float> &rhs)
6401         {
6402                 Value *value = rhs.loadValue();
6403                 storeValue(value);
6404
6405                 return RValue<Float>(value);
6406         }
6407
6408         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6409         {
6410                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6411         }
6412
6413         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6414         {
6415                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6416         }
6417
6418         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6419         {
6420                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6421         }
6422
6423         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6424         {
6425                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
6426         }
6427
6428         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
6429         {
6430                 return lhs = lhs + rhs;
6431         }
6432
6433         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
6434         {
6435                 return lhs = lhs - rhs;
6436         }
6437
6438         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
6439         {
6440                 return lhs = lhs * rhs;
6441         }
6442
6443         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
6444         {
6445                 return lhs = lhs / rhs;
6446         }
6447
6448         RValue<Float> operator+(RValue<Float> val)
6449         {
6450                 return val;
6451         }
6452
6453         RValue<Float> operator-(RValue<Float> val)
6454         {
6455                 return RValue<Float>(Nucleus::createFNeg(val.value));
6456         }
6457
6458         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6459         {
6460                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6461         }
6462
6463         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6464         {
6465                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6466         }
6467
6468         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6469         {
6470                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6471         }
6472
6473         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6474         {
6475                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6476         }
6477
6478         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6479         {
6480                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6481         }
6482
6483         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6484         {
6485                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6486         }
6487
6488         RValue<Float> Abs(RValue<Float> x)
6489         {
6490                 return IfThenElse(x > 0.0f, x, -x);
6491         }
6492
6493         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6494         {
6495                 return IfThenElse(x > y, x, y);
6496         }
6497
6498         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6499         {
6500                 return IfThenElse(x < y, x, y);
6501         }
6502
6503         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6504         {
6505                 return 1.0f / x;
6506         }
6507
6508         RValue<Float> RcpSqrt_pp(RValue<Float> x)
6509         {
6510                 return Rcp_pp(Sqrt(x));
6511         }
6512
6513         RValue<Float> Sqrt(RValue<Float> x)
6514         {
6515                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
6516                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6517                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6518                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6519                 sqrt->addArg(x.value);
6520                 ::basicBlock->appendInst(sqrt);
6521
6522                 return RValue<Float>(V(result));
6523         }
6524
6525         RValue<Float> Round(RValue<Float> x)
6526         {
6527                 return Float4(Round(Float4(x))).x;
6528         }
6529
6530         RValue<Float> Trunc(RValue<Float> x)
6531         {
6532                 return Float4(Trunc(Float4(x))).x;
6533         }
6534
6535         RValue<Float> Frac(RValue<Float> x)
6536         {
6537                 return Float4(Frac(Float4(x))).x;
6538         }
6539
6540         RValue<Float> Floor(RValue<Float> x)
6541         {
6542                 return Float4(Floor(Float4(x))).x;
6543         }
6544
6545         RValue<Float> Ceil(RValue<Float> x)
6546         {
6547                 return Float4(Ceil(Float4(x))).x;
6548         }
6549
6550         Type *Float::getType()
6551         {
6552                 return T(Ice::IceType_f32);
6553         }
6554
6555         Float2::Float2(RValue<Float4> cast)
6556         {
6557                 storeValue(Nucleus::createBitCast(cast.value, getType()));
6558         }
6559
6560         Type *Float2::getType()
6561         {
6562                 return T(Type_v2f32);
6563         }
6564
6565         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
6566         {
6567                 Value *a = Int4(cast).loadValue();
6568                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6569
6570                 storeValue(xyzw);
6571         }
6572
6573         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
6574         {
6575                 Value *a = Int4(cast).loadValue();
6576                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6577
6578                 storeValue(xyzw);
6579         }
6580
6581         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
6582         {
6583                 Int4 c(cast);
6584                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6585         }
6586
6587         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
6588         {
6589                 Int4 c(cast);
6590                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6591         }
6592
6593         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
6594         {
6595                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
6596
6597                 storeValue(xyzw);
6598         }
6599
6600         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
6601         {
6602                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
6603                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
6604
6605                 storeValue(result.value);
6606         }
6607
6608         Float4::Float4() : FloatXYZW(this)
6609         {
6610         }
6611
6612         Float4::Float4(float xyzw) : FloatXYZW(this)
6613         {
6614                 constant(xyzw, xyzw, xyzw, xyzw);
6615         }
6616
6617         Float4::Float4(float x, float yzw) : FloatXYZW(this)
6618         {
6619                 constant(x, yzw, yzw, yzw);
6620         }
6621
6622         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
6623         {
6624                 constant(x, y, zw, zw);
6625         }
6626
6627         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
6628         {
6629                 constant(x, y, z, w);
6630         }
6631
6632         void Float4::constant(float x, float y, float z, float w)
6633         {
6634                 double constantVector[4] = {x, y, z, w};
6635                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
6636         }
6637
6638         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
6639         {
6640                 storeValue(rhs.value);
6641         }
6642
6643         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
6644         {
6645                 Value *value = rhs.loadValue();
6646                 storeValue(value);
6647         }
6648
6649         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
6650         {
6651                 Value *value = rhs.loadValue();
6652                 storeValue(value);
6653         }
6654
6655         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
6656         {
6657                 Value *vector = Nucleus::createBitCast(rhs.value, Float4::getType());
6658
6659                 int swizzle[4] = {0, 0, 0, 0};
6660                 Value *replicate = Nucleus::createShuffleVector(vector, vector, swizzle);
6661
6662                 storeValue(replicate);
6663         }
6664
6665         Float4::Float4(const Float &rhs) : FloatXYZW(this)
6666         {
6667                 *this = RValue<Float>(rhs.loadValue());
6668         }
6669
6670         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
6671         {
6672                 *this = RValue<Float>(rhs.loadValue());
6673         }
6674
6675         RValue<Float4> Float4::operator=(float x)
6676         {
6677                 return *this = Float4(x, x, x, x);
6678         }
6679
6680         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6681         {
6682                 storeValue(rhs.value);
6683
6684                 return rhs;
6685         }
6686
6687         RValue<Float4> Float4::operator=(const Float4 &rhs)
6688         {
6689                 Value *value = rhs.loadValue();
6690                 storeValue(value);
6691
6692                 return RValue<Float4>(value);
6693         }
6694
6695         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6696         {
6697                 Value *value = rhs.loadValue();
6698                 storeValue(value);
6699
6700                 return RValue<Float4>(value);
6701         }
6702
6703         RValue<Float4> Float4::operator=(RValue<Float> rhs)
6704         {
6705                 return *this = Float4(rhs);
6706         }
6707
6708         RValue<Float4> Float4::operator=(const Float &rhs)
6709         {
6710                 return *this = Float4(rhs);
6711         }
6712
6713         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6714         {
6715                 return *this = Float4(rhs);
6716         }
6717
6718         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6719         {
6720                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6721         }
6722
6723         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6724         {
6725                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6726         }
6727
6728         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6729         {
6730                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6731         }
6732
6733         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6734         {
6735                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6736         }
6737
6738         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6739         {
6740                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6741         }
6742
6743         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6744         {
6745                 return lhs = lhs + rhs;
6746         }
6747
6748         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6749         {
6750                 return lhs = lhs - rhs;
6751         }
6752
6753         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6754         {
6755                 return lhs = lhs * rhs;
6756         }
6757
6758         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6759         {
6760                 return lhs = lhs / rhs;
6761         }
6762
6763         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6764         {
6765                 return lhs = lhs % rhs;
6766         }
6767
6768         RValue<Float4> operator+(RValue<Float4> val)
6769         {
6770                 return val;
6771         }
6772
6773         RValue<Float4> operator-(RValue<Float4> val)
6774         {
6775                 return RValue<Float4>(Nucleus::createFNeg(val.value));
6776         }
6777
6778         RValue<Float4> Abs(RValue<Float4> x)
6779         {
6780                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6781                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6782                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6783
6784                 return As<Float4>(result);
6785         }
6786
6787         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6788         {
6789                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6790                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ogt, condition, x.value, y.value);
6791                 ::basicBlock->appendInst(cmp);
6792
6793                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6794                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6795                 ::basicBlock->appendInst(select);
6796
6797                 return RValue<Float4>(V(result));
6798         }
6799
6800         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6801         {
6802                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6803                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Olt, condition, x.value, y.value);
6804                 ::basicBlock->appendInst(cmp);
6805
6806                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6807                 auto select = Ice::InstSelect::create(::function, result, condition, x.value, y.value);
6808                 ::basicBlock->appendInst(select);
6809
6810                 return RValue<Float4>(V(result));
6811         }
6812
6813         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6814         {
6815                 return Float4(1.0f) / x;
6816         }
6817
6818         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6819         {
6820                 return Rcp_pp(Sqrt(x));
6821         }
6822
6823         RValue<Float4> Sqrt(RValue<Float4> x)
6824         {
6825                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6826                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6827                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6828                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6829                 sqrt->addArg(x.value);
6830                 ::basicBlock->appendInst(sqrt);
6831
6832                 return RValue<Float4>(V(result));
6833         }
6834
6835         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6836         {
6837                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6838         }
6839
6840         RValue<Float> Extract(RValue<Float4> x, int i)
6841         {
6842                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6843         }
6844
6845         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6846         {
6847                 return RValue<Float4>(createSwizzle4(x.value, select));
6848         }
6849
6850         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6851         {
6852                 int shuffle[4] =
6853                 {
6854                         ((imm >> 0) & 0x03) + 0,
6855                         ((imm >> 2) & 0x03) + 0,
6856                         ((imm >> 4) & 0x03) + 4,
6857                         ((imm >> 6) & 0x03) + 4,
6858                 };
6859
6860                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6861         }
6862
6863         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6864         {
6865                 int shuffle[4] = {0, 4, 1, 5};
6866                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6867         }
6868
6869         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6870         {
6871                 int shuffle[4] = {2, 6, 3, 7};
6872                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6873         }
6874
6875         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6876         {
6877                 Value *vector = lhs.loadValue();
6878                 Value *result = createMask4(vector, rhs.value, select);
6879                 lhs.storeValue(result);
6880
6881                 return RValue<Float4>(result);
6882         }
6883
6884         RValue<Int> SignMask(RValue<Float4> x)
6885         {
6886                 if(emulateIntrinsics)
6887                 {
6888                         Int4 xx = (As<Int4>(x) >> 31) & Int4(0x00000001, 0x00000002, 0x00000004, 0x00000008);
6889                         return Extract(xx, 0) | Extract(xx, 1) | Extract(xx, 2) | Extract(xx, 3);
6890                 }
6891                 else
6892                 {
6893                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6894                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6895                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6896                         auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6897                         movmsk->addArg(x.value);
6898                         ::basicBlock->appendInst(movmsk);
6899
6900                         return RValue<Int>(V(result));
6901                 }
6902         }
6903
6904         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6905         {
6906                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6907         }
6908
6909         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6910         {
6911                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6912         }
6913
6914         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6915         {
6916                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6917         }
6918
6919         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6920         {
6921                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6922         }
6923
6924         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6925         {
6926                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6927         }
6928
6929         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6930         {
6931                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6932         }
6933
6934         RValue<Float4> Round(RValue<Float4> x)
6935         {
6936                 if(emulateIntrinsics)
6937                 {
6938                         // Push the fractional part off the mantissa. Accurate up to +/-2^22.
6939                         return (x + Float4(0x00C00000)) - Float4(0x00C00000);
6940                 }
6941                 else if(CPUID::SSE4_1)
6942                 {
6943                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6944                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6945                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6946                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6947                         round->addArg(x.value);
6948                         round->addArg(::context->getConstantInt32(0));
6949                         ::basicBlock->appendInst(round);
6950
6951                         return RValue<Float4>(V(result));
6952                 }
6953                 else
6954                 {
6955                         return Float4(RoundInt(x));
6956                 }
6957         }
6958
6959         RValue<Float4> Trunc(RValue<Float4> x)
6960         {
6961                 if(CPUID::SSE4_1)
6962                 {
6963                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6964                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6965                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6966                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6967                         round->addArg(x.value);
6968                         round->addArg(::context->getConstantInt32(3));
6969                         ::basicBlock->appendInst(round);
6970
6971                         return RValue<Float4>(V(result));
6972                 }
6973                 else
6974                 {
6975                         return Float4(Int4(x));
6976                 }
6977         }
6978
6979         RValue<Float4> Frac(RValue<Float4> x)
6980         {
6981                 Float4 frc;
6982
6983                 if(CPUID::SSE4_1)
6984                 {
6985                         frc = x - Floor(x);
6986                 }
6987                 else
6988                 {
6989                         frc = x - Float4(Int4(x));   // Signed fractional part.
6990
6991                         frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));   // Add 1.0 if negative.
6992                 }
6993
6994                 // x - floor(x) can be 1.0 for very small negative x.
6995                 // Clamp against the value just below 1.0.
6996                 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
6997         }
6998
6999         RValue<Float4> Floor(RValue<Float4> x)
7000         {
7001                 if(CPUID::SSE4_1)
7002                 {
7003                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
7004                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
7005                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
7006                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
7007                         round->addArg(x.value);
7008                         round->addArg(::context->getConstantInt32(1));
7009                         ::basicBlock->appendInst(round);
7010
7011                         return RValue<Float4>(V(result));
7012                 }
7013                 else
7014                 {
7015                         return x - Frac(x);
7016                 }
7017         }
7018
7019         RValue<Float4> Ceil(RValue<Float4> x)
7020         {
7021                 if(CPUID::SSE4_1)
7022                 {
7023                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
7024                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
7025                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
7026                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
7027                         round->addArg(x.value);
7028                         round->addArg(::context->getConstantInt32(2));
7029                         ::basicBlock->appendInst(round);
7030
7031                         return RValue<Float4>(V(result));
7032                 }
7033                 else
7034                 {
7035                         return -Floor(-x);
7036                 }
7037         }
7038
7039         Type *Float4::getType()
7040         {
7041                 return T(Ice::IceType_v4f32);
7042         }
7043
7044         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
7045         {
7046                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
7047         }
7048
7049         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
7050         {
7051                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
7052         }
7053
7054         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
7055         {
7056                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
7057         }
7058
7059         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
7060         {
7061                 return lhs = lhs + offset;
7062         }
7063
7064         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
7065         {
7066                 return lhs = lhs + offset;
7067         }
7068
7069         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
7070         {
7071                 return lhs = lhs + offset;
7072         }
7073
7074         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
7075         {
7076                 return lhs + -offset;
7077         }
7078
7079         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
7080         {
7081                 return lhs + -offset;
7082         }
7083
7084         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
7085         {
7086                 return lhs + -offset;
7087         }
7088
7089         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
7090         {
7091                 return lhs = lhs - offset;
7092         }
7093
7094         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
7095         {
7096                 return lhs = lhs - offset;
7097         }
7098
7099         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
7100         {
7101                 return lhs = lhs - offset;
7102         }
7103
7104         void Return()
7105         {
7106                 Nucleus::createRetVoid();
7107                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
7108                 Nucleus::createUnreachable();
7109         }
7110
7111         void Return(RValue<Int> ret)
7112         {
7113                 Nucleus::createRet(ret.value);
7114                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
7115                 Nucleus::createUnreachable();
7116         }
7117
7118         void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
7119         {
7120                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
7121                 Nucleus::setInsertBlock(bodyBB);
7122         }
7123
7124         RValue<Long> Ticks()
7125         {
7126                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
7127         }
7128 }