OSDN Git Service

Detect SSE4.1 support for Subzero.
[android-x86/external-swiftshader.git] / src / Reactor / SubzeroReactor.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Nucleus.hpp"
16
17 #include "Reactor.hpp"
18 #include "Routine.hpp"
19
20 #include "Optimizer.hpp"
21
22 #include "src/IceTypes.h"
23 #include "src/IceCfg.h"
24 #include "src/IceELFStreamer.h"
25 #include "src/IceGlobalContext.h"
26 #include "src/IceCfgNode.h"
27 #include "src/IceELFObjectWriter.h"
28 #include "src/IceGlobalInits.h"
29
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/raw_os_ostream.h"
32
33 #if defined(_WIN32)
34 #define WIN32_LEAN_AND_MEAN
35 #define NOMINMAX
36 #include <Windows.h>
37 #else
38 #include <sys/mman.h>
39 #endif
40
41 #include <mutex>
42 #include <limits>
43 #include <iostream>
44 #include <cassert>
45
46 namespace
47 {
48         Ice::GlobalContext *context = nullptr;
49         Ice::Cfg *function = nullptr;
50         Ice::CfgNode *basicBlock = nullptr;
51         Ice::CfgLocalAllocatorScope *allocator = nullptr;
52         sw::Routine *routine = nullptr;
53
54         std::mutex codegenMutex;
55
56         Ice::ELFFileStreamer *elfFile = nullptr;
57         Ice::Fdstream *out = nullptr;
58 }
59
namespace
{
    // Runtime CPU feature detection through the x86 CPUID instruction.
    class CPUID
    {
    public:
        // True when the processor reports SSE4.1 support. Evaluated once,
        // during static initialization of this translation unit.
        const static bool SSE4_1;

    private:
        // Executes CPUID for leaf `info` and stores EAX, EBX, ECX and EDX
        // in registers[0..3] respectively.
        static void cpuid(int registers[4], int info)
        {
            #if defined(_WIN32)
                __cpuid(registers, info);
            #else
                __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
            #endif
        }

        // SSE4.1 is advertised by bit 19 of ECX for CPUID leaf 1.
        static bool detectSSE4_1()
        {
            int leaf1[4];
            cpuid(leaf1, 1);

            const int ecx = leaf1[2];
            const int sse41Mask = 1 << 19;   // 0x00080000

            return (ecx & sse41Mask) != 0;
        }
    };

    const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
}
87
88 namespace sw
89 {
90         enum EmulatedType
91         {
92                 EmulatedShift = 16,
93                 EmulatedV2 = 2 << EmulatedShift,
94                 EmulatedV4 = 4 << EmulatedShift,
95                 EmulatedV8 = 8 << EmulatedShift,
96                 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
97
98                 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
99                 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
100                 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
101                 Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
102                 Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
103                 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
104         };
105
106         class Value : public Ice::Operand {};
107         class SwitchCases : public Ice::InstSwitch {};
108         class BasicBlock : public Ice::CfgNode {};
109
110         Ice::Type T(Type *t)
111         {
112                 static_assert(Ice::IceType_NUM < EmulatedBits, "Ice::Type overlaps with our emulated types!");
113                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
114         }
115
116         Type *T(Ice::Type t)
117         {
118                 return reinterpret_cast<Type*>(t);
119         }
120
121         Type *T(EmulatedType t)
122         {
123                 return reinterpret_cast<Type*>(t);
124         }
125
126         Value *V(Ice::Operand *v)
127         {
128                 return reinterpret_cast<Value*>(v);
129         }
130
131         BasicBlock *B(Ice::CfgNode *b)
132         {
133                 return reinterpret_cast<BasicBlock*>(b);
134         }
135
136         Optimization optimization[10] = {InstructionCombining, Disabled};
137
138         using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
139         using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
140
141         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
142         {
143                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
144         }
145
146         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
147         {
148                 return &sectionHeader(elfHeader)[index];
149         }
150
151         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
152         {
153                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
154
155                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
156                 int32_t *patchSite = (int*)(address + relocation.r_offset);
157                 uint32_t index = relocation.getSymbol();
158                 int table = relocationTable.sh_link;
159                 void *symbolValue = nullptr;
160
161                 if(index != SHN_UNDEF)
162                 {
163                         if(table == SHN_UNDEF) return nullptr;
164                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
165
166                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
167                         if(index >= symtab_entries)
168                         {
169                                 assert(index < symtab_entries && "Symbol Index out of range");
170                                 return nullptr;
171                         }
172
173                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
174                         Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
175                         uint16_t section = symbol.st_shndx;
176
177                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
178                         {
179                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
180                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
181                         }
182                         else
183                         {
184                                 return nullptr;
185                         }
186                 }
187
188                 switch(relocation.getType())
189                 {
190                 case R_386_NONE:
191                         // No relocation
192                         break;
193                 case R_386_32:
194                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
195                         break;
196         //      case R_386_PC32:
197         //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
198         //              break;
199                 default:
200                         assert(false && "Unsupported relocation type");
201                         return nullptr;
202                 }
203
204                 return symbolValue;
205         }
206
207         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
208         {
209                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
210
211                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
212                 int32_t *patchSite = (int*)(address + relocation.r_offset);
213                 uint32_t index = relocation.getSymbol();
214                 int table = relocationTable.sh_link;
215                 void *symbolValue = nullptr;
216
217                 if(index != SHN_UNDEF)
218                 {
219                         if(table == SHN_UNDEF) return nullptr;
220                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
221
222                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
223                         if(index >= symtab_entries)
224                         {
225                                 assert(index < symtab_entries && "Symbol Index out of range");
226                                 return nullptr;
227                         }
228
229                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
230                         Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
231                         uint16_t section = symbol.st_shndx;
232
233                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
234                         {
235                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
236                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
237                         }
238                         else
239                         {
240                                 return nullptr;
241                         }
242                 }
243
244                 switch(relocation.getType())
245                 {
246                 case R_X86_64_NONE:
247                         // No relocation
248                         break;
249                 case R_X86_64_64:
250                         *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
251                         break;
252                 case R_X86_64_PC32:
253                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
254                         break;
255                 case R_X86_64_32S:
256                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
257                         break;
258                 default:
259                         assert(false && "Unsupported relocation type");
260                         return nullptr;
261                 }
262
263                 return symbolValue;
264         }
265
266         void *loadImage(uint8_t *const elfImage)
267         {
268                 ElfHeader *elfHeader = (ElfHeader*)elfImage;
269
270                 if(!elfHeader->checkMagic())
271                 {
272                         return nullptr;
273                 }
274
275                 // Expect ELF bitness to match platform
276                 assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
277                 assert(sizeof(void*) == 8 ? elfHeader->e_machine == EM_X86_64 : elfHeader->e_machine == EM_386);
278
279                 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
280                 void *entry = nullptr;
281
282                 for(int i = 0; i < elfHeader->e_shnum; i++)
283                 {
284                         if(sectionHeader[i].sh_type == SHT_PROGBITS)
285                         {
286                                 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
287                                 {
288                                         entry = elfImage + sectionHeader[i].sh_offset;
289                                 }
290                         }
291                         else if(sectionHeader[i].sh_type == SHT_REL)
292                         {
293                                 assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code
294
295                                 for(int index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
296                                 {
297                                         const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
298                                         void *symbol = relocateSymbol(elfHeader, relocation, sectionHeader[i]);
299                                 }
300                         }
301                         else if(sectionHeader[i].sh_type == SHT_RELA)
302                         {
303                                 assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code
304
305                                 for(int index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
306                                 {
307                                         const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
308                                         void *symbol = relocateSymbol(elfHeader, relocation, sectionHeader[i]);
309                                 }
310                         }
311                 }
312
313                 return entry;
314         }
315
// Standard-library style allocator that takes memory directly from the
// operating system (VirtualAlloc on Windows, anonymous mmap elsewhere).
// Memory is handed out readable and writable only.
// Allocation failure is not detected here; callers receive whatever the
// operating system call returned.
template<typename T>
struct ExecutableAllocator
{
    using value_type = T;
    using size_type = std::size_t;

    ExecutableAllocator() {}

    // Rebinding constructor; the allocator carries no state to copy.
    template<class U> ExecutableAllocator(const ExecutableAllocator<U> &) {}

    // Maps storage for n objects of type T.
    T *allocate(size_type n)
    {
        const size_type byteCount = sizeof(T) * n;

        #if defined(_WIN32)
            void *memory = VirtualAlloc(NULL, byteCount, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
        #else
            void *memory = mmap(nullptr, byteCount, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        #endif

        return static_cast<T*>(memory);
    }

    // Releases storage obtained from allocate() with the same n.
    void deallocate(T *p, size_type n)
    {
        #if defined(_WIN32)
            VirtualFree(p, 0, MEM_RELEASE);
        #else
            const size_type byteCount = sizeof(T) * n;
            munmap(p, byteCount);
        #endif
    }
};
343
344         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
345         {
346                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
347                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
348
349         public:
350                 ELFMemoryStreamer() : Routine(), entry(nullptr)
351                 {
352                         position = 0;
353                         buffer.reserve(0x1000);
354                 }
355
356                 virtual ~ELFMemoryStreamer()
357                 {
358                         #if defined(_WIN32)
359                                 if(buffer.size() != 0)
360                                 {
361                                         DWORD exeProtection;
362                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
363                                 }
364                         #endif
365                 }
366
367                 void write8(uint8_t Value) override
368                 {
369                         if(position == (uint64_t)buffer.size())
370                         {
371                                 buffer.push_back(Value);
372                                 position++;
373                         }
374                         else if(position < (uint64_t)buffer.size())
375                         {
376                                 buffer[position] = Value;
377                                 position++;
378                         }
379                         else assert(false && "UNIMPLEMENTED");
380                 }
381
382                 void writeBytes(llvm::StringRef Bytes) override
383                 {
384                         std::size_t oldSize = buffer.size();
385                         buffer.resize(oldSize + Bytes.size());
386                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
387                         position += Bytes.size();
388                 }
389
390                 uint64_t tell() const override { return position; }
391
392                 void seek(uint64_t Off) override { position = Off; }
393
394                 const void *getEntry() override
395                 {
396                         if(!entry)
397                         {
398                                 #if defined(_WIN32)
399                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READWRITE, &oldProtection);
400                                 #else
401                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_WRITE | PROT_EXEC);
402                                 #endif
403
404                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
405
406                                 entry = loadImage(&buffer[0]);
407                         }
408
409                         return entry;
410                 }
411
412         private:
413                 void *entry;
414                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
415                 std::size_t position;
416
417                 #if defined(_WIN32)
418                 DWORD oldProtection;
419                 #endif
420         };
421
422         Nucleus::Nucleus()
423         {
424                 ::codegenMutex.lock();   // Reactor is currently not thread safe
425
426                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
427                 Ice::ClFlags::getParsedClFlags(Flags);
428
429                 Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
430                 Flags.setOutFileType(Ice::FT_Elf);
431                 Flags.setOptLevel(Ice::Opt_2);
432                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
433                 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
434                 Flags.setVerbose(false ? Ice::IceV_All : Ice::IceV_None);
435
436                 static llvm::raw_os_ostream cout(std::cout);
437                 static llvm::raw_os_ostream cerr(std::cerr);
438
439                 if(false)   // Write out to a file
440                 {
441                         std::error_code errorCode;
442                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
443                         ::elfFile = new Ice::ELFFileStreamer(*out);
444                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
445                 }
446                 else
447                 {
448                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
449                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
450                         ::routine = elfMemory;
451                 }
452         }
453
454         Nucleus::~Nucleus()
455         {
456                 delete ::allocator;
457                 delete ::function;
458                 delete ::context;
459
460                 delete ::elfFile;
461                 delete ::out;
462
463                 ::codegenMutex.unlock();
464         }
465
466         Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
467         {
468                 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
469                 {
470                         createRetVoid();
471                 }
472
473                 std::wstring wideName(name);
474                 std::string asciiName(wideName.begin(), wideName.end());
475                 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));
476
477                 optimize();
478
479                 ::function->translate();
480                 assert(!::function->hasError());
481
482                 auto *globals = ::function->getGlobalInits().release();
483
484                 if(globals && !globals->empty())
485                 {
486                         ::context->getGlobals()->merge(globals);
487                 }
488
489                 ::context->emitFileHeader();
490                 ::function->emitIAS();
491                 auto assembler = ::function->releaseAssembler();
492                 auto objectWriter = ::context->getObjectWriter();
493                 assembler->alignFunction();
494                 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
495                 ::context->lowerGlobals("last");
496                 ::context->lowerConstants();
497                 ::context->lowerJumpTables();
498                 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
499                 objectWriter->writeNonUserSections();
500
501                 return ::routine;
502         }
503
504         void Nucleus::optimize()
505         {
506                 sw::optimize(::function);
507         }
508
509         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
510         {
511                 Ice::Type type = T(t);
512                 int typeSize = Ice::typeWidthInBytes(type);
513                 int totalSize = typeSize * (arraySize ? arraySize : 1);
514
515                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
516                 auto address = ::function->makeVariable(T(getPointerType(t)));
517                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
518                 ::function->getEntryNode()->getInsts().push_front(alloca);
519
520                 return V(address);
521         }
522
523         BasicBlock *Nucleus::createBasicBlock()
524         {
525                 return B(::function->makeNode());
526         }
527
528         BasicBlock *Nucleus::getInsertBlock()
529         {
530                 return B(::basicBlock);
531         }
532
533         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
534         {
535         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
536                 ::basicBlock = basicBlock;
537         }
538
539         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
540         {
541                 uint32_t sequenceNumber = 0;
542                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
543                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
544
545                 for(Type *type : Params)
546                 {
547                         Ice::Variable *arg = ::function->makeVariable(T(type));
548                         ::function->addArg(arg);
549                 }
550
551                 Ice::CfgNode *node = ::function->makeNode();
552                 ::function->setEntryNode(node);
553                 ::basicBlock = node;
554         }
555
556         Value *Nucleus::getArgument(unsigned int index)
557         {
558                 return V(::function->getArgs()[index]);
559         }
560
561         void Nucleus::createRetVoid()
562         {
563                 Ice::InstRet *ret = Ice::InstRet::create(::function);
564                 ::basicBlock->appendInst(ret);
565         }
566
567         void Nucleus::createRet(Value *v)
568         {
569                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
570                 ::basicBlock->appendInst(ret);
571         }
572
573         void Nucleus::createBr(BasicBlock *dest)
574         {
575                 auto br = Ice::InstBr::create(::function, dest);
576                 ::basicBlock->appendInst(br);
577         }
578
579         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
580         {
581                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
582                 ::basicBlock->appendInst(br);
583         }
584
585         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
586         {
587                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
588
589                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
590                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, lhs, rhs);
591                 ::basicBlock->appendInst(arithmetic);
592
593                 return V(result);
594         }
595
596         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
597         {
598                 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
599         }
600
601         Value *Nucleus::createSub(Value *lhs, Value *rhs)
602         {
603                 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
604         }
605
606         Value *Nucleus::createMul(Value *lhs, Value *rhs)
607         {
608                 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
609         }
610
611         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
612         {
613                 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
614         }
615
616         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
617         {
618                 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
619         }
620
621         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
622         {
623                 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
624         }
625
626         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
627         {
628                 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
629         }
630
631         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
632         {
633                 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
634         }
635
636         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
637         {
638                 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
639         }
640
641         Value *Nucleus::createURem(Value *lhs, Value *rhs)
642         {
643                 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
644         }
645
646         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
647         {
648                 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
649         }
650
651         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
652         {
653                 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
654         }
655
656         Value *Nucleus::createShl(Value *lhs, Value *rhs)
657         {
658                 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
659         }
660
661         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
662         {
663                 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
664         }
665
666         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
667         {
668                 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
669         }
670
671         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
672         {
673                 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
674         }
675
676         Value *Nucleus::createOr(Value *lhs, Value *rhs)
677         {
678                 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
679         }
680
681         Value *Nucleus::createXor(Value *lhs, Value *rhs)
682         {
683                 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
684         }
685
686         static Ice::Variable *createAssign(Ice::Operand *constant)
687         {
688                 Ice::Variable *value = ::function->makeVariable(constant->getType());
689                 auto assign = Ice::InstAssign::create(::function, value, constant);
690                 ::basicBlock->appendInst(assign);
691
692                 return value;
693         }
694
695         Value *Nucleus::createNeg(Value *v)
696         {
697                 return createSub(createNullValue(T(v->getType())), v);
698         }
699
700         Value *Nucleus::createFNeg(Value *v)
701         {
702                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
703                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
704                                       createConstantVector(c, T(v->getType())) :
705                                       V(::context->getConstantFloat(-0.0f));
706
707                 return createFSub(negativeZero, v);
708         }
709
710         Value *Nucleus::createNot(Value *v)
711         {
712                 if(Ice::isScalarIntegerType(v->getType()))
713                 {
714                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
715                 }
716                 else   // Vector
717                 {
718                         int64_t c[4] = {-1, -1, -1, -1};
719                         return createXor(v, createConstantVector(c, T(v->getType())));
720                 }
721         }
722
723         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
724         {
725                 int valueType = (int)reinterpret_cast<intptr_t>(type);
726                 Ice::Variable *result = ::function->makeVariable(T(type));
727
728                 if(valueType & EmulatedBits)
729                 {
730                         switch(valueType)
731                         {
732                         case Type_v4i8:
733                         case Type_v2i16:
734                                 {
735                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
736                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
737                                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
738                                         load->addArg(ptr);
739                                         load->addArg(::context->getConstantInt32(4));
740                                         ::basicBlock->appendInst(load);
741                                 }
742                                 break;
743                         case Type_v2i32:
744                         case Type_v8i8:
745                         case Type_v4i16:
746                         case Type_v2f32:
747                                 {
748                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
749                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
750                                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
751                                         load->addArg(ptr);
752                                         load->addArg(::context->getConstantInt32(8));
753                                         ::basicBlock->appendInst(load);
754                                 }
755                                 break;
756                         default: assert(false && "UNIMPLEMENTED");
757                         }
758                 }
759                 else
760                 {
761                         auto load = Ice::InstLoad::create(::function, result, ptr, align);
762                         ::basicBlock->appendInst(load);
763                 }
764
765                 return V(result);
766         }
767
768         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
769         {
770                 int valueType = (int)reinterpret_cast<intptr_t>(type);
771
772                 if(valueType & EmulatedBits)
773                 {
774                         switch(valueType)
775                         {
776                         case Type_v4i8:
777                         case Type_v2i16:
778                                 {
779                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
780                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
781                                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
782                                         store->addArg(value);
783                                         store->addArg(ptr);
784                                         store->addArg(::context->getConstantInt32(4));
785                                         ::basicBlock->appendInst(store);
786                                 }
787                                 break;
788                         case Type_v2i32:
789                         case Type_v8i8:
790                         case Type_v4i16:
791                         case Type_v2f32:
792                                 {
793                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
794                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
795                                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
796                                         store->addArg(value);
797                                         store->addArg(ptr);
798                                         store->addArg(::context->getConstantInt32(8));
799                                         ::basicBlock->appendInst(store);
800                                 }
801                                 break;
802                         default: assert(false && "UNIMPLEMENTED");
803                         }
804                 }
805                 else
806                 {
807                         assert(T(value->getType()) == type);
808
809                         auto store = Ice::InstStore::create(::function, value, ptr, align);
810                         ::basicBlock->appendInst(store);
811                 }
812
813                 return value;
814         }
815
816         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
817         {
818                 assert(index->getType() == Ice::IceType_i32);
819
820                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
821                 {
822                         int32_t offset = constant->getValue() * (int)Ice::typeWidthInBytes(T(type));
823
824                         if(offset == 0)
825                         {
826                                 return ptr;
827                         }
828
829                         return createAdd(ptr, createConstantInt(offset));
830                 }
831
832                 if(!Ice::isByteSizedType(T(type)))
833                 {
834                         index = createMul(index, createConstantInt((int)Ice::typeWidthInBytes(T(type))));
835                 }
836
837                 if(sizeof(void*) == 8)
838                 {
839                         index = createSExt(index, T(Ice::IceType_i64));
840                 }
841
842                 return createAdd(ptr, index);
843         }
844
845         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
846         {
847                 assert(false && "UNIMPLEMENTED"); return nullptr;
848         }
849
850         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
851         {
852                 if(v->getType() == T(destType))
853                 {
854                         return v;
855                 }
856
857                 Ice::Variable *result = ::function->makeVariable(T(destType));
858                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
859                 ::basicBlock->appendInst(cast);
860
861                 return V(result);
862         }
863
864         Value *Nucleus::createTrunc(Value *v, Type *destType)
865         {
866                 return createCast(Ice::InstCast::Trunc, v, destType);
867         }
868
869         Value *Nucleus::createZExt(Value *v, Type *destType)
870         {
871                 return createCast(Ice::InstCast::Zext, v, destType);
872         }
873
874         Value *Nucleus::createSExt(Value *v, Type *destType)
875         {
876                 return createCast(Ice::InstCast::Sext, v, destType);
877         }
878
879         Value *Nucleus::createFPToSI(Value *v, Type *destType)
880         {
881                 return createCast(Ice::InstCast::Fptosi, v, destType);
882         }
883
884         Value *Nucleus::createSIToFP(Value *v, Type *destType)
885         {
886                 return createCast(Ice::InstCast::Sitofp, v, destType);
887         }
888
889         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
890         {
891                 return createCast(Ice::InstCast::Fptrunc, v, destType);
892         }
893
894         Value *Nucleus::createFPExt(Value *v, Type *destType)
895         {
896                 return createCast(Ice::InstCast::Fpext, v, destType);
897         }
898
899         Value *Nucleus::createBitCast(Value *v, Type *destType)
900         {
901                 return createCast(Ice::InstCast::Bitcast, v, destType);
902         }
903
904         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
905         {
906                 assert(lhs->getType() == rhs->getType());
907
908                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
909                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
910                 ::basicBlock->appendInst(cmp);
911
912                 return V(result);
913         }
914
915         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
916         {
917                 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
918         }
919
920         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
921         {
922                 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
923         }
924
925         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
926         {
927                 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
928         }
929
930         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
931         {
932                 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
933         }
934
935         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
936         {
937                 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
938         }
939
940         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
941         {
942                 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
943         }
944
945         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
946         {
947                 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
948         }
949
950         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
951         {
952                 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
953         }
954
955         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
956         {
957                 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
958         }
959
960         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
961         {
962                 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
963         }
964
965         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
966         {
967                 assert(lhs->getType() == rhs->getType());
968                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
969
970                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
971                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
972                 ::basicBlock->appendInst(cmp);
973
974                 return V(result);
975         }
976
977         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
978         {
979                 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
980         }
981
982         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
983         {
984                 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
985         }
986
987         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
988         {
989                 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
990         }
991
992         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
993         {
994                 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
995         }
996
997         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
998         {
999                 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1000         }
1001
1002         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1003         {
1004                 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1005         }
1006
1007         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1008         {
1009                 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1010         }
1011
1012         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1013         {
1014                 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1015         }
1016
1017         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1018         {
1019                 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1020         }
1021
1022         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1023         {
1024                 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1025         }
1026
1027         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1028         {
1029                 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1030         }
1031
1032         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1033         {
1034                 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1035         }
1036
1037         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1038         {
1039                 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1040         }
1041
1042         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1043         {
1044                 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1045         }
1046
1047         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1048         {
1049                 auto result = ::function->makeVariable(T(type));
1050                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1051                 ::basicBlock->appendInst(extract);
1052
1053                 return V(result);
1054         }
1055
1056         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1057         {
1058                 auto result = ::function->makeVariable(vector->getType());
1059                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1060                 ::basicBlock->appendInst(insert);
1061
1062                 return V(result);
1063         }
1064
1065         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1066         {
1067                 assert(V1->getType() == V2->getType());
1068
1069                 int size = Ice::typeNumElements(V1->getType());
1070                 auto result = ::function->makeVariable(V1->getType());
1071                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1072
1073                 for(int i = 0; i < size; i++)
1074                 {
1075                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1076                 }
1077
1078                 ::basicBlock->appendInst(shuffle);
1079
1080                 return V(result);
1081         }
1082
1083         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1084         {
1085                 assert(ifTrue->getType() == ifFalse->getType());
1086
1087                 auto result = ::function->makeVariable(ifTrue->getType());
1088                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1089                 ::basicBlock->appendInst(select);
1090
1091                 return V(result);
1092         }
1093
1094         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1095         {
1096                 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1097                 ::basicBlock->appendInst(switchInst);
1098
1099                 return reinterpret_cast<SwitchCases*>(switchInst);
1100         }
1101
1102         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1103         {
1104                 switchCases->addBranch(label, label, branch);
1105         }
1106
1107         void Nucleus::createUnreachable()
1108         {
1109                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1110                 ::basicBlock->appendInst(unreachable);
1111         }
1112
1113         static Value *createSwizzle4(Value *val, unsigned char select)
1114         {
1115                 int swizzle[4] =
1116                 {
1117                         (select >> 0) & 0x03,
1118                         (select >> 2) & 0x03,
1119                         (select >> 4) & 0x03,
1120                         (select >> 6) & 0x03,
1121                 };
1122
1123                 return Nucleus::createShuffleVector(val, val, swizzle);
1124         }
1125
1126         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1127         {
1128                 int64_t mask[4] = {0, 0, 0, 0};
1129
1130                 mask[(select >> 0) & 0x03] = -1;
1131                 mask[(select >> 2) & 0x03] = -1;
1132                 mask[(select >> 4) & 0x03] = -1;
1133                 mask[(select >> 6) & 0x03] = -1;
1134
1135                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1136                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1137
1138                 return result;
1139         }
1140
1141         Type *Nucleus::getPointerType(Type *ElementType)
1142         {
1143                 if(sizeof(void*) == 8)
1144                 {
1145                         return T(Ice::IceType_i64);
1146                 }
1147                 else
1148                 {
1149                         return T(Ice::IceType_i32);
1150                 }
1151         }
1152
1153         Value *Nucleus::createNullValue(Type *Ty)
1154         {
1155                 if(Ice::isVectorType(T(Ty)))
1156                 {
1157                         int64_t c[4] = {0, 0, 0, 0};
1158                         return createConstantVector(c, Ty);
1159                 }
1160                 else
1161                 {
1162                         return V(::context->getConstantZero(T(Ty)));
1163                 }
1164         }
1165
1166         Value *Nucleus::createConstantLong(int64_t i)
1167         {
1168                 return V(::context->getConstantInt64(i));
1169         }
1170
1171         Value *Nucleus::createConstantInt(int i)
1172         {
1173                 return V(::context->getConstantInt32(i));
1174         }
1175
1176         Value *Nucleus::createConstantInt(unsigned int i)
1177         {
1178                 return V(::context->getConstantInt32(i));
1179         }
1180
1181         Value *Nucleus::createConstantBool(bool b)
1182         {
1183                 return V(::context->getConstantInt1(b));
1184         }
1185
1186         Value *Nucleus::createConstantByte(signed char i)
1187         {
1188                 return V(::context->getConstantInt8(i));
1189         }
1190
1191         Value *Nucleus::createConstantByte(unsigned char i)
1192         {
1193                 return V(::context->getConstantInt8(i));
1194         }
1195
1196         Value *Nucleus::createConstantShort(short i)
1197         {
1198                 return V(::context->getConstantInt16(i));
1199         }
1200
1201         Value *Nucleus::createConstantShort(unsigned short i)
1202         {
1203                 return V(::context->getConstantInt16(i));
1204         }
1205
1206         Value *Nucleus::createConstantFloat(float x)
1207         {
1208                 return V(::context->getConstantFloat(x));
1209         }
1210
1211         Value *Nucleus::createNullPointer(Type *Ty)
1212         {
1213                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1214         }
1215
1216         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1217         {
1218                 const int vectorSize = 16;
1219                 assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
1220                 const int alignment = vectorSize;
1221                 auto globalPool = ::function->getGlobalPool();
1222
1223                 const int64_t *i = constants;
1224                 const double *f = reinterpret_cast<const double*>(constants);
1225                 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1226
1227                 switch((int)reinterpret_cast<intptr_t>(type))
1228                 {
1229                 case Ice::IceType_v4i32:
1230                 case Ice::IceType_v4i1:
1231                         {
1232                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1233                                 static_assert(sizeof(initializer) == vectorSize, "!");
1234                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1235                         }
1236                         break;
1237                 case Ice::IceType_v4f32:
1238                         {
1239                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1240                                 static_assert(sizeof(initializer) == vectorSize, "!");
1241                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1242                         }
1243                         break;
1244                 case Ice::IceType_v8i16:
1245                 case Ice::IceType_v8i1:
1246                         {
1247                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1248                                 static_assert(sizeof(initializer) == vectorSize, "!");
1249                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1250                         }
1251                         break;
1252                 case Ice::IceType_v16i8:
1253                 case Ice::IceType_v16i1:
1254                         {
1255                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1256                                 static_assert(sizeof(initializer) == vectorSize, "!");
1257                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1258                         }
1259                         break;
1260                 case Type_v2i32:
1261                         {
1262                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1263                                 static_assert(sizeof(initializer) == vectorSize, "!");
1264                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1265                         }
1266                         break;
1267                 case Type_v2f32:
1268                         {
1269                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1270                                 static_assert(sizeof(initializer) == vectorSize, "!");
1271                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1272                         }
1273                         break;
1274                 case Type_v4i16:
1275                         {
1276                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1277                                 static_assert(sizeof(initializer) == vectorSize, "!");
1278                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1279                         }
1280                         break;
1281                 case Type_v8i8:
1282                         {
1283                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1284                                 static_assert(sizeof(initializer) == vectorSize, "!");
1285                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1286                         }
1287                         break;
1288                 case Type_v4i8:
1289                         {
1290                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1291                                 static_assert(sizeof(initializer) == vectorSize, "!");
1292                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1293                         }
1294                         break;
1295                 default:
1296                         assert(false && "Unknown constant vector type" && type);
1297                 }
1298
1299                 auto name = Ice::GlobalString::createWithoutString(::context);
1300                 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1301                 variableDeclaration->setName(name);
1302                 variableDeclaration->setAlignment(alignment);
1303                 variableDeclaration->setIsConstant(true);
1304                 variableDeclaration->addInitializer(dataInitializer);
1305
1306                 ::function->addGlobal(variableDeclaration);
1307
1308                 constexpr int32_t offset = 0;
1309                 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1310
1311                 Ice::Variable *result = ::function->makeVariable(T(type));
1312                 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1313                 ::basicBlock->appendInst(load);
1314
1315                 return V(result);
1316         }
1317
1318         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1319         {
1320                 return createConstantVector((const int64_t*)constants, type);
1321         }
1322
1323         Type *Void::getType()
1324         {
1325                 return T(Ice::IceType_void);
1326         }
1327
1328         Bool::Bool(Argument<Bool> argument)
1329         {
1330                 storeValue(argument.value);
1331         }
1332
1333         Bool::Bool(bool x)
1334         {
1335                 storeValue(Nucleus::createConstantBool(x));
1336         }
1337
1338         Bool::Bool(RValue<Bool> rhs)
1339         {
1340                 storeValue(rhs.value);
1341         }
1342
1343         Bool::Bool(const Bool &rhs)
1344         {
1345                 Value *value = rhs.loadValue();
1346                 storeValue(value);
1347         }
1348
1349         Bool::Bool(const Reference<Bool> &rhs)
1350         {
1351                 Value *value = rhs.loadValue();
1352                 storeValue(value);
1353         }
1354
1355         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1356         {
1357                 storeValue(rhs.value);
1358
1359                 return rhs;
1360         }
1361
1362         RValue<Bool> Bool::operator=(const Bool &rhs)
1363         {
1364                 Value *value = rhs.loadValue();
1365                 storeValue(value);
1366
1367                 return RValue<Bool>(value);
1368         }
1369
1370         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1371         {
1372                 Value *value = rhs.loadValue();
1373                 storeValue(value);
1374
1375                 return RValue<Bool>(value);
1376         }
1377
1378         RValue<Bool> operator!(RValue<Bool> val)
1379         {
1380                 return RValue<Bool>(Nucleus::createNot(val.value));
1381         }
1382
1383         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1384         {
1385                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1386         }
1387
1388         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1389         {
1390                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1391         }
1392
1393         Type *Bool::getType()
1394         {
1395                 return T(Ice::IceType_i1);
1396         }
1397
1398         Byte::Byte(Argument<Byte> argument)
1399         {
1400                 storeValue(argument.value);
1401         }
1402
1403         Byte::Byte(RValue<Int> cast)
1404         {
1405                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1406
1407                 storeValue(integer);
1408         }
1409
1410         Byte::Byte(RValue<UInt> cast)
1411         {
1412                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1413
1414                 storeValue(integer);
1415         }
1416
1417         Byte::Byte(RValue<UShort> cast)
1418         {
1419                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1420
1421                 storeValue(integer);
1422         }
1423
1424         Byte::Byte(int x)
1425         {
1426                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1427         }
1428
1429         Byte::Byte(unsigned char x)
1430         {
1431                 storeValue(Nucleus::createConstantByte(x));
1432         }
1433
1434         Byte::Byte(RValue<Byte> rhs)
1435         {
1436                 storeValue(rhs.value);
1437         }
1438
1439         Byte::Byte(const Byte &rhs)
1440         {
1441                 Value *value = rhs.loadValue();
1442                 storeValue(value);
1443         }
1444
1445         Byte::Byte(const Reference<Byte> &rhs)
1446         {
1447                 Value *value = rhs.loadValue();
1448                 storeValue(value);
1449         }
1450
1451         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1452         {
1453                 storeValue(rhs.value);
1454
1455                 return rhs;
1456         }
1457
1458         RValue<Byte> Byte::operator=(const Byte &rhs)
1459         {
1460                 Value *value = rhs.loadValue();
1461                 storeValue(value);
1462
1463                 return RValue<Byte>(value);
1464         }
1465
1466         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1467         {
1468                 Value *value = rhs.loadValue();
1469                 storeValue(value);
1470
1471                 return RValue<Byte>(value);
1472         }
1473
1474         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1475         {
1476                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1477         }
1478
1479         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1480         {
1481                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1482         }
1483
1484         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1485         {
1486                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1487         }
1488
1489         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1490         {
1491                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1492         }
1493
1494         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1495         {
1496                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1497         }
1498
1499         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1500         {
1501                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1502         }
1503
1504         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1505         {
1506                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1507         }
1508
1509         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1510         {
1511                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1512         }
1513
1514         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1515         {
1516                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1517         }
1518
1519         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1520         {
1521                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1522         }
1523
1524         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1525         {
1526                 return lhs = lhs + rhs;
1527         }
1528
1529         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1530         {
1531                 return lhs = lhs - rhs;
1532         }
1533
1534         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1535         {
1536                 return lhs = lhs * rhs;
1537         }
1538
1539         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1540         {
1541                 return lhs = lhs / rhs;
1542         }
1543
1544         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1545         {
1546                 return lhs = lhs % rhs;
1547         }
1548
1549         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1550         {
1551                 return lhs = lhs & rhs;
1552         }
1553
1554         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1555         {
1556                 return lhs = lhs | rhs;
1557         }
1558
1559         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1560         {
1561                 return lhs = lhs ^ rhs;
1562         }
1563
1564         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1565         {
1566                 return lhs = lhs << rhs;
1567         }
1568
1569         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1570         {
1571                 return lhs = lhs >> rhs;
1572         }
1573
1574         RValue<Byte> operator+(RValue<Byte> val)
1575         {
1576                 return val;
1577         }
1578
1579         RValue<Byte> operator-(RValue<Byte> val)
1580         {
1581                 return RValue<Byte>(Nucleus::createNeg(val.value));
1582         }
1583
1584         RValue<Byte> operator~(RValue<Byte> val)
1585         {
1586                 return RValue<Byte>(Nucleus::createNot(val.value));
1587         }
1588
1589         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1590         {
1591                 RValue<Byte> res = val;
1592                 val += Byte(1);
1593                 return res;
1594         }
1595
1596         const Byte &operator++(Byte &val)   // Pre-increment
1597         {
1598                 val += Byte(1);
1599                 return val;
1600         }
1601
1602         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1603         {
1604                 RValue<Byte> res = val;
1605                 val -= Byte(1);
1606                 return res;
1607         }
1608
1609         const Byte &operator--(Byte &val)   // Pre-decrement
1610         {
1611                 val -= Byte(1);
1612                 return val;
1613         }
1614
1615         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1616         {
1617                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1618         }
1619
1620         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1621         {
1622                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1623         }
1624
1625         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1626         {
1627                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1628         }
1629
1630         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1631         {
1632                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1633         }
1634
1635         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1636         {
1637                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1638         }
1639
1640         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1641         {
1642                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1643         }
1644
1645         Type *Byte::getType()
1646         {
1647                 return T(Ice::IceType_i8);
1648         }
1649
1650         SByte::SByte(Argument<SByte> argument)
1651         {
1652                 storeValue(argument.value);
1653         }
1654
1655         SByte::SByte(RValue<Int> cast)
1656         {
1657                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1658
1659                 storeValue(integer);
1660         }
1661
1662         SByte::SByte(RValue<Short> cast)
1663         {
1664                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1665
1666                 storeValue(integer);
1667         }
1668
1669         SByte::SByte(signed char x)
1670         {
1671                 storeValue(Nucleus::createConstantByte(x));
1672         }
1673
1674         SByte::SByte(RValue<SByte> rhs)
1675         {
1676                 storeValue(rhs.value);
1677         }
1678
1679         SByte::SByte(const SByte &rhs)
1680         {
1681                 Value *value = rhs.loadValue();
1682                 storeValue(value);
1683         }
1684
1685         SByte::SByte(const Reference<SByte> &rhs)
1686         {
1687                 Value *value = rhs.loadValue();
1688                 storeValue(value);
1689         }
1690
1691         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1692         {
1693                 storeValue(rhs.value);
1694
1695                 return rhs;
1696         }
1697
1698         RValue<SByte> SByte::operator=(const SByte &rhs)
1699         {
1700                 Value *value = rhs.loadValue();
1701                 storeValue(value);
1702
1703                 return RValue<SByte>(value);
1704         }
1705
1706         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1707         {
1708                 Value *value = rhs.loadValue();
1709                 storeValue(value);
1710
1711                 return RValue<SByte>(value);
1712         }
1713
1714         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1715         {
1716                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1717         }
1718
1719         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1720         {
1721                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1722         }
1723
1724         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1725         {
1726                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1727         }
1728
1729         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1730         {
1731                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1732         }
1733
1734         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1735         {
1736                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1737         }
1738
1739         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1740         {
1741                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1742         }
1743
1744         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1745         {
1746                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1747         }
1748
1749         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1750         {
1751                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1752         }
1753
1754         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1755         {
1756                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1757         }
1758
1759         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1760         {
1761                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1762         }
1763
1764         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1765         {
1766                 return lhs = lhs + rhs;
1767         }
1768
1769         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1770         {
1771                 return lhs = lhs - rhs;
1772         }
1773
1774         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1775         {
1776                 return lhs = lhs * rhs;
1777         }
1778
1779         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1780         {
1781                 return lhs = lhs / rhs;
1782         }
1783
1784         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1785         {
1786                 return lhs = lhs % rhs;
1787         }
1788
1789         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1790         {
1791                 return lhs = lhs & rhs;
1792         }
1793
1794         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1795         {
1796                 return lhs = lhs | rhs;
1797         }
1798
1799         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1800         {
1801                 return lhs = lhs ^ rhs;
1802         }
1803
1804         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1805         {
1806                 return lhs = lhs << rhs;
1807         }
1808
1809         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1810         {
1811                 return lhs = lhs >> rhs;
1812         }
1813
1814         RValue<SByte> operator+(RValue<SByte> val)
1815         {
1816                 return val;
1817         }
1818
1819         RValue<SByte> operator-(RValue<SByte> val)
1820         {
1821                 return RValue<SByte>(Nucleus::createNeg(val.value));
1822         }
1823
1824         RValue<SByte> operator~(RValue<SByte> val)
1825         {
1826                 return RValue<SByte>(Nucleus::createNot(val.value));
1827         }
1828
1829         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1830         {
1831                 RValue<SByte> res = val;
1832                 val += SByte(1);
1833                 return res;
1834         }
1835
1836         const SByte &operator++(SByte &val)   // Pre-increment
1837         {
1838                 val += SByte(1);
1839                 return val;
1840         }
1841
1842         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
1843         {
1844                 RValue<SByte> res = val;
1845                 val -= SByte(1);
1846                 return res;
1847         }
1848
1849         const SByte &operator--(SByte &val)   // Pre-decrement
1850         {
1851                 val -= SByte(1);
1852                 return val;
1853         }
1854
1855         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1856         {
1857                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1858         }
1859
1860         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1861         {
1862                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1863         }
1864
1865         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1866         {
1867                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1868         }
1869
1870         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1871         {
1872                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1873         }
1874
1875         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1876         {
1877                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1878         }
1879
1880         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1881         {
1882                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1883         }
1884
1885         Type *SByte::getType()
1886         {
1887                 return T(Ice::IceType_i8);
1888         }
1889
1890         Short::Short(Argument<Short> argument)
1891         {
1892                 storeValue(argument.value);
1893         }
1894
1895         Short::Short(RValue<Int> cast)
1896         {
1897                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1898
1899                 storeValue(integer);
1900         }
1901
1902         Short::Short(short x)
1903         {
1904                 storeValue(Nucleus::createConstantShort(x));
1905         }
1906
1907         Short::Short(RValue<Short> rhs)
1908         {
1909                 storeValue(rhs.value);
1910         }
1911
1912         Short::Short(const Short &rhs)
1913         {
1914                 Value *value = rhs.loadValue();
1915                 storeValue(value);
1916         }
1917
1918         Short::Short(const Reference<Short> &rhs)
1919         {
1920                 Value *value = rhs.loadValue();
1921                 storeValue(value);
1922         }
1923
1924         RValue<Short> Short::operator=(RValue<Short> rhs)
1925         {
1926                 storeValue(rhs.value);
1927
1928                 return rhs;
1929         }
1930
1931         RValue<Short> Short::operator=(const Short &rhs)
1932         {
1933                 Value *value = rhs.loadValue();
1934                 storeValue(value);
1935
1936                 return RValue<Short>(value);
1937         }
1938
1939         RValue<Short> Short::operator=(const Reference<Short> &rhs)
1940         {
1941                 Value *value = rhs.loadValue();
1942                 storeValue(value);
1943
1944                 return RValue<Short>(value);
1945         }
1946
1947         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1948         {
1949                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1950         }
1951
1952         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1953         {
1954                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1955         }
1956
1957         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1958         {
1959                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1960         }
1961
1962         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1963         {
1964                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1965         }
1966
1967         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1968         {
1969                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1970         }
1971
1972         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1973         {
1974                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1975         }
1976
1977         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1978         {
1979                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1980         }
1981
1982         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1983         {
1984                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1985         }
1986
1987         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1988         {
1989                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1990         }
1991
1992         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1993         {
1994                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1995         }
1996
1997         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
1998         {
1999                 return lhs = lhs + rhs;
2000         }
2001
2002         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2003         {
2004                 return lhs = lhs - rhs;
2005         }
2006
2007         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2008         {
2009                 return lhs = lhs * rhs;
2010         }
2011
2012         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2013         {
2014                 return lhs = lhs / rhs;
2015         }
2016
2017         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2018         {
2019                 return lhs = lhs % rhs;
2020         }
2021
2022         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2023         {
2024                 return lhs = lhs & rhs;
2025         }
2026
2027         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2028         {
2029                 return lhs = lhs | rhs;
2030         }
2031
2032         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2033         {
2034                 return lhs = lhs ^ rhs;
2035         }
2036
2037         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2038         {
2039                 return lhs = lhs << rhs;
2040         }
2041
2042         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2043         {
2044                 return lhs = lhs >> rhs;
2045         }
2046
2047         RValue<Short> operator+(RValue<Short> val)
2048         {
2049                 return val;
2050         }
2051
2052         RValue<Short> operator-(RValue<Short> val)
2053         {
2054                 return RValue<Short>(Nucleus::createNeg(val.value));
2055         }
2056
2057         RValue<Short> operator~(RValue<Short> val)
2058         {
2059                 return RValue<Short>(Nucleus::createNot(val.value));
2060         }
2061
2062         RValue<Short> operator++(Short &val, int)   // Post-increment
2063         {
2064                 RValue<Short> res = val;
2065                 val += Short(1);
2066                 return res;
2067         }
2068
2069         const Short &operator++(Short &val)   // Pre-increment
2070         {
2071                 val += Short(1);
2072                 return val;
2073         }
2074
2075         RValue<Short> operator--(Short &val, int)   // Post-decrement
2076         {
2077                 RValue<Short> res = val;
2078                 val -= Short(1);
2079                 return res;
2080         }
2081
2082         const Short &operator--(Short &val)   // Pre-decrement
2083         {
2084                 val -= Short(1);
2085                 return val;
2086         }
2087
2088         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2089         {
2090                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2091         }
2092
2093         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2094         {
2095                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2096         }
2097
2098         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2099         {
2100                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2101         }
2102
2103         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2104         {
2105                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2106         }
2107
2108         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2109         {
2110                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2111         }
2112
2113         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2114         {
2115                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2116         }
2117
2118         Type *Short::getType()
2119         {
2120                 return T(Ice::IceType_i16);
2121         }
2122
2123         UShort::UShort(Argument<UShort> argument)
2124         {
2125                 storeValue(argument.value);
2126         }
2127
2128         UShort::UShort(RValue<UInt> cast)
2129         {
2130                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2131
2132                 storeValue(integer);
2133         }
2134
2135         UShort::UShort(RValue<Int> cast)
2136         {
2137                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2138
2139                 storeValue(integer);
2140         }
2141
2142         UShort::UShort(unsigned short x)
2143         {
2144                 storeValue(Nucleus::createConstantShort(x));
2145         }
2146
2147         UShort::UShort(RValue<UShort> rhs)
2148         {
2149                 storeValue(rhs.value);
2150         }
2151
2152         UShort::UShort(const UShort &rhs)
2153         {
2154                 Value *value = rhs.loadValue();
2155                 storeValue(value);
2156         }
2157
2158         UShort::UShort(const Reference<UShort> &rhs)
2159         {
2160                 Value *value = rhs.loadValue();
2161                 storeValue(value);
2162         }
2163
2164         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2165         {
2166                 storeValue(rhs.value);
2167
2168                 return rhs;
2169         }
2170
2171         RValue<UShort> UShort::operator=(const UShort &rhs)
2172         {
2173                 Value *value = rhs.loadValue();
2174                 storeValue(value);
2175
2176                 return RValue<UShort>(value);
2177         }
2178
2179         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2180         {
2181                 Value *value = rhs.loadValue();
2182                 storeValue(value);
2183
2184                 return RValue<UShort>(value);
2185         }
2186
2187         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2188         {
2189                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2190         }
2191
2192         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2193         {
2194                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2195         }
2196
2197         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2198         {
2199                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2200         }
2201
2202         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2203         {
2204                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2205         }
2206
2207         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2208         {
2209                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2210         }
2211
2212         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2213         {
2214                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2215         }
2216
2217         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2218         {
2219                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2220         }
2221
2222         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2223         {
2224                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2225         }
2226
2227         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2228         {
2229                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2230         }
2231
2232         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2233         {
2234                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2235         }
2236
2237         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2238         {
2239                 return lhs = lhs + rhs;
2240         }
2241
2242         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2243         {
2244                 return lhs = lhs - rhs;
2245         }
2246
2247         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2248         {
2249                 return lhs = lhs * rhs;
2250         }
2251
2252         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2253         {
2254                 return lhs = lhs / rhs;
2255         }
2256
2257         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2258         {
2259                 return lhs = lhs % rhs;
2260         }
2261
2262         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2263         {
2264                 return lhs = lhs & rhs;
2265         }
2266
2267         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2268         {
2269                 return lhs = lhs | rhs;
2270         }
2271
2272         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2273         {
2274                 return lhs = lhs ^ rhs;
2275         }
2276
2277         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2278         {
2279                 return lhs = lhs << rhs;
2280         }
2281
2282         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2283         {
2284                 return lhs = lhs >> rhs;
2285         }
2286
2287         RValue<UShort> operator+(RValue<UShort> val)
2288         {
2289                 return val;
2290         }
2291
2292         RValue<UShort> operator-(RValue<UShort> val)
2293         {
2294                 return RValue<UShort>(Nucleus::createNeg(val.value));
2295         }
2296
2297         RValue<UShort> operator~(RValue<UShort> val)
2298         {
2299                 return RValue<UShort>(Nucleus::createNot(val.value));
2300         }
2301
2302         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2303         {
2304                 RValue<UShort> res = val;
2305                 val += UShort(1);
2306                 return res;
2307         }
2308
2309         const UShort &operator++(UShort &val)   // Pre-increment
2310         {
2311                 val += UShort(1);
2312                 return val;
2313         }
2314
2315         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2316         {
2317                 RValue<UShort> res = val;
2318                 val -= UShort(1);
2319                 return res;
2320         }
2321
2322         const UShort &operator--(UShort &val)   // Pre-decrement
2323         {
2324                 val -= UShort(1);
2325                 return val;
2326         }
2327
2328         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2329         {
2330                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2331         }
2332
2333         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2334         {
2335                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2336         }
2337
2338         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2339         {
2340                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2341         }
2342
2343         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2344         {
2345                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2346         }
2347
2348         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2349         {
2350                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2351         }
2352
2353         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2354         {
2355                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2356         }
2357
2358         Type *UShort::getType()
2359         {
2360                 return T(Ice::IceType_i16);
2361         }
2362
2363         Byte4::Byte4(RValue<Byte8> cast)
2364         {
2365                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2366         }
2367
2368         Byte4::Byte4(const Reference<Byte4> &rhs)
2369         {
2370                 Value *value = rhs.loadValue();
2371                 storeValue(value);
2372         }
2373
2374         Type *Byte4::getType()
2375         {
2376                 return T(Type_v4i8);
2377         }
2378
2379         Type *SByte4::getType()
2380         {
2381                 return T(Type_v4i8);
2382         }
2383
2384         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2385         {
2386                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2387                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2388         }
2389
2390         Byte8::Byte8(RValue<Byte8> rhs)
2391         {
2392                 storeValue(rhs.value);
2393         }
2394
2395         Byte8::Byte8(const Byte8 &rhs)
2396         {
2397                 Value *value = rhs.loadValue();
2398                 storeValue(value);
2399         }
2400
2401         Byte8::Byte8(const Reference<Byte8> &rhs)
2402         {
2403                 Value *value = rhs.loadValue();
2404                 storeValue(value);
2405         }
2406
2407         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2408         {
2409                 storeValue(rhs.value);
2410
2411                 return rhs;
2412         }
2413
2414         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2415         {
2416                 Value *value = rhs.loadValue();
2417                 storeValue(value);
2418
2419                 return RValue<Byte8>(value);
2420         }
2421
2422         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2423         {
2424                 Value *value = rhs.loadValue();
2425                 storeValue(value);
2426
2427                 return RValue<Byte8>(value);
2428         }
2429
2430         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2431         {
2432                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2433         }
2434
2435         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2436         {
2437                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2438         }
2439
2440 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2441 //      {
2442 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2443 //      }
2444
2445 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2446 //      {
2447 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2448 //      }
2449
2450 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2451 //      {
2452 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2453 //      }
2454
2455         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2456         {
2457                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2458         }
2459
2460         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2461         {
2462                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2463         }
2464
2465         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2466         {
2467                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2468         }
2469
2470 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2471 //      {
2472 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2473 //      }
2474
2475 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2476 //      {
2477 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2478 //      }
2479
2480         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2481         {
2482                 return lhs = lhs + rhs;
2483         }
2484
2485         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2486         {
2487                 return lhs = lhs - rhs;
2488         }
2489
2490 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2491 //      {
2492 //              return lhs = lhs * rhs;
2493 //      }
2494
2495 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2496 //      {
2497 //              return lhs = lhs / rhs;
2498 //      }
2499
2500 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2501 //      {
2502 //              return lhs = lhs % rhs;
2503 //      }
2504
2505         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)   // Compound bitwise AND on a Byte8 variable.
2506         {
2507                 return lhs = lhs & rhs;   // Evaluates lhs & rhs, assigns it back to lhs, returns the value of that assignment
2508         }
2509
2510         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)   // Compound bitwise OR on a Byte8 variable.
2511         {
2512                 return lhs = lhs | rhs;   // Evaluates lhs | rhs, assigns it back to lhs, returns the value of that assignment
2513         }
2514
2515         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)   // Compound bitwise XOR on a Byte8 variable.
2516         {
2517                 return lhs = lhs ^ rhs;   // Evaluates lhs ^ rhs, assigns it back to lhs, returns the value of that assignment
2518         }
2519
2520 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2521 //      {
2522 //              return lhs = lhs << rhs;
2523 //      }
2524
2525 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2526 //      {
2527 //              return lhs = lhs >> rhs;
2528 //      }
2529
2530 //      RValue<Byte8> operator+(RValue<Byte8> val)
2531 //      {
2532 //              return val;
2533 //      }
2534
2535 //      RValue<Byte8> operator-(RValue<Byte8> val)
2536 //      {
2537 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2538 //      }
2539
2540         RValue<Byte8> operator~(RValue<Byte8> val)   // Bitwise complement of a Byte8 r-value.
2541         {
2542                 return RValue<Byte8>(Nucleus::createNot(val.value));   // Result of Nucleus::createNot wrapped as a new r-value
2543         }
2544
2545         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)   // Unsigned saturating add of x and y, emitted as an AddSaturateUnsigned intrinsic call.
2546         {
2547                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);   // Destination variable; the IR type used here is v16i8
2548                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2549                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
2550                 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
2551                 paddusb->addArg(x.value);
2552                 paddusb->addArg(y.value);
2553                 ::basicBlock->appendInst(paddusb);   // Appended to the file-level current basic block
2554
2555                 return RValue<Byte8>(V(result));   // The intrinsic's destination variable becomes the returned r-value
2556         }
2557
2558         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2559         {
2560                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2561                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2562                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2563                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2564                 psubusw->addArg(x.value);
2565                 psubusw->addArg(y.value);
2566                 ::basicBlock->appendInst(psubusw);
2567
2568                 return RValue<Byte8>(V(result));
2569         }
2570
2571         RValue<Short4> Unpack(RValue<Byte4> x)   // Duplicates each byte of x into an adjacent pair and returns the result typed as Short4.
2572         {
2573                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8; every source index appears twice in a row
2574                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));   // x is passed as both shuffle inputs
2575         }
2576
2577         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)   // Interleaves bytes of x and y (x0, y0, x1, y1, ...) and returns the result typed as Short4.
2578         {
2579                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8; indices 16+ presumably address the second operand (y) - confirm in createShuffleVector
2580                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2581         }
2582
2583         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)   // Interleaves bytes of x and y like UnpackLow, then returns the upper part of the interleaved result.
2584         {
2585                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8; same interleave mask as UnpackLow
2586                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));   // Full 16-byte interleave, kept as Byte16
2587                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));   // 0xEE read as 2-bit fields is 2,3,2,3 - presumably moves the upper two dwords into the low half; confirm against Int4 Swizzle
2588         }
2589
2590         RValue<Int> SignMask(RValue<Byte8> x)   // Emits a SignMask intrinsic call on x and returns its i32 result.
2591         {
2592                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);   // Scalar i32 destination
2593                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2594                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
2595                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);   // 1 matches the single addArg call below
2596                 movmsk->addArg(x.value);
2597                 ::basicBlock->appendInst(movmsk);
2598
2599                 return RValue<Int>(V(result));   // NOTE(review): presumably one sign bit per byte lane - confirm in the backend lowering of SignMask
2600         }
2601
2602 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2603 //      {
2604 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2605 //      }
2606
2607         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)   // Equality comparison of x and y, returned as a Byte8.
2608         {
2609                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));   // Result of Nucleus::createICmpEQ wrapped as a new r-value
2610         }
2611
2612         Type *Byte8::getType()   // Type handle used for Byte8 values.
2613         {
2614                 return T(Type_v8i8);   // Same Type_v8i8 handle that SByte8::getType returns
2615         }
2616
2617         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2618         {   // Builds a constant vector from the eight byte arguments (x0 first) and stores it in this variable.
2619                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };   // NOTE(review): parameters are uint8_t, so each entry widens to 0..255 rather than sign-extending
2620                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2621
2622                 storeValue(Nucleus::createBitCast(vector, getType()));   // Bitcast targets the same getType() already passed to createConstantVector
2623         }
2624
2625         SByte8::SByte8(RValue<SByte8> rhs)   // Constructs from an r-value.
2626         {
2627                 storeValue(rhs.value);   // Stores rhs's value directly into this variable
2628         }
2629
2630         SByte8::SByte8(const SByte8 &rhs)   // Copy constructor.
2631         {
2632                 Value *value = rhs.loadValue();   // Load the current contents of rhs
2633                 storeValue(value);   // and store them into this variable
2634         }
2635
2636         SByte8::SByte8(const Reference<SByte8> &rhs)   // Constructs from a Reference to an SByte8.
2637         {
2638                 Value *value = rhs.loadValue();   // Load the referenced value
2639                 storeValue(value);   // and store it into this variable
2640         }
2641
2642         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)   // Assigns an r-value to this variable.
2643         {
2644                 storeValue(rhs.value);
2645
2646                 return rhs;   // The assigned r-value is returned unchanged
2647         }
2648
2649         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)   // Assigns from another SByte8 variable.
2650         {
2651                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
2652                 storeValue(value);
2653
2654                 return RValue<SByte8>(value);
2655         }
2656
2657         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)   // Assigns from a Reference to an SByte8.
2658         {
2659                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
2660                 storeValue(value);
2661
2662                 return RValue<SByte8>(value);
2663         }
2664
2665         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)   // Addition of two SByte8 r-values.
2666         {
2667                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));   // Result of Nucleus::createAdd wrapped as a new r-value
2668         }
2669
2670         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)   // Subtraction (lhs - rhs) of two SByte8 r-values.
2671         {
2672                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));   // Result of Nucleus::createSub wrapped as a new r-value
2673         }
2674
2675 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2676 //      {
2677 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2678 //      }
2679
2680 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2681 //      {
2682 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2683 //      }
2684
2685 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2686 //      {
2687 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2688 //      }
2689
2690         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)   // Bitwise AND of two SByte8 r-values.
2691         {
2692                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));   // Result of Nucleus::createAnd wrapped as a new r-value
2693         }
2694
2695         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)   // Bitwise OR of two SByte8 r-values.
2696         {
2697                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));   // Result of Nucleus::createOr wrapped as a new r-value
2698         }
2699
2700         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)   // Bitwise XOR of two SByte8 r-values.
2701         {
2702                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));   // Result of Nucleus::createXor wrapped as a new r-value
2703         }
2704
2705 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2706 //      {
2707 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2708 //      }
2709
2710 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2711 //      {
2712 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2713 //      }
2714
2715         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)   // Compound add on an SByte8 variable.
2716         {
2717                 return lhs = lhs + rhs;   // Evaluates lhs + rhs, assigns it back to lhs, returns the value of that assignment
2718         }
2719
2720         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)   // Compound subtract on an SByte8 variable.
2721         {
2722                 return lhs = lhs - rhs;   // Evaluates lhs - rhs, assigns it back to lhs, returns the value of that assignment
2723         }
2724
2725 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2726 //      {
2727 //              return lhs = lhs * rhs;
2728 //      }
2729
2730 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2731 //      {
2732 //              return lhs = lhs / rhs;
2733 //      }
2734
2735 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2736 //      {
2737 //              return lhs = lhs % rhs;
2738 //      }
2739
2740         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)   // Compound bitwise AND on an SByte8 variable.
2741         {
2742                 return lhs = lhs & rhs;   // Evaluates lhs & rhs, assigns it back to lhs, returns the value of that assignment
2743         }
2744
2745         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)   // Compound bitwise OR on an SByte8 variable.
2746         {
2747                 return lhs = lhs | rhs;   // Evaluates lhs | rhs, assigns it back to lhs, returns the value of that assignment
2748         }
2749
2750         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)   // Compound bitwise XOR on an SByte8 variable.
2751         {
2752                 return lhs = lhs ^ rhs;   // Evaluates lhs ^ rhs, assigns it back to lhs, returns the value of that assignment
2753         }
2754
2755 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2756 //      {
2757 //              return lhs = lhs << rhs;
2758 //      }
2759
2760 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2761 //      {
2762 //              return lhs = lhs >> rhs;
2763 //      }
2764
2765 //      RValue<SByte8> operator+(RValue<SByte8> val)
2766 //      {
2767 //              return val;
2768 //      }
2769
2770 //      RValue<SByte8> operator-(RValue<SByte8> val)
2771 //      {
2772 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
2773 //      }
2774
2775         RValue<SByte8> operator~(RValue<SByte8> val)   // Bitwise complement of an SByte8 r-value.
2776         {
2777                 return RValue<SByte8>(Nucleus::createNot(val.value));   // Result of Nucleus::createNot wrapped as a new r-value
2778         }
2779
2780         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)   // Signed saturating add of x and y, emitted as an AddSaturateSigned intrinsic call.
2781         {
2782                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);   // Destination variable; the IR type used here is v16i8
2783                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2784                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
2785                 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
2786                 paddsb->addArg(x.value);
2787                 paddsb->addArg(y.value);
2788                 ::basicBlock->appendInst(paddsb);   // Appended to the file-level current basic block
2789
2790                 return RValue<SByte8>(V(result));   // The intrinsic's destination variable becomes the returned r-value
2791         }
2792
2793         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)   // Signed saturating subtract (x - y), emitted as a SubtractSaturateSigned intrinsic call.
2794         {
2795                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);   // Destination variable; the IR type used here is v16i8
2796                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2797                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
2798                 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
2799                 psubsb->addArg(x.value);
2800                 psubsb->addArg(y.value);
2801                 ::basicBlock->appendInst(psubsb);   // Appended to the file-level current basic block
2802
2803                 return RValue<SByte8>(V(result));   // The intrinsic's destination variable becomes the returned r-value
2804         }
2805
2806         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)   // Interleaves bytes of x and y (x0, y0, x1, y1, ...) and returns the result typed as Short4.
2807         {
2808                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8; indices 16+ presumably address the second operand (y) - confirm in createShuffleVector
2809                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2810         }
2811
2812         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)   // Interleaves bytes of x and y like UnpackLow, then returns the upper part of the interleaved result.
2813         {
2814                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8; same interleave mask as UnpackLow
2815                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));   // Full 16-byte interleave, kept as Byte16
2816                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));   // 0xEE read as 2-bit fields is 2,3,2,3 - presumably moves the upper two dwords into the low half; confirm against Int4 Swizzle
2817         }
2818
2819         RValue<Int> SignMask(RValue<SByte8> x)   // Emits a SignMask intrinsic call on x and returns its i32 result.
2820         {
2821                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);   // Scalar i32 destination
2822                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2823                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
2824                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);   // 1 matches the single addArg call below
2825                 movmsk->addArg(x.value);
2826                 ::basicBlock->appendInst(movmsk);
2827
2828                 return RValue<Int>(V(result));   // NOTE(review): presumably one sign bit per byte lane - confirm in the backend lowering of SignMask
2829         }
2830
2831         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)   // Signed greater-than comparison (x > y); note the result is typed Byte8, not SByte8.
2832         {
2833                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));   // Uses the createIntCompare helper with the Sgt condition
2834         }
2835
2836         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)   // Equality comparison of x and y; note the result is typed Byte8, not SByte8.
2837         {
2838                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));   // Result of Nucleus::createICmpEQ wrapped as a new r-value
2839         }
2840
2841         Type *SByte8::getType()   // Type handle used for SByte8 values.
2842         {
2843                 return T(Type_v8i8);   // Same Type_v8i8 handle that Byte8::getType returns
2844         }
2845
2846         Byte16::Byte16(RValue<Byte16> rhs)   // Constructs from an r-value.
2847         {
2848                 storeValue(rhs.value);   // Stores rhs's value directly into this variable
2849         }
2850
2851         Byte16::Byte16(const Byte16 &rhs)   // Copy constructor.
2852         {
2853                 Value *value = rhs.loadValue();   // Load the current contents of rhs
2854                 storeValue(value);   // and store them into this variable
2855         }
2856
2857         Byte16::Byte16(const Reference<Byte16> &rhs)   // Constructs from a Reference to a Byte16.
2858         {
2859                 Value *value = rhs.loadValue();   // Load the referenced value
2860                 storeValue(value);   // and store it into this variable
2861         }
2862
2863         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)   // Assigns an r-value to this variable.
2864         {
2865                 storeValue(rhs.value);
2866
2867                 return rhs;   // The assigned r-value is returned unchanged
2868         }
2869
2870         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)   // Assigns from another Byte16 variable.
2871         {
2872                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
2873                 storeValue(value);
2874
2875                 return RValue<Byte16>(value);
2876         }
2877
2878         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)   // Assigns from a Reference to a Byte16.
2879         {
2880                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
2881                 storeValue(value);
2882
2883                 return RValue<Byte16>(value);
2884         }
2885
2886         Type *Byte16::getType()   // Type handle used for Byte16 values.
2887         {
2888                 return T(Ice::IceType_v16i8);   // Wraps the Subzero v16i8 type directly
2889         }
2890
2891         Type *SByte16::getType()   // Type handle used for SByte16 values.
2892         {
2893                 return T(Ice::IceType_v16i8);   // Same v16i8 type as Byte16::getType
2894         }
2895
2896         Short2::Short2(RValue<Short4> cast)   // Constructs a Short2 from a Short4 value.
2897         {
2898                 storeValue(Nucleus::createBitCast(cast.value, getType()));   // Bitcast to Short2's own type; no shuffle is emitted here
2899         }
2900
2901         Type *Short2::getType()   // Type handle used for Short2 values.
2902         {
2903                 return T(Type_v2i16);   // Same Type_v2i16 handle that UShort2::getType returns
2904         }
2905
2906         UShort2::UShort2(RValue<UShort4> cast)   // Constructs a UShort2 from a UShort4 value.
2907         {
2908                 storeValue(Nucleus::createBitCast(cast.value, getType()));   // Bitcast to UShort2's own type; no shuffle is emitted here
2909         }
2910
2911         Type *UShort2::getType()   // Type handle used for UShort2 values.
2912         {
2913                 return T(Type_v2i16);   // Same Type_v2i16 handle that Short2::getType returns
2914         }
2915
2916         Short4::Short4(RValue<Int> cast)   // Truncates an Int to Short and broadcasts it to all four lanes.
2917         {
2918                 Value *vector = loadValue();   // Current contents of this freshly constructed variable; used only as the insertion base
2919                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());   // Truncate the Int to Short's type
2920                 Value *insert = Nucleus::createInsertElement(vector, element, 0);   // Place it in lane 0
2921                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;   // Selector 0x00 picks lane 0 for every output lane, so the other base lanes never reach the result
2922
2923                 storeValue(swizzle);
2924         }
2925
2926         Short4::Short4(RValue<Int4> cast)   // Narrows each 32-bit lane of an Int4 to 16 bits with a byte shuffle (no saturation is applied here).
2927         {
2928                 int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};   // First two bytes of every 4-byte lane, listed twice (the low halves, assuming little-endian lanes)
2929                 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());   // View the Int4 as 16 bytes
2930                 Value *packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);   // Same value passed as both shuffle inputs
2931
2932                 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;   // Route through Int2's constructor from Int4 to obtain an Int2 value
2933                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());   // Reinterpret that Int2 as Short4
2934
2935                 storeValue(short4);
2936         }
2937
2938 //      Short4::Short4(RValue<Float> cast)
2939 //      {
2940 //      }
2941
2942         Short4::Short4(RValue<Float4> cast)   // Float4 -> Short4 conversion; not implemented in this backend.
2943         {
2944                 assert(false && "UNIMPLEMENTED");   // Fires in builds with assertions enabled; with NDEBUG nothing is stored and cast is ignored
2945         }
2946
2947         Short4::Short4(short xyzw)   // Broadcast constructor: all four lanes receive xyzw.
2948         {
2949                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};   // short widens (sign-extends) to int64_t
2950                 storeValue(Nucleus::createConstantVector(constantVector, getType()));   // Constant vector of Short4's type stored into this variable
2951         }
2952
2953         Short4::Short4(short x, short y, short z, short w)   // Per-lane constant constructor; x is listed first, w last.
2954         {
2955                 int64_t constantVector[4] = {x, y, z, w};   // Each short widens (sign-extends) to int64_t
2956                 storeValue(Nucleus::createConstantVector(constantVector, getType()));   // Constant vector of Short4's type stored into this variable
2957         }
2958
2959         Short4::Short4(RValue<Short4> rhs)   // Constructs from an r-value.
2960         {
2961                 storeValue(rhs.value);   // Stores rhs's value directly into this variable
2962         }
2963
2964         Short4::Short4(const Short4 &rhs)   // Copy constructor.
2965         {
2966                 Value *value = rhs.loadValue();   // Load the current contents of rhs
2967                 storeValue(value);   // and store them into this variable
2968         }
2969
2970         Short4::Short4(const Reference<Short4> &rhs)   // Constructs from a Reference to a Short4.
2971         {
2972                 Value *value = rhs.loadValue();   // Load the referenced value
2973                 storeValue(value);   // and store it into this variable
2974         }
2975
2976         Short4::Short4(RValue<UShort4> rhs)   // Constructs from an unsigned UShort4 r-value.
2977         {
2978                 storeValue(rhs.value);   // Value is stored as-is; no conversion instruction is emitted here
2979         }
2980
2981         Short4::Short4(const UShort4 &rhs)   // Constructs from a UShort4 variable.
2982         {
2983                 storeValue(rhs.loadValue());   // Loaded value is stored as-is; no conversion instruction is emitted here
2984         }
2985
2986         Short4::Short4(const Reference<UShort4> &rhs)   // Constructs from a Reference to a UShort4.
2987         {
2988                 storeValue(rhs.loadValue());   // Loaded value is stored as-is; no conversion instruction is emitted here
2989         }
2990
2991         RValue<Short4> Short4::operator=(RValue<Short4> rhs)   // Assigns an r-value to this variable.
2992         {
2993                 storeValue(rhs.value);
2994
2995                 return rhs;   // The assigned r-value is returned unchanged
2996         }
2997
2998         RValue<Short4> Short4::operator=(const Short4 &rhs)   // Assigns from another Short4 variable.
2999         {
3000                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
3001                 storeValue(value);
3002
3003                 return RValue<Short4>(value);
3004         }
3005
3006         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)   // Assigns from a Reference to a Short4.
3007         {
3008                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
3009                 storeValue(value);
3010
3011                 return RValue<Short4>(value);
3012         }
3013
3014         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)   // Assigns an unsigned UShort4 r-value to this variable.
3015         {
3016                 storeValue(rhs.value);   // Value is stored as-is; no conversion instruction is emitted here
3017
3018                 return RValue<Short4>(rhs);   // Returned through an RValue<Short4> built from the UShort4 r-value
3019         }
3020
3021         RValue<Short4> Short4::operator=(const UShort4 &rhs)   // Assigns from a UShort4 variable.
3022         {
3023                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
3024                 storeValue(value);
3025
3026                 return RValue<Short4>(value);
3027         }
3028
3029         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)   // Assigns from a Reference to a UShort4.
3030         {
3031                 Value *value = rhs.loadValue();   // Load once; the same value is stored and returned
3032                 storeValue(value);
3033
3034                 return RValue<Short4>(value);
3035         }
3036
3037         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)   // Addition of two Short4 r-values.
3038         {
3039                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));   // Result of Nucleus::createAdd wrapped as a new r-value
3040         }
3041
3042         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)   // Subtraction (lhs - rhs) of two Short4 r-values.
3043         {
3044                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));   // Result of Nucleus::createSub wrapped as a new r-value
3045         }
3046
3047         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)   // Multiplication of two Short4 r-values.
3048         {
3049                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));   // Result of Nucleus::createMul wrapped as a new r-value
3050         }
3051
3052 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3053 //      {
3054 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3055 //      }
3056
3057 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3058 //      {
3059 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3060 //      }
3061
3062         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)   // Bitwise AND of two Short4 r-values.
3063         {
3064                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));   // Result of Nucleus::createAnd wrapped as a new r-value
3065         }
3066
3067         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)   // Bitwise OR of two Short4 r-values.
3068         {
3069                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));   // Result of Nucleus::createOr wrapped as a new r-value
3070         }
3071
3072         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)   // Bitwise XOR of two Short4 r-values.
3073         {
3074                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));   // Result of Nucleus::createXor wrapped as a new r-value
3075         }
3076
3077         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)   // Left shift of a Short4 by a count fixed at code-generation time.
3078         {
3079                 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));   // The count is passed to createShl as a 32-bit integer constant
3080         }
3081
3082         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)   // Right shift of a Short4 by a count fixed at code-generation time.
3083         {
3084                 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));   // Goes through createAShr; the count is passed as a 32-bit integer constant
3085         }
3086
3087         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)   // Compound add on a Short4 variable.
3088         {
3089                 return lhs = lhs + rhs;   // Evaluates lhs + rhs, assigns it back to lhs, returns the value of that assignment
3090         }
3091
3092         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)   // Compound subtract on a Short4 variable.
3093         {
3094                 return lhs = lhs - rhs;   // Evaluates lhs - rhs, assigns it back to lhs, returns the value of that assignment
3095         }
3096
3097         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)   // Compound multiply on a Short4 variable.
3098         {
3099                 return lhs = lhs * rhs;   // Evaluates lhs * rhs, assigns it back to lhs, returns the value of that assignment
3100         }
3101
3102 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3103 //      {
3104 //              return lhs = lhs / rhs;
3105 //      }
3106
3107 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3108 //      {
3109 //              return lhs = lhs % rhs;
3110 //      }
3111
3112         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)   // Compound bitwise AND on a Short4 variable.
3113         {
3114                 return lhs = lhs & rhs;   // Evaluates lhs & rhs, assigns it back to lhs, returns the value of that assignment
3115         }
3116
3117         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)   // Compound bitwise OR on a Short4 variable.
3118         {
3119                 return lhs = lhs | rhs;   // Evaluates lhs | rhs, assigns it back to lhs, returns the value of that assignment
3120         }
3121
3122         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)   // Compound bitwise XOR on a Short4 variable.
3123         {
3124                 return lhs = lhs ^ rhs;   // Evaluates lhs ^ rhs, assigns it back to lhs, returns the value of that assignment
3125         }
3126
3127         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)   // Compound left shift on a Short4 variable.
3128         {
3129                 return lhs = lhs << rhs;   // Evaluates lhs << rhs, assigns it back to lhs, returns the value of that assignment
3130         }
3131
3132         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)   // Compound right shift on a Short4 variable.
3133         {
3134                 return lhs = lhs >> rhs;   // Evaluates lhs >> rhs, assigns it back to lhs, returns the value of that assignment
3135         }
3136
3137 //      RValue<Short4> operator+(RValue<Short4> val)
3138 //      {
3139 //              return val;
3140 //      }
3141
3142         RValue<Short4> operator-(RValue<Short4> val)   // Unary negation of a Short4 r-value.
3143         {
3144                 return RValue<Short4>(Nucleus::createNeg(val.value));   // Result of Nucleus::createNeg wrapped as a new r-value
3145         }
3146
3147         RValue<Short4> operator~(RValue<Short4> val)   // Bitwise complement of a Short4 r-value.
3148         {
3149                 return RValue<Short4>(Nucleus::createNot(val.value));   // Result of Nucleus::createNot wrapped as a new r-value
3150         }
3151
3152         RValue<Short4> RoundShort4(RValue<Float4> cast)   // Converts four floats to Short4 by way of RoundInt and Pack.
3153         {
3154                 RValue<Int4> int4 = RoundInt(cast);   // Float4 -> Int4 using RoundInt
3155                 return As<Short4>(Pack(int4, int4));   // Pack the Int4 with itself and reinterpret as Short4; NOTE(review): Pack presumably saturates to 16 bits - confirm
3156         }
3157
3158         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)   // Lane-wise signed maximum of x and y, built from a compare followed by a select.
3159         {
3160                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);   // Comparison result; 8-lane IR types are used for Short4 here
3161                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);   // condition = (x <= y), signed
3162                 ::basicBlock->appendInst(cmp);
3163
3164                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3165                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);   // Operands after the condition are (y, x): presumably y where x <= y, else x - confirm InstSelect operand order
3166                 ::basicBlock->appendInst(select);
3167
3168                 return RValue<Short4>(V(result));
3169         }
3170
3171         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)   // Lane-wise signed minimum of x and y, built from a compare followed by a select.
3172         {
3173                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);   // Comparison result; 8-lane IR types are used for Short4 here
3174                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);   // condition = (x > y), signed
3175                 ::basicBlock->appendInst(cmp);
3176
3177                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3178                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);   // Operands after the condition are (y, x): presumably y where x > y, else x - confirm InstSelect operand order
3179                 ::basicBlock->appendInst(select);
3180
3181                 return RValue<Short4>(V(result));
3182         }
3183
3184         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)   // Signed saturating add of x and y, emitted as an AddSaturateSigned intrinsic call.
3185         {
3186                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);   // Destination variable; the IR type used here is v8i16
3187                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3188                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
3189                 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
3190                 paddsw->addArg(x.value);
3191                 paddsw->addArg(y.value);
3192                 ::basicBlock->appendInst(paddsw);   // Appended to the file-level current basic block
3193
3194                 return RValue<Short4>(V(result));   // The intrinsic's destination variable becomes the returned r-value
3195         }
3196
3197         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)   // Signed saturating subtract (x - y), emitted as a SubtractSaturateSigned intrinsic call.
3198         {
3199                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);   // Destination variable; the IR type used here is v8i16
3200                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3201                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
3202                 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
3203                 psubsw->addArg(x.value);
3204                 psubsw->addArg(y.value);
3205                 ::basicBlock->appendInst(psubsw);   // Appended to the file-level current basic block
3206
3207                 return RValue<Short4>(V(result));   // The intrinsic's destination variable becomes the returned r-value
3208         }
3209
3210         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)   // Emits a MultiplyHighSigned intrinsic call on x and y (presumably the high 16 bits of each signed product - confirm in the lowering).
3211         {
3212                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);   // Destination variable; the IR type used here is v8i16
3213                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3214                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
3215                 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
3216                 pmulhw->addArg(x.value);
3217                 pmulhw->addArg(y.value);
3218                 ::basicBlock->appendInst(pmulhw);   // Appended to the file-level current basic block
3219
3220                 return RValue<Short4>(V(result));   // The intrinsic's destination variable becomes the returned r-value
3221         }
3222
3223         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)   // Emits a MultiplyAddPairs intrinsic call on x and y and returns the result typed as Int2.
3224         {
3225                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);   // NOTE(review): destination is declared v8i16 although the function returns Int2 - confirm this is intended
3226                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3227                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
3228                 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
3229                 pmaddwd->addArg(x.value);
3230                 pmaddwd->addArg(y.value);
3231                 ::basicBlock->appendInst(pmaddwd);   // Appended to the file-level current basic block
3232
3233                 return RValue<Int2>(V(result));   // The intrinsic's destination variable becomes the returned r-value
3234         }
3235
3236         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)   // Emits a VectorPackSigned intrinsic call on x and y, then rearranges the result into an SByte8.
3237         {
3238                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);   // Destination of the pack; the IR type used here is v16i8
3239                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3240                 auto target = ::context->getConstantUndef(Ice::IceType_i32);   // Call target is an undef i32 constant; the operation is described by `intrinsic`
3241                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);   // 2 matches the two addArg calls below
3242                 pack->addArg(x.value);
3243                 pack->addArg(y.value);
3244                 ::basicBlock->appendInst(pack);
3245
3246                 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));   // 0x88 read as 2-bit fields is 0,2,0,2 - presumably gathers the dwords packed from x and from y into the low half; confirm against Int4 Swizzle
3247         }
3248
3249         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)   // Interleaves 16-bit lanes of x and y (x0, y0, x1, y1, ...) and returns the result typed as Int2.
3250         {
3251                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16; indices 8+ presumably address the second operand (y) - confirm in createShuffleVector
3252                 return RValue<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3253         }
3254
3255         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)   // Interleaves 16-bit lanes of x and y like UnpackLow, then returns the upper part of the interleaved result.
3256         {
3257                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16; same interleave mask as UnpackLow
3258                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));   // Full 8-lane interleave, kept as Short8
3259                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));   // 0xEE read as 2-bit fields is 2,3,2,3 - presumably moves the upper two dwords into the low half; confirm against Int4 Swizzle
3260         }
3261
3262         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)   // Permutes the four lanes of x according to a selector holding one 2-bit source index per output lane.
3263         {
3264                 // Real type is v8i16
3265                 int shuffle[8] =
3266                 {
3267                         (select >> 0) & 0x03,   // bits 1:0 -> source lane for output lane 0
3268                         (select >> 2) & 0x03,   // bits 3:2 -> source lane for output lane 1
3269                         (select >> 4) & 0x03,   // bits 5:4 -> source lane for output lane 2
3270                         (select >> 6) & 0x03,   // bits 7:6 -> source lane for output lane 3
3271                         (select >> 0) & 0x03,   // The same four indices are repeated for lanes 4-7 of the v8i16
3272                         (select >> 2) & 0x03,
3273                         (select >> 4) & 0x03,
3274                         (select >> 6) & 0x03,
3275                 };
3276
3277                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));   // x is passed as both shuffle inputs
3278         }
3279
3280         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)   // Returns val with lane i replaced by element.
3281         {
3282                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));   // i is forwarded unchecked to Nucleus::createInsertElement
3283         }
3284
3285         RValue<Short> Extract(RValue<Short4> val, int i)   // Returns lane i of val as a Short.
3286         {
3287                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));   // i is forwarded unchecked; Short::getType() is passed as the element type
3288         }
3289
3290         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3291         {
3292                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3293         }
3294
3295         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3296         {
3297                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3298         }
3299
3300         Type *Short4::getType()
3301         {
3302                 return T(Type_v4i16);
3303         }
3304
3305         UShort4::UShort4(RValue<Int4> cast)
3306         {
3307                 *this = Short4(cast);
3308         }
3309
3310         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3311         {
3312                 if(saturate)
3313                 {
3314                         if(true)   // SSE 4.1
3315                         {
3316                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3317                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3318                         }
3319                         else
3320                         {
3321                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3322                         }
3323                 }
3324                 else
3325                 {
3326                         *this = Short4(Int4(cast));
3327                 }
3328         }
3329
3330         UShort4::UShort4(unsigned short xyzw)
3331         {
3332                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3333                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3334         }
3335
3336         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3337         {
3338                 int64_t constantVector[4] = {x, y, z, w};
3339                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3340         }
3341
3342         UShort4::UShort4(RValue<UShort4> rhs)
3343         {
3344                 storeValue(rhs.value);
3345         }
3346
3347         UShort4::UShort4(const UShort4 &rhs)
3348         {
3349                 Value *value = rhs.loadValue();
3350                 storeValue(value);
3351         }
3352
3353         UShort4::UShort4(const Reference<UShort4> &rhs)
3354         {
3355                 Value *value = rhs.loadValue();
3356                 storeValue(value);
3357         }
3358
3359         UShort4::UShort4(RValue<Short4> rhs)
3360         {
3361                 storeValue(rhs.value);
3362         }
3363
3364         UShort4::UShort4(const Short4 &rhs)
3365         {
3366                 Value *value = rhs.loadValue();
3367                 storeValue(value);
3368         }
3369
3370         UShort4::UShort4(const Reference<Short4> &rhs)
3371         {
3372                 Value *value = rhs.loadValue();
3373                 storeValue(value);
3374         }
3375
3376         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3377         {
3378                 storeValue(rhs.value);
3379
3380                 return rhs;
3381         }
3382
3383         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3384         {
3385                 Value *value = rhs.loadValue();
3386                 storeValue(value);
3387
3388                 return RValue<UShort4>(value);
3389         }
3390
3391         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3392         {
3393                 Value *value = rhs.loadValue();
3394                 storeValue(value);
3395
3396                 return RValue<UShort4>(value);
3397         }
3398
3399         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3400         {
3401                 storeValue(rhs.value);
3402
3403                 return RValue<UShort4>(rhs);
3404         }
3405
3406         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3407         {
3408                 Value *value = rhs.loadValue();
3409                 storeValue(value);
3410
3411                 return RValue<UShort4>(value);
3412         }
3413
3414         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3415         {
3416                 Value *value = rhs.loadValue();
3417                 storeValue(value);
3418
3419                 return RValue<UShort4>(value);
3420         }
3421
3422         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3423         {
3424                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3425         }
3426
3427         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3428         {
3429                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3430         }
3431
3432         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3433         {
3434                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3435         }
3436
3437         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3438         {
3439                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3440         }
3441
3442         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3443         {
3444                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3445         }
3446
3447         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3448         {
3449                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3450         }
3451
3452         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3453         {
3454                 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3455         }
3456
3457         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3458         {
3459                 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3460         }
3461
3462         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3463         {
3464                 return lhs = lhs << rhs;
3465         }
3466
3467         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3468         {
3469                 return lhs = lhs >> rhs;
3470         }
3471
3472         RValue<UShort4> operator~(RValue<UShort4> val)
3473         {
3474                 return RValue<UShort4>(Nucleus::createNot(val.value));
3475         }
3476
3477         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3478         {
3479                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3480                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3481                 ::basicBlock->appendInst(cmp);
3482
3483                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3484                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3485                 ::basicBlock->appendInst(select);
3486
3487                 return RValue<UShort4>(V(result));
3488         }
3489
3490         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3491         {
3492                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3493                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3494                 ::basicBlock->appendInst(cmp);
3495
3496                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3497                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3498                 ::basicBlock->appendInst(select);
3499
3500                 return RValue<UShort4>(V(result));
3501         }
3502
3503         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3504         {
3505                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3506                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3507                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3508                 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3509                 paddusw->addArg(x.value);
3510                 paddusw->addArg(y.value);
3511                 ::basicBlock->appendInst(paddusw);
3512
3513                 return RValue<UShort4>(V(result));
3514         }
3515
3516         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3517         {
3518                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3519                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3520                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3521                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3522                 psubusw->addArg(x.value);
3523                 psubusw->addArg(y.value);
3524                 ::basicBlock->appendInst(psubusw);
3525
3526                 return RValue<UShort4>(V(result));
3527         }
3528
3529         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3530         {
3531                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3532                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3533                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3534                 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3535                 pmulhuw->addArg(x.value);
3536                 pmulhuw->addArg(y.value);
3537                 ::basicBlock->appendInst(pmulhuw);
3538
3539                 return RValue<UShort4>(V(result));
3540         }
3541
3542         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3543         {
3544                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
3545         }
3546
3547         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3548         {
3549                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3550                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3551                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3552                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3553                 pack->addArg(x.value);
3554                 pack->addArg(y.value);
3555                 ::basicBlock->appendInst(pack);
3556
3557                 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
3558         }
3559
3560         Type *UShort4::getType()
3561         {
3562                 return T(Type_v4i16);
3563         }
3564
3565         Short8::Short8(short c)
3566         {
3567                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3568                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3569         }
3570
3571         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3572         {
3573                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3574                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3575         }
3576
3577         Short8::Short8(RValue<Short8> rhs)
3578         {
3579                 storeValue(rhs.value);
3580         }
3581
3582         Short8::Short8(const Reference<Short8> &rhs)
3583         {
3584                 Value *value = rhs.loadValue();
3585                 storeValue(value);
3586         }
3587
3588         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3589         {
3590                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3591                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3592
3593                 storeValue(packed);
3594         }
3595
3596         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3597         {
3598                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3599         }
3600
3601         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3602         {
3603                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3604         }
3605
3606         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3607         {
3608                 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3609         }
3610
3611         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3612         {
3613                 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3614         }
3615
3616         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3617         {
3618                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
3619         }
3620
3621         RValue<Int4> Abs(RValue<Int4> x)
3622         {
3623                 auto negative = x >> 31;
3624                 return (x ^ negative) - negative;
3625         }
3626
3627         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3628         {
3629                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
3630         }
3631
3632         Type *Short8::getType()
3633         {
3634                 return T(Ice::IceType_v8i16);
3635         }
3636
3637         UShort8::UShort8(unsigned short c)
3638         {
3639                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3640                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3641         }
3642
3643         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3644         {
3645                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3646                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3647         }
3648
3649         UShort8::UShort8(RValue<UShort8> rhs)
3650         {
3651                 storeValue(rhs.value);
3652         }
3653
3654         UShort8::UShort8(const Reference<UShort8> &rhs)
3655         {
3656                 Value *value = rhs.loadValue();
3657                 storeValue(value);
3658         }
3659
3660         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3661         {
3662                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3663                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3664
3665                 storeValue(packed);
3666         }
3667
3668         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3669         {
3670                 storeValue(rhs.value);
3671
3672                 return rhs;
3673         }
3674
3675         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3676         {
3677                 Value *value = rhs.loadValue();
3678                 storeValue(value);
3679
3680                 return RValue<UShort8>(value);
3681         }
3682
3683         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3684         {
3685                 Value *value = rhs.loadValue();
3686                 storeValue(value);
3687
3688                 return RValue<UShort8>(value);
3689         }
3690
3691         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3692         {
3693                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3694         }
3695
3696         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3697         {
3698                 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3699         }
3700
3701         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3702         {
3703                 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3704         }
3705
3706         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3707         {
3708                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3709         }
3710
3711         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3712         {
3713                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3714         }
3715
3716         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3717         {
3718                 return lhs = lhs + rhs;
3719         }
3720
3721         RValue<UShort8> operator~(RValue<UShort8> val)
3722         {
3723                 return RValue<UShort8>(Nucleus::createNot(val.value));
3724         }
3725
3726         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3727         {
3728                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3729         }
3730
3731         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3732         {
3733                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3734         }
3735
3736         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3737 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3738 //      {
3739 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3740 //      }
3741
3742         Type *UShort8::getType()
3743         {
3744                 return T(Ice::IceType_v8i16);
3745         }
3746
3747         Int::Int(Argument<Int> argument)
3748         {
3749                 storeValue(argument.value);
3750         }
3751
3752         Int::Int(RValue<Byte> cast)
3753         {
3754                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3755
3756                 storeValue(integer);
3757         }
3758
3759         Int::Int(RValue<SByte> cast)
3760         {
3761                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3762
3763                 storeValue(integer);
3764         }
3765
3766         Int::Int(RValue<Short> cast)
3767         {
3768                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3769
3770                 storeValue(integer);
3771         }
3772
3773         Int::Int(RValue<UShort> cast)
3774         {
3775                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3776
3777                 storeValue(integer);
3778         }
3779
3780         Int::Int(RValue<Int2> cast)
3781         {
3782                 *this = Extract(cast, 0);
3783         }
3784
3785         Int::Int(RValue<Long> cast)
3786         {
3787                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3788
3789                 storeValue(integer);
3790         }
3791
3792         Int::Int(RValue<Float> cast)
3793         {
3794                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3795
3796                 storeValue(integer);
3797         }
3798
3799         Int::Int(int x)
3800         {
3801                 storeValue(Nucleus::createConstantInt(x));
3802         }
3803
3804         Int::Int(RValue<Int> rhs)
3805         {
3806                 storeValue(rhs.value);
3807         }
3808
3809         Int::Int(RValue<UInt> rhs)
3810         {
3811                 storeValue(rhs.value);
3812         }
3813
3814         Int::Int(const Int &rhs)
3815         {
3816                 Value *value = rhs.loadValue();
3817                 storeValue(value);
3818         }
3819
3820         Int::Int(const Reference<Int> &rhs)
3821         {
3822                 Value *value = rhs.loadValue();
3823                 storeValue(value);
3824         }
3825
3826         Int::Int(const UInt &rhs)
3827         {
3828                 Value *value = rhs.loadValue();
3829                 storeValue(value);
3830         }
3831
3832         Int::Int(const Reference<UInt> &rhs)
3833         {
3834                 Value *value = rhs.loadValue();
3835                 storeValue(value);
3836         }
3837
3838         RValue<Int> Int::operator=(int rhs)
3839         {
3840                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3841         }
3842
3843         RValue<Int> Int::operator=(RValue<Int> rhs)
3844         {
3845                 storeValue(rhs.value);
3846
3847                 return rhs;
3848         }
3849
3850         RValue<Int> Int::operator=(RValue<UInt> rhs)
3851         {
3852                 storeValue(rhs.value);
3853
3854                 return RValue<Int>(rhs);
3855         }
3856
3857         RValue<Int> Int::operator=(const Int &rhs)
3858         {
3859                 Value *value = rhs.loadValue();
3860                 storeValue(value);
3861
3862                 return RValue<Int>(value);
3863         }
3864
3865         RValue<Int> Int::operator=(const Reference<Int> &rhs)
3866         {
3867                 Value *value = rhs.loadValue();
3868                 storeValue(value);
3869
3870                 return RValue<Int>(value);
3871         }
3872
3873         RValue<Int> Int::operator=(const UInt &rhs)
3874         {
3875                 Value *value = rhs.loadValue();
3876                 storeValue(value);
3877
3878                 return RValue<Int>(value);
3879         }
3880
3881         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3882         {
3883                 Value *value = rhs.loadValue();
3884                 storeValue(value);
3885
3886                 return RValue<Int>(value);
3887         }
3888
3889         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3890         {
3891                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3892         }
3893
3894         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3895         {
3896                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3897         }
3898
3899         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3900         {
3901                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3902         }
3903
3904         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3905         {
3906                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3907         }
3908
3909         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3910         {
3911                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3912         }
3913
3914         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3915         {
3916                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3917         }
3918
3919         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3920         {
3921                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3922         }
3923
3924         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3925         {
3926                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3927         }
3928
3929         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3930         {
3931                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3932         }
3933
3934         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3935         {
3936                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3937         }
3938
3939         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
3940         {
3941                 return lhs = lhs + rhs;
3942         }
3943
3944         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
3945         {
3946                 return lhs = lhs - rhs;
3947         }
3948
3949         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
3950         {
3951                 return lhs = lhs * rhs;
3952         }
3953
3954         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
3955         {
3956                 return lhs = lhs / rhs;
3957         }
3958
3959         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
3960         {
3961                 return lhs = lhs % rhs;
3962         }
3963
3964         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
3965         {
3966                 return lhs = lhs & rhs;
3967         }
3968
3969         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
3970         {
3971                 return lhs = lhs | rhs;
3972         }
3973
3974         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
3975         {
3976                 return lhs = lhs ^ rhs;
3977         }
3978
3979         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
3980         {
3981                 return lhs = lhs << rhs;
3982         }
3983
3984         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
3985         {
3986                 return lhs = lhs >> rhs;
3987         }
3988
3989         RValue<Int> operator+(RValue<Int> val)
3990         {
3991                 return val;
3992         }
3993
3994         RValue<Int> operator-(RValue<Int> val)
3995         {
3996                 return RValue<Int>(Nucleus::createNeg(val.value));
3997         }
3998
3999         RValue<Int> operator~(RValue<Int> val)
4000         {
4001                 return RValue<Int>(Nucleus::createNot(val.value));
4002         }
4003
4004         RValue<Int> operator++(Int &val, int)   // Post-increment
4005         {
4006                 RValue<Int> res = val;
4007                 val += 1;
4008                 return res;
4009         }
4010
4011         const Int &operator++(Int &val)   // Pre-increment
4012         {
4013                 val += 1;
4014                 return val;
4015         }
4016
4017         RValue<Int> operator--(Int &val, int)   // Post-decrement
4018         {
4019                 RValue<Int> res = val;
4020                 val -= 1;
4021                 return res;
4022         }
4023
4024         const Int &operator--(Int &val)   // Pre-decrement
4025         {
4026                 val -= 1;
4027                 return val;
4028         }
4029
4030         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4031         {
4032                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4033         }
4034
4035         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4036         {
4037                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4038         }
4039
4040         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4041         {
4042                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4043         }
4044
4045         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4046         {
4047                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4048         }
4049
4050         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4051         {
4052                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4053         }
4054
4055         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4056         {
4057                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4058         }
4059
4060         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4061         {
4062                 return IfThenElse(x > y, x, y);
4063         }
4064
4065         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4066         {
4067                 return IfThenElse(x < y, x, y);
4068         }
4069
4070         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4071         {
4072                 return Min(Max(x, min), max);
4073         }
4074
4075         RValue<Int> RoundInt(RValue<Float> cast)
4076         {
4077                 RValue<Float> rounded = Round(cast);
4078
4079                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4080                 auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
4081                 ::basicBlock->appendInst(round);
4082
4083                 return RValue<Int>(V(result));
4084         }
4085
4086         Type *Int::getType()
4087         {
4088                 return T(Ice::IceType_i32);
4089         }
4090
4091         Long::Long(RValue<Int> cast)
4092         {
4093                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4094
4095                 storeValue(integer);
4096         }
4097
4098         Long::Long(RValue<UInt> cast)
4099         {
4100                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4101
4102                 storeValue(integer);
4103         }
4104
4105         Long::Long(RValue<Long> rhs)
4106         {
4107                 storeValue(rhs.value);
4108         }
4109
4110         RValue<Long> Long::operator=(int64_t rhs)
4111         {
4112                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4113         }
4114
4115         RValue<Long> Long::operator=(RValue<Long> rhs)
4116         {
4117                 storeValue(rhs.value);
4118
4119                 return rhs;
4120         }
4121
4122         RValue<Long> Long::operator=(const Long &rhs)
4123         {
4124                 Value *value = rhs.loadValue();
4125                 storeValue(value);
4126
4127                 return RValue<Long>(value);
4128         }
4129
4130         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4131         {
4132                 Value *value = rhs.loadValue();
4133                 storeValue(value);
4134
4135                 return RValue<Long>(value);
4136         }
4137
4138         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4139         {
4140                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4141         }
4142
4143         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4144         {
4145                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4146         }
4147
4148         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4149         {
4150                 return lhs = lhs + rhs;
4151         }
4152
4153         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4154         {
4155                 return lhs = lhs - rhs;
4156         }
4157
4158         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4159         {
4160                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4161         }
4162
4163         Type *Long::getType()
4164         {
4165                 return T(Ice::IceType_i64);
4166         }
4167
4168         UInt::UInt(Argument<UInt> argument)
4169         {
4170                 storeValue(argument.value);
4171         }
4172
4173         UInt::UInt(RValue<UShort> cast)
4174         {
4175                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4176
4177                 storeValue(integer);
4178         }
4179
4180         UInt::UInt(RValue<Long> cast)
4181         {
4182                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4183
4184                 storeValue(integer);
4185         }
4186
4187         UInt::UInt(RValue<Float> cast)
4188         {
4189                 // Smallest positive value representable in UInt, but not in Int
4190                 const unsigned int ustart = 0x80000000u;
4191                 const float ustartf = float(ustart);
4192
4193                 // If the value is negative, store 0, otherwise store the result of the conversion
4194                 storeValue((~(As<Int>(cast) >> 31) &
4195                 // Check if the value can be represented as an Int
4196                         IfThenElse(cast >= ustartf,
4197                 // If the value is too large, subtract ustart and re-add it after conversion.
4198                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4199                 // Otherwise, just convert normally
4200                                 Int(cast))).value);
4201         }
4202
4203         UInt::UInt(int x)
4204         {
4205                 storeValue(Nucleus::createConstantInt(x));
4206         }
4207
4208         UInt::UInt(unsigned int x)
4209         {
4210                 storeValue(Nucleus::createConstantInt(x));
4211         }
4212
4213         UInt::UInt(RValue<UInt> rhs)
4214         {
4215                 storeValue(rhs.value);
4216         }
4217
4218         UInt::UInt(RValue<Int> rhs)
4219         {
4220                 storeValue(rhs.value);
4221         }
4222
4223         UInt::UInt(const UInt &rhs)
4224         {
4225                 Value *value = rhs.loadValue();
4226                 storeValue(value);
4227         }
4228
4229         UInt::UInt(const Reference<UInt> &rhs)
4230         {
4231                 Value *value = rhs.loadValue();
4232                 storeValue(value);
4233         }
4234
4235         UInt::UInt(const Int &rhs)
4236         {
4237                 Value *value = rhs.loadValue();
4238                 storeValue(value);
4239         }
4240
4241         UInt::UInt(const Reference<Int> &rhs)
4242         {
4243                 Value *value = rhs.loadValue();
4244                 storeValue(value);
4245         }
4246
4247         RValue<UInt> UInt::operator=(unsigned int rhs)
4248         {
4249                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4250         }
4251
4252         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4253         {
4254                 storeValue(rhs.value);
4255
4256                 return rhs;
4257         }
4258
4259         RValue<UInt> UInt::operator=(RValue<Int> rhs)
4260         {
4261                 storeValue(rhs.value);
4262
4263                 return RValue<UInt>(rhs);
4264         }
4265
4266         RValue<UInt> UInt::operator=(const UInt &rhs)
4267         {
4268                 Value *value = rhs.loadValue();
4269                 storeValue(value);
4270
4271                 return RValue<UInt>(value);
4272         }
4273
4274         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4275         {
4276                 Value *value = rhs.loadValue();
4277                 storeValue(value);
4278
4279                 return RValue<UInt>(value);
4280         }
4281
4282         RValue<UInt> UInt::operator=(const Int &rhs)
4283         {
4284                 Value *value = rhs.loadValue();
4285                 storeValue(value);
4286
4287                 return RValue<UInt>(value);
4288         }
4289
4290         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4291         {
4292                 Value *value = rhs.loadValue();
4293                 storeValue(value);
4294
4295                 return RValue<UInt>(value);
4296         }
4297
4298         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4299         {
4300                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4301         }
4302
4303         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4304         {
4305                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4306         }
4307
4308         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4309         {
4310                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4311         }
4312
4313         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4314         {
4315                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4316         }
4317
4318         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4319         {
4320                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4321         }
4322
4323         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4324         {
4325                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4326         }
4327
4328         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4329         {
4330                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4331         }
4332
4333         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4334         {
4335                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4336         }
4337
4338         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4339         {
4340                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4341         }
4342
4343         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4344         {
4345                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4346         }
4347
4348         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4349         {
4350                 return lhs = lhs + rhs;
4351         }
4352
4353         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4354         {
4355                 return lhs = lhs - rhs;
4356         }
4357
4358         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4359         {
4360                 return lhs = lhs * rhs;
4361         }
4362
4363         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4364         {
4365                 return lhs = lhs / rhs;
4366         }
4367
4368         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4369         {
4370                 return lhs = lhs % rhs;
4371         }
4372
4373         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4374         {
4375                 return lhs = lhs & rhs;
4376         }
4377
4378         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4379         {
4380                 return lhs = lhs | rhs;
4381         }
4382
4383         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4384         {
4385                 return lhs = lhs ^ rhs;
4386         }
4387
4388         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4389         {
4390                 return lhs = lhs << rhs;
4391         }
4392
4393         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4394         {
4395                 return lhs = lhs >> rhs;
4396         }
4397
4398         RValue<UInt> operator+(RValue<UInt> val)
4399         {
4400                 return val;
4401         }
4402
4403         RValue<UInt> operator-(RValue<UInt> val)
4404         {
4405                 return RValue<UInt>(Nucleus::createNeg(val.value));
4406         }
4407
4408         RValue<UInt> operator~(RValue<UInt> val)
4409         {
4410                 return RValue<UInt>(Nucleus::createNot(val.value));
4411         }
4412
4413         RValue<UInt> operator++(UInt &val, int)   // Post-increment
4414         {
4415                 RValue<UInt> res = val;
4416                 val += 1;
4417                 return res;
4418         }
4419
4420         const UInt &operator++(UInt &val)   // Pre-increment
4421         {
4422                 val += 1;
4423                 return val;
4424         }
4425
4426         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
4427         {
4428                 RValue<UInt> res = val;
4429                 val -= 1;
4430                 return res;
4431         }
4432
4433         const UInt &operator--(UInt &val)   // Pre-decrement
4434         {
4435                 val -= 1;
4436                 return val;
4437         }
4438
4439         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4440         {
4441                 return IfThenElse(x > y, x, y);
4442         }
4443
4444         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4445         {
4446                 return IfThenElse(x < y, x, y);
4447         }
4448
4449         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4450         {
4451                 return Min(Max(x, min), max);
4452         }
4453
4454         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4455         {
4456                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4457         }
4458
4459         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4460         {
4461                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4462         }
4463
4464         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4465         {
4466                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4467         }
4468
4469         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4470         {
4471                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4472         }
4473
4474         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4475         {
4476                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4477         }
4478
4479         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4480         {
4481                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4482         }
4483
4484 //      RValue<UInt> RoundUInt(RValue<Float> cast)
4485 //      {
4486 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
4487 //      }
4488
4489         Type *UInt::getType()
4490         {
4491                 return T(Ice::IceType_i32);
4492         }
4493
4494 //      Int2::Int2(RValue<Int> cast)
4495 //      {
4496 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4497 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4498 //
4499 //              Constant *shuffle[2];
4500 //              shuffle[0] = Nucleus::createConstantInt(0);
4501 //              shuffle[1] = Nucleus::createConstantInt(0);
4502 //
4503 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4504 //
4505 //              storeValue(replicate);
4506 //      }
4507
4508         Int2::Int2(RValue<Int4> cast)
4509         {
4510                 storeValue(Nucleus::createBitCast(cast.value, getType()));
4511         }
4512
4513         Int2::Int2(int x, int y)
4514         {
4515                 int64_t constantVector[2] = {x, y};
4516                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4517         }
4518
4519         Int2::Int2(RValue<Int2> rhs)
4520         {
4521                 storeValue(rhs.value);
4522         }
4523
4524         Int2::Int2(const Int2 &rhs)
4525         {
4526                 Value *value = rhs.loadValue();
4527                 storeValue(value);
4528         }
4529
4530         Int2::Int2(const Reference<Int2> &rhs)
4531         {
4532                 Value *value = rhs.loadValue();
4533                 storeValue(value);
4534         }
4535
4536         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4537         {
4538                 int shuffle[4] = {0, 4, 1, 5};
4539                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4540
4541                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4542         }
4543
4544         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4545         {
4546                 storeValue(rhs.value);
4547
4548                 return rhs;
4549         }
4550
4551         RValue<Int2> Int2::operator=(const Int2 &rhs)
4552         {
4553                 Value *value = rhs.loadValue();
4554                 storeValue(value);
4555
4556                 return RValue<Int2>(value);
4557         }
4558
4559         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4560         {
4561                 Value *value = rhs.loadValue();
4562                 storeValue(value);
4563
4564                 return RValue<Int2>(value);
4565         }
4566
4567         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4568         {
4569                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4570         }
4571
4572         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4573         {
4574                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4575         }
4576
4577 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4578 //      {
4579 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4580 //      }
4581
4582 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4583 //      {
4584 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4585 //      }
4586
4587 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4588 //      {
4589 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4590 //      }
4591
4592         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4593         {
4594                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4595         }
4596
4597         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4598         {
4599                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4600         }
4601
4602         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4603         {
4604                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4605         }
4606
4607         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4608         {
4609                 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4610         }
4611
4612         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4613         {
4614                 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4615         }
4616
4617         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4618         {
4619                 return lhs = lhs + rhs;
4620         }
4621
4622         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4623         {
4624                 return lhs = lhs - rhs;
4625         }
4626
4627 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4628 //      {
4629 //              return lhs = lhs * rhs;
4630 //      }
4631
4632 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4633 //      {
4634 //              return lhs = lhs / rhs;
4635 //      }
4636
4637 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4638 //      {
4639 //              return lhs = lhs % rhs;
4640 //      }
4641
4642         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4643         {
4644                 return lhs = lhs & rhs;
4645         }
4646
4647         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4648         {
4649                 return lhs = lhs | rhs;
4650         }
4651
4652         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4653         {
4654                 return lhs = lhs ^ rhs;
4655         }
4656
4657         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4658         {
4659                 return lhs = lhs << rhs;
4660         }
4661
4662         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4663         {
4664                 return lhs = lhs >> rhs;
4665         }
4666
4667 //      RValue<Int2> operator+(RValue<Int2> val)
4668 //      {
4669 //              return val;
4670 //      }
4671
4672 //      RValue<Int2> operator-(RValue<Int2> val)
4673 //      {
4674 //              return RValue<Int2>(Nucleus::createNeg(val.value));
4675 //      }
4676
4677         RValue<Int2> operator~(RValue<Int2> val)
4678         {
4679                 return RValue<Int2>(Nucleus::createNot(val.value));
4680         }
4681
4682         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4683         {
4684                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4685                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4686         }
4687
4688         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4689         {
4690                 int shuffle[16] = {0, 4, 1, 5};   // Real type is v4i32
4691                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4692                 return As<Short4>(Swizzle(lowHigh, 0xEE));
4693         }
4694
4695         RValue<Int> Extract(RValue<Int2> val, int i)
4696         {
4697                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4698         }
4699
4700         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4701         {
4702                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
4703         }
4704
4705         Type *Int2::getType()
4706         {
4707                 return T(Type_v2i32);
4708         }
4709
4710         UInt2::UInt2(unsigned int x, unsigned int y)
4711         {
4712                 int64_t constantVector[2] = {x, y};
4713                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4714         }
4715
4716         UInt2::UInt2(RValue<UInt2> rhs)
4717         {
4718                 storeValue(rhs.value);
4719         }
4720
4721         UInt2::UInt2(const UInt2 &rhs)
4722         {
4723                 Value *value = rhs.loadValue();
4724                 storeValue(value);
4725         }
4726
4727         UInt2::UInt2(const Reference<UInt2> &rhs)
4728         {
4729                 Value *value = rhs.loadValue();
4730                 storeValue(value);
4731         }
4732
4733         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
4734         {
4735                 storeValue(rhs.value);
4736
4737                 return rhs;
4738         }
4739
4740         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4741         {
4742                 Value *value = rhs.loadValue();
4743                 storeValue(value);
4744
4745                 return RValue<UInt2>(value);
4746         }
4747
4748         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4749         {
4750                 Value *value = rhs.loadValue();
4751                 storeValue(value);
4752
4753                 return RValue<UInt2>(value);
4754         }
4755
4756         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4757         {
4758                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
4759         }
4760
4761         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4762         {
4763                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4764         }
4765
4766 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4767 //      {
4768 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4769 //      }
4770
4771 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4772 //      {
4773 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4774 //      }
4775
4776 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4777 //      {
4778 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
4779 //      }
4780
4781         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4782         {
4783                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
4784         }
4785
4786         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4787         {
4788                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
4789         }
4790
4791         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4792         {
4793                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
4794         }
4795
4796         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4797         {
4798                 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4799         }
4800
4801         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4802         {
4803                 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4804         }
4805
4806         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4807         {
4808                 return lhs = lhs + rhs;
4809         }
4810
4811         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4812         {
4813                 return lhs = lhs - rhs;
4814         }
4815
4816 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4817 //      {
4818 //              return lhs = lhs * rhs;
4819 //      }
4820
4821 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4822 //      {
4823 //              return lhs = lhs / rhs;
4824 //      }
4825
4826 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4827 //      {
4828 //              return lhs = lhs % rhs;
4829 //      }
4830
4831         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4832         {
4833                 return lhs = lhs & rhs;
4834         }
4835
4836         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4837         {
4838                 return lhs = lhs | rhs;
4839         }
4840
4841         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4842         {
4843                 return lhs = lhs ^ rhs;
4844         }
4845
4846         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4847         {
4848                 return lhs = lhs << rhs;
4849         }
4850
4851         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4852         {
4853                 return lhs = lhs >> rhs;
4854         }
4855
4856 //      RValue<UInt2> operator+(RValue<UInt2> val)
4857 //      {
4858 //              return val;
4859 //      }
4860
4861 //      RValue<UInt2> operator-(RValue<UInt2> val)
4862 //      {
4863 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
4864 //      }
4865
4866         RValue<UInt2> operator~(RValue<UInt2> val)
4867         {
4868                 return RValue<UInt2>(Nucleus::createNot(val.value));
4869         }
4870
4871         Type *UInt2::getType()
4872         {
4873                 return T(Type_v2i32);
4874         }
4875
4876         Int4::Int4(RValue<Byte4> cast)
4877         {
4878                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4879                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4880
4881                 Value *e;
4882                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4883                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4884                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4885
4886                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4887                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4888                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4889
4890                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4891                 storeValue(f);
4892         }
4893
4894         Int4::Int4(RValue<SByte4> cast)
4895         {
4896                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4897                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4898
4899                 Value *e;
4900                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4901                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4902                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
4903
4904                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4905                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4906                 e = Nucleus::createShuffleVector(d, d, swizzle2);
4907
4908                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4909                 Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
4910                 storeValue(g);
4911         }
4912
4913         Int4::Int4(RValue<Float4> cast)
4914         {
4915                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
4916
4917                 storeValue(xyzw);
4918         }
4919
4920         Int4::Int4(RValue<Short4> cast)
4921         {
4922                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4923                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
4924                 Value *d = Nucleus::createBitCast(c, Int4::getType());
4925                 Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
4926                 storeValue(e);
4927         }
4928
4929         Int4::Int4(RValue<UShort4> cast)
4930         {
4931                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4932                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
4933                 Value *d = Nucleus::createBitCast(c, Int4::getType());
4934                 storeValue(d);
4935         }
4936
4937         Int4::Int4(int xyzw)
4938         {
4939                 constant(xyzw, xyzw, xyzw, xyzw);
4940         }
4941
4942         Int4::Int4(int x, int yzw)
4943         {
4944                 constant(x, yzw, yzw, yzw);
4945         }
4946
4947         Int4::Int4(int x, int y, int zw)
4948         {
4949                 constant(x, y, zw, zw);
4950         }
4951
4952         Int4::Int4(int x, int y, int z, int w)
4953         {
4954                 constant(x, y, z, w);
4955         }
4956
4957         void Int4::constant(int x, int y, int z, int w)
4958         {
4959                 int64_t constantVector[4] = {x, y, z, w};
4960                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4961         }
4962
4963         Int4::Int4(RValue<Int4> rhs)
4964         {
4965                 storeValue(rhs.value);
4966         }
4967
4968         Int4::Int4(const Int4 &rhs)
4969         {
4970                 Value *value = rhs.loadValue();
4971                 storeValue(value);
4972         }
4973
4974         Int4::Int4(const Reference<Int4> &rhs)
4975         {
4976                 Value *value = rhs.loadValue();
4977                 storeValue(value);
4978         }
4979
4980         Int4::Int4(RValue<UInt4> rhs)
4981         {
4982                 storeValue(rhs.value);
4983         }
4984
4985         Int4::Int4(const UInt4 &rhs)
4986         {
4987                 Value *value = rhs.loadValue();
4988                 storeValue(value);
4989         }
4990
4991         Int4::Int4(const Reference<UInt4> &rhs)
4992         {
4993                 Value *value = rhs.loadValue();
4994                 storeValue(value);
4995         }
4996
4997         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
4998         {
4999                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5000                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5001
5002                 storeValue(packed);
5003         }
5004
5005         Int4::Int4(RValue<Int> rhs)
5006         {
5007                 Value *vector = loadValue();
5008                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5009
5010                 int swizzle[4] = {0, 0, 0, 0};
5011                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5012
5013                 storeValue(replicate);
5014         }
5015
5016         Int4::Int4(const Int &rhs)
5017         {
5018                 *this = RValue<Int>(rhs.loadValue());
5019         }
5020
5021         Int4::Int4(const Reference<Int> &rhs)
5022         {
5023                 *this = RValue<Int>(rhs.loadValue());
5024         }
5025
5026         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5027         {
5028                 storeValue(rhs.value);
5029
5030                 return rhs;
5031         }
5032
5033         RValue<Int4> Int4::operator=(const Int4 &rhs)
5034         {
5035                 Value *value = rhs.loadValue();
5036                 storeValue(value);
5037
5038                 return RValue<Int4>(value);
5039         }
5040
5041         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5042         {
5043                 Value *value = rhs.loadValue();
5044                 storeValue(value);
5045
5046                 return RValue<Int4>(value);
5047         }
5048
5049         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5050         {
5051                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5052         }
5053
5054         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5055         {
5056                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5057         }
5058
5059         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5060         {
5061                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5062         }
5063
5064         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5065         {
5066                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5067         }
5068
5069         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5070         {
5071                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5072         }
5073
5074         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5075         {
5076                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5077         }
5078
5079         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5080         {
5081                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5082         }
5083
5084         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5085         {
5086                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5087         }
5088
5089         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5090         {
5091                 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5092         }
5093
5094         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5095         {
5096                 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5097         }
5098
5099         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5100         {
5101                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5102         }
5103
5104         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5105         {
5106                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5107         }
5108
5109         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5110         {
5111                 return lhs = lhs + rhs;
5112         }
5113
5114         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5115         {
5116                 return lhs = lhs - rhs;
5117         }
5118
5119         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5120         {
5121                 return lhs = lhs * rhs;
5122         }
5123
5124 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5125 //      {
5126 //              return lhs = lhs / rhs;
5127 //      }
5128
5129 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5130 //      {
5131 //              return lhs = lhs % rhs;
5132 //      }
5133
5134         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5135         {
5136                 return lhs = lhs & rhs;
5137         }
5138
5139         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5140         {
5141                 return lhs = lhs | rhs;
5142         }
5143
5144         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5145         {
5146                 return lhs = lhs ^ rhs;
5147         }
5148
5149         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5150         {
5151                 return lhs = lhs << rhs;
5152         }
5153
5154         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5155         {
5156                 return lhs = lhs >> rhs;
5157         }
5158
5159         RValue<Int4> operator+(RValue<Int4> val)
5160         {
5161                 return val;
5162         }
5163
5164         RValue<Int4> operator-(RValue<Int4> val)
5165         {
5166                 return RValue<Int4>(Nucleus::createNeg(val.value));
5167         }
5168
5169         RValue<Int4> operator~(RValue<Int4> val)
5170         {
5171                 return RValue<Int4>(Nucleus::createNot(val.value));
5172         }
5173
5174         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5175         {
5176                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5177         }
5178
5179         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5180         {
5181                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5182         }
5183
5184         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5185         {
5186                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5187         }
5188
5189         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5190         {
5191                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5192         }
5193
5194         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5195         {
5196                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5197         }
5198
5199         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5200         {
5201                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5202         }
5203
5204         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5205         {
5206                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5207                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5208                 ::basicBlock->appendInst(cmp);
5209
5210                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5211                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5212                 ::basicBlock->appendInst(select);
5213
5214                 return RValue<Int4>(V(result));
5215         }
5216
5217         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5218         {
5219                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5220                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5221                 ::basicBlock->appendInst(cmp);
5222
5223                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5224                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5225                 ::basicBlock->appendInst(select);
5226
5227                 return RValue<Int4>(V(result));
5228         }
5229
5230         RValue<Int4> RoundInt(RValue<Float4> cast)
5231         {
5232                 RValue<Float4> rounded = Round(cast);
5233
5234                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5235                 auto round = Ice::InstCast::create(::function, Ice::InstCast::Fptosi, result, rounded.value);
5236                 ::basicBlock->appendInst(round);
5237
5238                 return RValue<Int4>(V(result));
5239         }
5240
5241         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5242         {
5243                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5244                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5245                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5246                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5247                 pack->addArg(x.value);
5248                 pack->addArg(y.value);
5249                 ::basicBlock->appendInst(pack);
5250
5251                 return RValue<Short8>(V(result));
5252         }
5253
5254         RValue<Int> Extract(RValue<Int4> x, int i)
5255         {
5256                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5257         }
5258
5259         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5260         {
5261                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5262         }
5263
5264         RValue<Int> SignMask(RValue<Int4> x)
5265         {
5266                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5267                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5268                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5269                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5270                 movmsk->addArg(x.value);
5271                 ::basicBlock->appendInst(movmsk);
5272
5273                 return RValue<Int>(V(result));
5274         }
5275
5276         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5277         {
5278                 return RValue<Int4>(createSwizzle4(x.value, select));
5279         }
5280
5281         Type *Int4::getType()
5282         {
5283                 return T(Ice::IceType_v4i32);
5284         }
5285
5286         UInt4::UInt4(RValue<Float4> cast)
5287         {
5288                 // Smallest positive value representable in UInt, but not in Int
5289                 const unsigned int ustart = 0x80000000u;
5290                 const float ustartf = float(ustart);
5291
5292                 // Check if the value can be represented as an Int
5293                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5294                 // If the value is too large, subtract ustart and re-add it after conversion.
5295                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5296                 // Otherwise, just convert normally
5297                           (~uiValue & Int4(cast));
5298                 // If the value is negative, store 0, otherwise store the result of the conversion
5299                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
5300         }
5301
5302         UInt4::UInt4(int xyzw)
5303         {
5304                 constant(xyzw, xyzw, xyzw, xyzw);
5305         }
5306
5307         UInt4::UInt4(int x, int yzw)
5308         {
5309                 constant(x, yzw, yzw, yzw);
5310         }
5311
5312         UInt4::UInt4(int x, int y, int zw)
5313         {
5314                 constant(x, y, zw, zw);
5315         }
5316
5317         UInt4::UInt4(int x, int y, int z, int w)
5318         {
5319                 constant(x, y, z, w);
5320         }
5321
5322         void UInt4::constant(int x, int y, int z, int w)
5323         {
5324                 int64_t constantVector[4] = {x, y, z, w};
5325                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5326         }
5327
5328         UInt4::UInt4(RValue<UInt4> rhs)
5329         {
5330                 storeValue(rhs.value);
5331         }
5332
5333         UInt4::UInt4(const UInt4 &rhs)
5334         {
5335                 Value *value = rhs.loadValue();
5336                 storeValue(value);
5337         }
5338
5339         UInt4::UInt4(const Reference<UInt4> &rhs)
5340         {
5341                 Value *value = rhs.loadValue();
5342                 storeValue(value);
5343         }
5344
5345         UInt4::UInt4(RValue<Int4> rhs)
5346         {
5347                 storeValue(rhs.value);
5348         }
5349
5350         UInt4::UInt4(const Int4 &rhs)
5351         {
5352                 Value *value = rhs.loadValue();
5353                 storeValue(value);
5354         }
5355
5356         UInt4::UInt4(const Reference<Int4> &rhs)
5357         {
5358                 Value *value = rhs.loadValue();
5359                 storeValue(value);
5360         }
5361
5362         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5363         {
5364                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5365                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5366
5367                 storeValue(packed);
5368         }
5369
5370         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5371         {
5372                 storeValue(rhs.value);
5373
5374                 return rhs;
5375         }
5376
5377         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5378         {
5379                 Value *value = rhs.loadValue();
5380                 storeValue(value);
5381
5382                 return RValue<UInt4>(value);
5383         }
5384
5385         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5386         {
5387                 Value *value = rhs.loadValue();
5388                 storeValue(value);
5389
5390                 return RValue<UInt4>(value);
5391         }
5392
5393         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5394         {
5395                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5396         }
5397
5398         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5399         {
5400                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5401         }
5402
5403         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5404         {
5405                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5406         }
5407
5408         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5409         {
5410                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5411         }
5412
5413         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5414         {
5415                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5416         }
5417
5418         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5419         {
5420                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5421         }
5422
5423         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5424         {
5425                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5426         }
5427
5428         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5429         {
5430                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5431         }
5432
5433         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5434         {
5435                 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5436         }
5437
5438         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5439         {
5440                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5441         }
5442
5443         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5444         {
5445                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5446         }
5447
5448         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5449         {
5450                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5451         }
5452
5453         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5454         {
5455                 return lhs = lhs + rhs;
5456         }
5457
5458         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5459         {
5460                 return lhs = lhs - rhs;
5461         }
5462
5463         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5464         {
5465                 return lhs = lhs * rhs;
5466         }
5467
5468 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5469 //      {
5470 //              return lhs = lhs / rhs;
5471 //      }
5472
5473 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5474 //      {
5475 //              return lhs = lhs % rhs;
5476 //      }
5477
5478         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5479         {
5480                 return lhs = lhs & rhs;
5481         }
5482
5483         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5484         {
5485                 return lhs = lhs | rhs;
5486         }
5487
5488         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5489         {
5490                 return lhs = lhs ^ rhs;
5491         }
5492
5493         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5494         {
5495                 return lhs = lhs << rhs;
5496         }
5497
5498         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5499         {
5500                 return lhs = lhs >> rhs;
5501         }
5502
5503         RValue<UInt4> operator+(RValue<UInt4> val)
5504         {
5505                 return val;
5506         }
5507
5508         RValue<UInt4> operator-(RValue<UInt4> val)
5509         {
5510                 return RValue<UInt4>(Nucleus::createNeg(val.value));
5511         }
5512
5513         RValue<UInt4> operator~(RValue<UInt4> val)
5514         {
5515                 return RValue<UInt4>(Nucleus::createNot(val.value));
5516         }
5517
5518         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5519         {
5520                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
5521         }
5522
5523         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5524         {
5525                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
5526         }
5527
5528         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5529         {
5530                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
5531         }
5532
5533         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5534         {
5535                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
5536         }
5537
5538         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5539         {
5540                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
5541         }
5542
5543         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5544         {
5545                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
5546         }
5547
5548         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5549         {
5550                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5551                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
5552                 ::basicBlock->appendInst(cmp);
5553
5554                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5555                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5556                 ::basicBlock->appendInst(select);
5557
5558                 return RValue<UInt4>(V(result));
5559         }
5560
5561         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5562         {
5563                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5564                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
5565                 ::basicBlock->appendInst(cmp);
5566
5567                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5568                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5569                 ::basicBlock->appendInst(select);
5570
5571                 return RValue<UInt4>(V(result));
5572         }
5573
5574         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5575         {
5576                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5577                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5578                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5579                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5580                 pack->addArg(x.value);
5581                 pack->addArg(y.value);
5582                 ::basicBlock->appendInst(pack);
5583
5584                 return RValue<UShort8>(V(result));
5585         }
5586
5587         Type *UInt4::getType()
5588         {
5589                 return T(Ice::IceType_v4i32);
5590         }
5591
5592         Float::Float(RValue<Int> cast)
5593         {
5594                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5595
5596                 storeValue(integer);
5597         }
5598
5599         Float::Float(float x)
5600         {
5601                 storeValue(Nucleus::createConstantFloat(x));
5602         }
5603
5604         Float::Float(RValue<Float> rhs)
5605         {
5606                 storeValue(rhs.value);
5607         }
5608
5609         Float::Float(const Float &rhs)
5610         {
5611                 Value *value = rhs.loadValue();
5612                 storeValue(value);
5613         }
5614
5615         Float::Float(const Reference<Float> &rhs)
5616         {
5617                 Value *value = rhs.loadValue();
5618                 storeValue(value);
5619         }
5620
5621         RValue<Float> Float::operator=(RValue<Float> rhs)
5622         {
5623                 storeValue(rhs.value);
5624
5625                 return rhs;
5626         }
5627
5628         RValue<Float> Float::operator=(const Float &rhs)
5629         {
5630                 Value *value = rhs.loadValue();
5631                 storeValue(value);
5632
5633                 return RValue<Float>(value);
5634         }
5635
5636         RValue<Float> Float::operator=(const Reference<Float> &rhs)
5637         {
5638                 Value *value = rhs.loadValue();
5639                 storeValue(value);
5640
5641                 return RValue<Float>(value);
5642         }
5643
5644         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5645         {
5646                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5647         }
5648
5649         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5650         {
5651                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5652         }
5653
5654         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5655         {
5656                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5657         }
5658
5659         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5660         {
5661                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
5662         }
5663
5664         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5665         {
5666                 return lhs = lhs + rhs;
5667         }
5668
5669         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5670         {
5671                 return lhs = lhs - rhs;
5672         }
5673
5674         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5675         {
5676                 return lhs = lhs * rhs;
5677         }
5678
5679         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5680         {
5681                 return lhs = lhs / rhs;
5682         }
5683
5684         RValue<Float> operator+(RValue<Float> val)
5685         {
5686                 return val;
5687         }
5688
5689         RValue<Float> operator-(RValue<Float> val)
5690         {
5691                 return RValue<Float>(Nucleus::createFNeg(val.value));
5692         }
5693
5694         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5695         {
5696                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5697         }
5698
5699         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5700         {
5701                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5702         }
5703
5704         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5705         {
5706                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5707         }
5708
5709         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5710         {
5711                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5712         }
5713
5714         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5715         {
5716                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5717         }
5718
5719         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5720         {
5721                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
5722         }
5723
5724         RValue<Float> Abs(RValue<Float> x)
5725         {
5726                 return IfThenElse(x > 0.0f, x, -x);
5727         }
5728
5729         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5730         {
5731                 return IfThenElse(x > y, x, y);
5732         }
5733
5734         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5735         {
5736                 return IfThenElse(x < y, x, y);
5737         }
5738
5739         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5740         {
5741                 return 1.0f / x;
5742         }
5743
5744         RValue<Float> RcpSqrt_pp(RValue<Float> x)
5745         {
5746                 return Rcp_pp(Sqrt(x));
5747         }
5748
5749         RValue<Float> Sqrt(RValue<Float> x)
5750         {
5751                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
5752                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5753                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5754                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5755                 sqrt->addArg(x.value);
5756                 ::basicBlock->appendInst(sqrt);
5757
5758                 return RValue<Float>(V(result));
5759         }
5760
5761         RValue<Float> Round(RValue<Float> x)
5762         {
5763                 return Float4(Round(Float4(x))).x;
5764         }
5765
5766         RValue<Float> Trunc(RValue<Float> x)
5767         {
5768                 return Float4(Trunc(Float4(x))).x;
5769         }
5770
5771         RValue<Float> Frac(RValue<Float> x)
5772         {
5773                 return Float4(Frac(Float4(x))).x;
5774         }
5775
5776         RValue<Float> Floor(RValue<Float> x)
5777         {
5778                 return Float4(Floor(Float4(x))).x;
5779         }
5780
5781         RValue<Float> Ceil(RValue<Float> x)
5782         {
5783                 return Float4(Ceil(Float4(x))).x;
5784         }
5785
5786         Type *Float::getType()
5787         {
5788                 return T(Ice::IceType_f32);
5789         }
5790
5791         Float2::Float2(RValue<Float4> cast)
5792         {
5793                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5794         }
5795
5796         Type *Float2::getType()
5797         {
5798                 return T(Type_v2f32);
5799         }
5800
5801         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
5802         {
5803                 Value *a = Int4(cast).loadValue();
5804                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5805
5806                 storeValue(xyzw);
5807         }
5808
5809         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
5810         {
5811                 Value *a = Int4(cast).loadValue();
5812                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5813
5814                 storeValue(xyzw);
5815         }
5816
5817         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
5818         {
5819                 Int4 c(cast);
5820                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5821         }
5822
5823         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
5824         {
5825                 Int4 c(cast);
5826                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5827         }
5828
5829         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
5830         {
5831                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
5832
5833                 storeValue(xyzw);
5834         }
5835
5836         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
5837         {
5838                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
5839                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
5840
5841                 storeValue(result.value);
5842         }
5843
5844         Float4::Float4() : FloatXYZW(this)
5845         {
5846         }
5847
5848         Float4::Float4(float xyzw) : FloatXYZW(this)
5849         {
5850                 constant(xyzw, xyzw, xyzw, xyzw);
5851         }
5852
5853         Float4::Float4(float x, float yzw) : FloatXYZW(this)
5854         {
5855                 constant(x, yzw, yzw, yzw);
5856         }
5857
5858         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
5859         {
5860                 constant(x, y, zw, zw);
5861         }
5862
5863         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
5864         {
5865                 constant(x, y, z, w);
5866         }
5867
5868         void Float4::constant(float x, float y, float z, float w)
5869         {
5870                 double constantVector[4] = {x, y, z, w};
5871                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5872         }
5873
5874         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
5875         {
5876                 storeValue(rhs.value);
5877         }
5878
5879         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
5880         {
5881                 Value *value = rhs.loadValue();
5882                 storeValue(value);
5883         }
5884
5885         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
5886         {
5887                 Value *value = rhs.loadValue();
5888                 storeValue(value);
5889         }
5890
5891         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
5892         {
5893                 Value *vector = loadValue();
5894                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5895
5896                 int swizzle[4] = {0, 0, 0, 0};
5897                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5898
5899                 storeValue(replicate);
5900         }
5901
5902         Float4::Float4(const Float &rhs) : FloatXYZW(this)
5903         {
5904                 *this = RValue<Float>(rhs.loadValue());
5905         }
5906
5907         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
5908         {
5909                 *this = RValue<Float>(rhs.loadValue());
5910         }
5911
5912         RValue<Float4> Float4::operator=(float x)
5913         {
5914                 return *this = Float4(x, x, x, x);
5915         }
5916
5917         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
5918         {
5919                 storeValue(rhs.value);
5920
5921                 return rhs;
5922         }
5923
5924         RValue<Float4> Float4::operator=(const Float4 &rhs)
5925         {
5926                 Value *value = rhs.loadValue();
5927                 storeValue(value);
5928
5929                 return RValue<Float4>(value);
5930         }
5931
5932         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
5933         {
5934                 Value *value = rhs.loadValue();
5935                 storeValue(value);
5936
5937                 return RValue<Float4>(value);
5938         }
5939
5940         RValue<Float4> Float4::operator=(RValue<Float> rhs)
5941         {
5942                 return *this = Float4(rhs);
5943         }
5944
5945         RValue<Float4> Float4::operator=(const Float &rhs)
5946         {
5947                 return *this = Float4(rhs);
5948         }
5949
5950         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
5951         {
5952                 return *this = Float4(rhs);
5953         }
5954
5955         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
5956         {
5957                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
5958         }
5959
5960         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
5961         {
5962                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
5963         }
5964
5965         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
5966         {
5967                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
5968         }
5969
5970         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
5971         {
5972                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
5973         }
5974
5975         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
5976         {
5977                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
5978         }
5979
5980         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
5981         {
5982                 return lhs = lhs + rhs;
5983         }
5984
5985         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
5986         {
5987                 return lhs = lhs - rhs;
5988         }
5989
5990         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
5991         {
5992                 return lhs = lhs * rhs;
5993         }
5994
5995         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
5996         {
5997                 return lhs = lhs / rhs;
5998         }
5999
6000         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6001         {
6002                 return lhs = lhs % rhs;
6003         }
6004
6005         RValue<Float4> operator+(RValue<Float4> val)
6006         {
6007                 return val;
6008         }
6009
6010         RValue<Float4> operator-(RValue<Float4> val)
6011         {
6012                 return RValue<Float4>(Nucleus::createFNeg(val.value));
6013         }
6014
6015         RValue<Float4> Abs(RValue<Float4> x)
6016         {
6017                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6018                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6019                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6020
6021                 return As<Float4>(result);
6022         }
6023
6024         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6025         {
6026                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6027                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ule, condition, x.value, y.value);
6028                 ::basicBlock->appendInst(cmp);
6029
6030                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6031                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6032                 ::basicBlock->appendInst(select);
6033
6034                 return RValue<Float4>(V(result));
6035         }
6036
6037         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6038         {
6039                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6040                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ugt, condition, x.value, y.value);
6041                 ::basicBlock->appendInst(cmp);
6042
6043                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6044                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6045                 ::basicBlock->appendInst(select);
6046
6047                 return RValue<Float4>(V(result));
6048         }
6049
6050         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6051         {
6052                 return Float4(1.0f) / x;
6053         }
6054
6055         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6056         {
6057                 return Rcp_pp(Sqrt(x));
6058         }
6059
6060         RValue<Float4> Sqrt(RValue<Float4> x)
6061         {
6062                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6063                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6064                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6065                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6066                 sqrt->addArg(x.value);
6067                 ::basicBlock->appendInst(sqrt);
6068
6069                 return RValue<Float4>(V(result));
6070         }
6071
6072         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6073         {
6074                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6075         }
6076
6077         RValue<Float> Extract(RValue<Float4> x, int i)
6078         {
6079                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6080         }
6081
6082         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6083         {
6084                 return RValue<Float4>(createSwizzle4(x.value, select));
6085         }
6086
6087         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6088         {
6089                 int shuffle[4] =
6090                 {
6091                         ((imm >> 0) & 0x03) + 0,
6092                         ((imm >> 2) & 0x03) + 0,
6093                         ((imm >> 4) & 0x03) + 4,
6094                         ((imm >> 6) & 0x03) + 4,
6095                 };
6096
6097                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6098         }
6099
6100         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6101         {
6102                 int shuffle[4] = {0, 4, 1, 5};
6103                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6104         }
6105
6106         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6107         {
6108                 int shuffle[4] = {2, 6, 3, 7};
6109                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6110         }
6111
6112         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6113         {
6114                 Value *vector = lhs.loadValue();
6115                 Value *result = createMask4(vector, rhs.value, select);
6116                 lhs.storeValue(result);
6117
6118                 return RValue<Float4>(result);
6119         }
6120
6121         RValue<Int> SignMask(RValue<Float4> x)
6122         {
6123                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6124                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6125                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6126                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6127                 movmsk->addArg(x.value);
6128                 ::basicBlock->appendInst(movmsk);
6129
6130                 return RValue<Int>(V(result));
6131         }
6132
6133         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6134         {
6135                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6136         }
6137
6138         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6139         {
6140                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6141         }
6142
6143         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6144         {
6145                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6146         }
6147
6148         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6149         {
6150                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6151         }
6152
6153         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6154         {
6155                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6156         }
6157
6158         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6159         {
6160                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6161         }
6162
6163         RValue<Float4> Round(RValue<Float4> x)
6164         {
6165                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6166                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6167                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6168                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6169                 round->addArg(x.value);
6170                 round->addArg(::context->getConstantInt32(0));
6171                 ::basicBlock->appendInst(round);
6172
6173                 return RValue<Float4>(V(result));
6174         }
6175
6176         RValue<Float4> Trunc(RValue<Float4> x)
6177         {
6178                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6179                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6180                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6181                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6182                 round->addArg(x.value);
6183                 round->addArg(::context->getConstantInt32(3));
6184                 ::basicBlock->appendInst(round);
6185
6186                 return RValue<Float4>(V(result));
6187         }
6188
6189         RValue<Float4> Frac(RValue<Float4> x)
6190         {
6191                 return x - Floor(x);
6192         }
6193
6194         RValue<Float4> Floor(RValue<Float4> x)
6195         {
6196                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6197                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6198                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6199                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6200                 round->addArg(x.value);
6201                 round->addArg(::context->getConstantInt32(1));
6202                 ::basicBlock->appendInst(round);
6203
6204                 return RValue<Float4>(V(result));
6205         }
6206
6207         RValue<Float4> Ceil(RValue<Float4> x)
6208         {
6209                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6210                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6211                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6212                 auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6213                 round->addArg(x.value);
6214                 round->addArg(::context->getConstantInt32(2));
6215                 ::basicBlock->appendInst(round);
6216
6217                 return RValue<Float4>(V(result));
6218         }
6219
6220         Type *Float4::getType()
6221         {
6222                 return T(Ice::IceType_v4f32);
6223         }
6224
6225         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6226         {
6227                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6228         }
6229
6230         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6231         {
6232                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6233         }
6234
6235         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6236         {
6237                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6238         }
6239
6240         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6241         {
6242                 return lhs = lhs + offset;
6243         }
6244
6245         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6246         {
6247                 return lhs = lhs + offset;
6248         }
6249
6250         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6251         {
6252                 return lhs = lhs + offset;
6253         }
6254
6255         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6256         {
6257                 return lhs + -offset;
6258         }
6259
6260         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6261         {
6262                 return lhs + -offset;
6263         }
6264
6265         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6266         {
6267                 return lhs + -offset;
6268         }
6269
6270         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6271         {
6272                 return lhs = lhs - offset;
6273         }
6274
6275         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6276         {
6277                 return lhs = lhs - offset;
6278         }
6279
6280         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6281         {
6282                 return lhs = lhs - offset;
6283         }
6284
6285         void Return()
6286         {
6287                 Nucleus::createRetVoid();
6288                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6289                 Nucleus::createUnreachable();
6290         }
6291
6292         void Return(RValue<Int> ret)
6293         {
6294                 Nucleus::createRet(ret.value);
6295                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6296                 Nucleus::createUnreachable();
6297         }
6298
6299         bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6300         {
6301                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6302                 Nucleus::setInsertBlock(bodyBB);
6303
6304                 return true;
6305         }
6306
6307         RValue<Long> Ticks()
6308         {
6309                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
6310         }
6311 }