OSDN Git Service

Emit SSE2 compatible operations if SSE4.1 is not supported.
[android-x86/external-swiftshader.git] / src / Reactor / SubzeroReactor.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
#include "Nucleus.hpp"

#include "Reactor.hpp"
#include "Routine.hpp"

#include "Optimizer.hpp"

#include "src/IceTypes.h"
#include "src/IceCfg.h"
#include "src/IceELFStreamer.h"
#include "src/IceGlobalContext.h"
#include "src/IceCfgNode.h"
#include "src/IceELFObjectWriter.h"
#include "src/IceGlobalInits.h"

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_os_ostream.h"

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>
#else
#include <sys/mman.h>
#endif

#include <mutex>
#include <limits>
#include <iostream>
#include <cassert>
#include <new>
45
46 namespace
47 {
48         Ice::GlobalContext *context = nullptr;
49         Ice::Cfg *function = nullptr;
50         Ice::CfgNode *basicBlock = nullptr;
51         Ice::CfgLocalAllocatorScope *allocator = nullptr;
52         sw::Routine *routine = nullptr;
53
54         std::mutex codegenMutex;
55
56         Ice::ELFFileStreamer *elfFile = nullptr;
57         Ice::Fdstream *out = nullptr;
58 }
59
namespace
{
        // Run-time query of x86 processor features through the CPUID instruction.
        class CPUID
        {
        public:
                // Set once during static initialization from bit 0x00080000 of the
                // third register (ECX) returned by CPUID function 1.
                const static bool SSE4_1;

        private:
                // Executes CPUID for function 'info' and stores EAX, EBX, ECX and EDX
                // in registers[0] through registers[3], in that order.
                static void cpuid(int registers[4], int info)
                {
                        #if defined(_WIN32)
                                __cpuid(registers, info);
                        #else
                                __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
                        #endif
                }

                static bool detectSSE4_1()
                {
                        const int sse41Mask = 0x00080000;
                        int featureRegisters[4];

                        cpuid(featureRegisters, 1);

                        const int ecx = featureRegisters[2];
                        return (ecx & sse41Mask) != 0;
                }
        };

        const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();
}
87
88 namespace sw
89 {
90         enum EmulatedType
91         {
92                 EmulatedShift = 16,
93                 EmulatedV2 = 2 << EmulatedShift,
94                 EmulatedV4 = 4 << EmulatedShift,
95                 EmulatedV8 = 8 << EmulatedShift,
96                 EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,
97
98                 Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,
99                 Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,
100                 Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,
101                 Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,
102                 Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,
103                 Type_v2f32 = Ice::IceType_v4f32 | EmulatedV2,
104         };
105
106         class Value : public Ice::Operand {};
107         class SwitchCases : public Ice::InstSwitch {};
108         class BasicBlock : public Ice::CfgNode {};
109
110         Ice::Type T(Type *t)
111         {
112                 static_assert(Ice::IceType_NUM < EmulatedBits, "Ice::Type overlaps with our emulated types!");
113                 return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);
114         }
115
116         Type *T(Ice::Type t)
117         {
118                 return reinterpret_cast<Type*>(t);
119         }
120
121         Type *T(EmulatedType t)
122         {
123                 return reinterpret_cast<Type*>(t);
124         }
125
126         Value *V(Ice::Operand *v)
127         {
128                 return reinterpret_cast<Value*>(v);
129         }
130
131         BasicBlock *B(Ice::CfgNode *b)
132         {
133                 return reinterpret_cast<BasicBlock*>(b);
134         }
135
136         Optimization optimization[10] = {InstructionCombining, Disabled};
137
138         using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
139         using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
140
141         inline const SectionHeader *sectionHeader(const ElfHeader *elfHeader)
142         {
143                 return reinterpret_cast<const SectionHeader*>((intptr_t)elfHeader + elfHeader->e_shoff);
144         }
145
146         inline const SectionHeader *elfSection(const ElfHeader *elfHeader, int index)
147         {
148                 return &sectionHeader(elfHeader)[index];
149         }
150
151         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf32_Rel &relocation, const SectionHeader &relocationTable)
152         {
153                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
154
155                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
156                 int32_t *patchSite = (int*)(address + relocation.r_offset);
157                 uint32_t index = relocation.getSymbol();
158                 int table = relocationTable.sh_link;
159                 void *symbolValue = nullptr;
160
161                 if(index != SHN_UNDEF)
162                 {
163                         if(table == SHN_UNDEF) return nullptr;
164                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
165
166                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
167                         if(index >= symtab_entries)
168                         {
169                                 assert(index < symtab_entries && "Symbol Index out of range");
170                                 return nullptr;
171                         }
172
173                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
174                         Elf32_Sym &symbol = ((Elf32_Sym*)symbolAddress)[index];
175                         uint16_t section = symbol.st_shndx;
176
177                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
178                         {
179                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
180                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
181                         }
182                         else
183                         {
184                                 return nullptr;
185                         }
186                 }
187
188                 switch(relocation.getType())
189                 {
190                 case R_386_NONE:
191                         // No relocation
192                         break;
193                 case R_386_32:
194                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite);
195                         break;
196         //      case R_386_PC32:
197         //              *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite);
198         //              break;
199                 default:
200                         assert(false && "Unsupported relocation type");
201                         return nullptr;
202                 }
203
204                 return symbolValue;
205         }
206
207         static void *relocateSymbol(const ElfHeader *elfHeader, const Elf64_Rela &relocation, const SectionHeader &relocationTable)
208         {
209                 const SectionHeader *target = elfSection(elfHeader, relocationTable.sh_info);
210
211                 intptr_t address = (intptr_t)elfHeader + target->sh_offset;
212                 int32_t *patchSite = (int*)(address + relocation.r_offset);
213                 uint32_t index = relocation.getSymbol();
214                 int table = relocationTable.sh_link;
215                 void *symbolValue = nullptr;
216
217                 if(index != SHN_UNDEF)
218                 {
219                         if(table == SHN_UNDEF) return nullptr;
220                         const SectionHeader *symbolTable = elfSection(elfHeader, table);
221
222                         uint32_t symtab_entries = symbolTable->sh_size / symbolTable->sh_entsize;
223                         if(index >= symtab_entries)
224                         {
225                                 assert(index < symtab_entries && "Symbol Index out of range");
226                                 return nullptr;
227                         }
228
229                         intptr_t symbolAddress = (intptr_t)elfHeader + symbolTable->sh_offset;
230                         Elf64_Sym &symbol = ((Elf64_Sym*)symbolAddress)[index];
231                         uint16_t section = symbol.st_shndx;
232
233                         if(section != SHN_UNDEF && section < SHN_LORESERVE)
234                         {
235                                 const SectionHeader *target = elfSection(elfHeader, symbol.st_shndx);
236                                 symbolValue = reinterpret_cast<void*>((intptr_t)elfHeader + symbol.st_value + target->sh_offset);
237                         }
238                         else
239                         {
240                                 return nullptr;
241                         }
242                 }
243
244                 switch(relocation.getType())
245                 {
246                 case R_X86_64_NONE:
247                         // No relocation
248                         break;
249                 case R_X86_64_64:
250                         *(int64_t*)patchSite = (int64_t)((intptr_t)symbolValue + *(int64_t*)patchSite) + relocation.r_addend;
251                         break;
252                 case R_X86_64_PC32:
253                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite - (intptr_t)patchSite) + relocation.r_addend;
254                         break;
255                 case R_X86_64_32S:
256                         *patchSite = (int32_t)((intptr_t)symbolValue + *patchSite) + relocation.r_addend;
257                         break;
258                 default:
259                         assert(false && "Unsupported relocation type");
260                         return nullptr;
261                 }
262
263                 return symbolValue;
264         }
265
266         void *loadImage(uint8_t *const elfImage)
267         {
268                 ElfHeader *elfHeader = (ElfHeader*)elfImage;
269
270                 if(!elfHeader->checkMagic())
271                 {
272                         return nullptr;
273                 }
274
275                 // Expect ELF bitness to match platform
276                 assert(sizeof(void*) == 8 ? elfHeader->getFileClass() == ELFCLASS64 : elfHeader->getFileClass() == ELFCLASS32);
277                 assert(sizeof(void*) == 8 ? elfHeader->e_machine == EM_X86_64 : elfHeader->e_machine == EM_386);
278
279                 SectionHeader *sectionHeader = (SectionHeader*)(elfImage + elfHeader->e_shoff);
280                 void *entry = nullptr;
281
282                 for(int i = 0; i < elfHeader->e_shnum; i++)
283                 {
284                         if(sectionHeader[i].sh_type == SHT_PROGBITS)
285                         {
286                                 if(sectionHeader[i].sh_flags & SHF_EXECINSTR)
287                                 {
288                                         entry = elfImage + sectionHeader[i].sh_offset;
289                                 }
290                         }
291                         else if(sectionHeader[i].sh_type == SHT_REL)
292                         {
293                                 assert(sizeof(void*) == 4 && "UNIMPLEMENTED");   // Only expected/implemented for 32-bit code
294
295                                 for(int index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
296                                 {
297                                         const Elf32_Rel &relocation = ((const Elf32_Rel*)(elfImage + sectionHeader[i].sh_offset))[index];
298                                         void *symbol = relocateSymbol(elfHeader, relocation, sectionHeader[i]);
299                                 }
300                         }
301                         else if(sectionHeader[i].sh_type == SHT_RELA)
302                         {
303                                 assert(sizeof(void*) == 8 && "UNIMPLEMENTED");   // Only expected/implemented for 64-bit code
304
305                                 for(int index = 0; index < sectionHeader[i].sh_size / sectionHeader[i].sh_entsize; index++)
306                                 {
307                                         const Elf64_Rela &relocation = ((const Elf64_Rela*)(elfImage + sectionHeader[i].sh_offset))[index];
308                                         void *symbol = relocateSymbol(elfHeader, relocation, sectionHeader[i]);
309                                 }
310                         }
311                 }
312
313                 return entry;
314         }
315
316         template<typename T>
317         struct ExecutableAllocator
318         {
319                 ExecutableAllocator() {};
320                 template<class U> ExecutableAllocator(const ExecutableAllocator<U> &other) {};
321
322                 using value_type = T;
323                 using size_type = std::size_t;
324
325                 T *allocate(size_type n)
326                 {
327                         #if defined(_WIN32)
328                                 return (T*)VirtualAlloc(NULL, sizeof(T) * n, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
329                         #else
330                                 return (T*)mmap(nullptr, sizeof(T) * n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
331                         #endif
332                 }
333
334                 void deallocate(T *p, size_type n)
335                 {
336                         #if defined(_WIN32)
337                                 VirtualFree(p, 0, MEM_RELEASE);
338                         #else
339                                 munmap(p, sizeof(T) * n);
340                         #endif
341                 }
342         };
343
344         class ELFMemoryStreamer : public Ice::ELFStreamer, public Routine
345         {
346                 ELFMemoryStreamer(const ELFMemoryStreamer &) = delete;
347                 ELFMemoryStreamer &operator=(const ELFMemoryStreamer &) = delete;
348
349         public:
350                 ELFMemoryStreamer() : Routine(), entry(nullptr)
351                 {
352                         position = 0;
353                         buffer.reserve(0x1000);
354                 }
355
356                 virtual ~ELFMemoryStreamer()
357                 {
358                         #if defined(_WIN32)
359                                 if(buffer.size() != 0)
360                                 {
361                                         DWORD exeProtection;
362                                         VirtualProtect(&buffer[0], buffer.size(), oldProtection, &exeProtection);
363                                 }
364                         #endif
365                 }
366
367                 void write8(uint8_t Value) override
368                 {
369                         if(position == (uint64_t)buffer.size())
370                         {
371                                 buffer.push_back(Value);
372                                 position++;
373                         }
374                         else if(position < (uint64_t)buffer.size())
375                         {
376                                 buffer[position] = Value;
377                                 position++;
378                         }
379                         else assert(false && "UNIMPLEMENTED");
380                 }
381
382                 void writeBytes(llvm::StringRef Bytes) override
383                 {
384                         std::size_t oldSize = buffer.size();
385                         buffer.resize(oldSize + Bytes.size());
386                         memcpy(&buffer[oldSize], Bytes.begin(), Bytes.size());
387                         position += Bytes.size();
388                 }
389
390                 uint64_t tell() const override { return position; }
391
392                 void seek(uint64_t Off) override { position = Off; }
393
394                 const void *getEntry() override
395                 {
396                         if(!entry)
397                         {
398                                 #if defined(_WIN32)
399                                         VirtualProtect(&buffer[0], buffer.size(), PAGE_EXECUTE_READWRITE, &oldProtection);
400                                 #else
401                                         mprotect(&buffer[0], buffer.size(), PROT_READ | PROT_WRITE | PROT_EXEC);
402                                 #endif
403
404                                 position = std::numeric_limits<std::size_t>::max();   // Can't stream more data after this
405
406                                 entry = loadImage(&buffer[0]);
407                         }
408
409                         return entry;
410                 }
411
412         private:
413                 void *entry;
414                 std::vector<uint8_t, ExecutableAllocator<uint8_t>> buffer;
415                 std::size_t position;
416
417                 #if defined(_WIN32)
418                 DWORD oldProtection;
419                 #endif
420         };
421
422         Nucleus::Nucleus()
423         {
424                 ::codegenMutex.lock();   // Reactor is currently not thread safe
425
426                 Ice::ClFlags &Flags = Ice::ClFlags::Flags;
427                 Ice::ClFlags::getParsedClFlags(Flags);
428
429                 Flags.setTargetArch(sizeof(void*) == 8 ? Ice::Target_X8664 : Ice::Target_X8632);
430                 Flags.setOutFileType(Ice::FT_Elf);
431                 Flags.setOptLevel(Ice::Opt_2);
432                 Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
433                 Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
434                 Flags.setVerbose(false ? Ice::IceV_All : Ice::IceV_None);
435
436                 static llvm::raw_os_ostream cout(std::cout);
437                 static llvm::raw_os_ostream cerr(std::cerr);
438
439                 if(false)   // Write out to a file
440                 {
441                         std::error_code errorCode;
442                         ::out = new Ice::Fdstream("out.o", errorCode, llvm::sys::fs::F_None);
443                         ::elfFile = new Ice::ELFFileStreamer(*out);
444                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfFile);
445                 }
446                 else
447                 {
448                         ELFMemoryStreamer *elfMemory = new ELFMemoryStreamer();
449                         ::context = new Ice::GlobalContext(&cout, &cout, &cerr, elfMemory);
450                         ::routine = elfMemory;
451                 }
452         }
453
454         Nucleus::~Nucleus()
455         {
456                 delete ::allocator;
457                 delete ::function;
458                 delete ::context;
459
460                 delete ::elfFile;
461                 delete ::out;
462
463                 ::codegenMutex.unlock();
464         }
465
466         Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
467         {
468                 if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
469                 {
470                         createRetVoid();
471                 }
472
473                 std::wstring wideName(name);
474                 std::string asciiName(wideName.begin(), wideName.end());
475                 ::function->setFunctionName(Ice::GlobalString::createWithString(::context, asciiName));
476
477                 optimize();
478
479                 ::function->translate();
480                 assert(!::function->hasError());
481
482                 auto *globals = ::function->getGlobalInits().release();
483
484                 if(globals && !globals->empty())
485                 {
486                         ::context->getGlobals()->merge(globals);
487                 }
488
489                 ::context->emitFileHeader();
490                 ::function->emitIAS();
491                 auto assembler = ::function->releaseAssembler();
492                 auto objectWriter = ::context->getObjectWriter();
493                 assembler->alignFunction();
494                 objectWriter->writeFunctionCode(::function->getFunctionName(), false, assembler.get());
495                 ::context->lowerGlobals("last");
496                 ::context->lowerConstants();
497                 ::context->lowerJumpTables();
498                 objectWriter->setUndefinedSyms(::context->getConstantExternSyms());
499                 objectWriter->writeNonUserSections();
500
501                 return ::routine;
502         }
503
504         void Nucleus::optimize()
505         {
506                 sw::optimize(::function);
507         }
508
509         Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
510         {
511                 Ice::Type type = T(t);
512                 int typeSize = Ice::typeWidthInBytes(type);
513                 int totalSize = typeSize * (arraySize ? arraySize : 1);
514
515                 auto bytes = Ice::ConstantInteger32::create(::context, type, totalSize);
516                 auto address = ::function->makeVariable(T(getPointerType(t)));
517                 auto alloca = Ice::InstAlloca::create(::function, address, bytes, typeSize);
518                 ::function->getEntryNode()->getInsts().push_front(alloca);
519
520                 return V(address);
521         }
522
523         BasicBlock *Nucleus::createBasicBlock()
524         {
525                 return B(::function->makeNode());
526         }
527
528         BasicBlock *Nucleus::getInsertBlock()
529         {
530                 return B(::basicBlock);
531         }
532
533         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
534         {
535         //      assert(::basicBlock->getInsts().back().getTerminatorEdges().size() >= 0 && "Previous basic block must have a terminator");
536                 ::basicBlock = basicBlock;
537         }
538
539         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
540         {
541                 uint32_t sequenceNumber = 0;
542                 ::function = Ice::Cfg::create(::context, sequenceNumber).release();
543                 ::allocator = new Ice::CfgLocalAllocatorScope(::function);
544
545                 for(Type *type : Params)
546                 {
547                         Ice::Variable *arg = ::function->makeVariable(T(type));
548                         ::function->addArg(arg);
549                 }
550
551                 Ice::CfgNode *node = ::function->makeNode();
552                 ::function->setEntryNode(node);
553                 ::basicBlock = node;
554         }
555
556         Value *Nucleus::getArgument(unsigned int index)
557         {
558                 return V(::function->getArgs()[index]);
559         }
560
561         void Nucleus::createRetVoid()
562         {
563                 Ice::InstRet *ret = Ice::InstRet::create(::function);
564                 ::basicBlock->appendInst(ret);
565         }
566
567         void Nucleus::createRet(Value *v)
568         {
569                 Ice::InstRet *ret = Ice::InstRet::create(::function, v);
570                 ::basicBlock->appendInst(ret);
571         }
572
573         void Nucleus::createBr(BasicBlock *dest)
574         {
575                 auto br = Ice::InstBr::create(::function, dest);
576                 ::basicBlock->appendInst(br);
577         }
578
579         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
580         {
581                 auto br = Ice::InstBr::create(::function, cond, ifTrue, ifFalse);
582                 ::basicBlock->appendInst(br);
583         }
584
585         static Value *createArithmetic(Ice::InstArithmetic::OpKind op, Value *lhs, Value *rhs)
586         {
587                 assert(lhs->getType() == rhs->getType() || (llvm::isa<Ice::Constant>(rhs) && (op == Ice::InstArithmetic::Shl || Ice::InstArithmetic::Lshr || Ice::InstArithmetic::Ashr)));
588
589                 Ice::Variable *result = ::function->makeVariable(lhs->getType());
590                 Ice::InstArithmetic *arithmetic = Ice::InstArithmetic::create(::function, op, result, lhs, rhs);
591                 ::basicBlock->appendInst(arithmetic);
592
593                 return V(result);
594         }
595
596         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
597         {
598                 return createArithmetic(Ice::InstArithmetic::Add, lhs, rhs);
599         }
600
601         Value *Nucleus::createSub(Value *lhs, Value *rhs)
602         {
603                 return createArithmetic(Ice::InstArithmetic::Sub, lhs, rhs);
604         }
605
606         Value *Nucleus::createMul(Value *lhs, Value *rhs)
607         {
608                 return createArithmetic(Ice::InstArithmetic::Mul, lhs, rhs);
609         }
610
611         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
612         {
613                 return createArithmetic(Ice::InstArithmetic::Udiv, lhs, rhs);
614         }
615
616         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
617         {
618                 return createArithmetic(Ice::InstArithmetic::Sdiv, lhs, rhs);
619         }
620
621         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
622         {
623                 return createArithmetic(Ice::InstArithmetic::Fadd, lhs, rhs);
624         }
625
626         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
627         {
628                 return createArithmetic(Ice::InstArithmetic::Fsub, lhs, rhs);
629         }
630
631         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
632         {
633                 return createArithmetic(Ice::InstArithmetic::Fmul, lhs, rhs);
634         }
635
636         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
637         {
638                 return createArithmetic(Ice::InstArithmetic::Fdiv, lhs, rhs);
639         }
640
641         Value *Nucleus::createURem(Value *lhs, Value *rhs)
642         {
643                 return createArithmetic(Ice::InstArithmetic::Urem, lhs, rhs);
644         }
645
646         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
647         {
648                 return createArithmetic(Ice::InstArithmetic::Srem, lhs, rhs);
649         }
650
651         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
652         {
653                 return createArithmetic(Ice::InstArithmetic::Frem, lhs, rhs);
654         }
655
656         Value *Nucleus::createShl(Value *lhs, Value *rhs)
657         {
658                 return createArithmetic(Ice::InstArithmetic::Shl, lhs, rhs);
659         }
660
661         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
662         {
663                 return createArithmetic(Ice::InstArithmetic::Lshr, lhs, rhs);
664         }
665
666         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
667         {
668                 return createArithmetic(Ice::InstArithmetic::Ashr, lhs, rhs);
669         }
670
671         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
672         {
673                 return createArithmetic(Ice::InstArithmetic::And, lhs, rhs);
674         }
675
676         Value *Nucleus::createOr(Value *lhs, Value *rhs)
677         {
678                 return createArithmetic(Ice::InstArithmetic::Or, lhs, rhs);
679         }
680
681         Value *Nucleus::createXor(Value *lhs, Value *rhs)
682         {
683                 return createArithmetic(Ice::InstArithmetic::Xor, lhs, rhs);
684         }
685
686         static Ice::Variable *createAssign(Ice::Operand *constant)
687         {
688                 Ice::Variable *value = ::function->makeVariable(constant->getType());
689                 auto assign = Ice::InstAssign::create(::function, value, constant);
690                 ::basicBlock->appendInst(assign);
691
692                 return value;
693         }
694
695         Value *Nucleus::createNeg(Value *v)
696         {
697                 return createSub(createNullValue(T(v->getType())), v);
698         }
699
700         Value *Nucleus::createFNeg(Value *v)
701         {
702                 double c[4] = {-0.0, -0.0, -0.0, -0.0};
703                 Value *negativeZero = Ice::isVectorType(v->getType()) ?
704                                       createConstantVector(c, T(v->getType())) :
705                                       V(::context->getConstantFloat(-0.0f));
706
707                 return createFSub(negativeZero, v);
708         }
709
710         Value *Nucleus::createNot(Value *v)
711         {
712                 if(Ice::isScalarIntegerType(v->getType()))
713                 {
714                         return createXor(v, V(::context->getConstantInt(v->getType(), -1)));
715                 }
716                 else   // Vector
717                 {
718                         int64_t c[4] = {-1, -1, -1, -1};
719                         return createXor(v, createConstantVector(c, T(v->getType())));
720                 }
721         }
722
723         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
724         {
725                 int valueType = (int)reinterpret_cast<intptr_t>(type);
726                 Ice::Variable *result = ::function->makeVariable(T(type));
727
728                 if(valueType & EmulatedBits)
729                 {
730                         switch(valueType)
731                         {
732                         case Type_v4i8:
733                         case Type_v2i16:
734                                 {
735                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
736                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
737                                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
738                                         load->addArg(ptr);
739                                         load->addArg(::context->getConstantInt32(4));
740                                         ::basicBlock->appendInst(load);
741                                 }
742                                 break;
743                         case Type_v2i32:
744                         case Type_v8i8:
745                         case Type_v4i16:
746                         case Type_v2f32:
747                                 {
748                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
749                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
750                                         auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
751                                         load->addArg(ptr);
752                                         load->addArg(::context->getConstantInt32(8));
753                                         ::basicBlock->appendInst(load);
754                                 }
755                                 break;
756                         default: assert(false && "UNIMPLEMENTED");
757                         }
758                 }
759                 else
760                 {
761                         auto load = Ice::InstLoad::create(::function, result, ptr, align);
762                         ::basicBlock->appendInst(load);
763                 }
764
765                 return V(result);
766         }
767
768         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
769         {
770                 int valueType = (int)reinterpret_cast<intptr_t>(type);
771
772                 if(valueType & EmulatedBits)
773                 {
774                         switch(valueType)
775                         {
776                         case Type_v4i8:
777                         case Type_v2i16:
778                                 {
779                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
780                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
781                                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
782                                         store->addArg(value);
783                                         store->addArg(ptr);
784                                         store->addArg(::context->getConstantInt32(4));
785                                         ::basicBlock->appendInst(store);
786                                 }
787                                 break;
788                         case Type_v2i32:
789                         case Type_v8i8:
790                         case Type_v4i16:
791                         case Type_v2f32:
792                                 {
793                                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
794                                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
795                                         auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
796                                         store->addArg(value);
797                                         store->addArg(ptr);
798                                         store->addArg(::context->getConstantInt32(8));
799                                         ::basicBlock->appendInst(store);
800                                 }
801                                 break;
802                         default: assert(false && "UNIMPLEMENTED");
803                         }
804                 }
805                 else
806                 {
807                         assert(T(value->getType()) == type);
808
809                         auto store = Ice::InstStore::create(::function, value, ptr, align);
810                         ::basicBlock->appendInst(store);
811                 }
812
813                 return value;
814         }
815
816         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
817         {
818                 assert(index->getType() == Ice::IceType_i32);
819
820                 if(auto *constant = llvm::dyn_cast<Ice::ConstantInteger32>(index))
821                 {
822                         int32_t offset = constant->getValue() * (int)Ice::typeWidthInBytes(T(type));
823
824                         if(offset == 0)
825                         {
826                                 return ptr;
827                         }
828
829                         return createAdd(ptr, createConstantInt(offset));
830                 }
831
832                 if(!Ice::isByteSizedType(T(type)))
833                 {
834                         index = createMul(index, createConstantInt((int)Ice::typeWidthInBytes(T(type))));
835                 }
836
837                 if(sizeof(void*) == 8)
838                 {
839                         index = createSExt(index, T(Ice::IceType_i64));
840                 }
841
842                 return createAdd(ptr, index);
843         }
844
845         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
846         {
847                 assert(false && "UNIMPLEMENTED"); return nullptr;
848         }
849
850         static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
851         {
852                 if(v->getType() == T(destType))
853                 {
854                         return v;
855                 }
856
857                 Ice::Variable *result = ::function->makeVariable(T(destType));
858                 Ice::InstCast *cast = Ice::InstCast::create(::function, op, result, v);
859                 ::basicBlock->appendInst(cast);
860
861                 return V(result);
862         }
863
864         Value *Nucleus::createTrunc(Value *v, Type *destType)
865         {
866                 return createCast(Ice::InstCast::Trunc, v, destType);
867         }
868
869         Value *Nucleus::createZExt(Value *v, Type *destType)
870         {
871                 return createCast(Ice::InstCast::Zext, v, destType);
872         }
873
874         Value *Nucleus::createSExt(Value *v, Type *destType)
875         {
876                 return createCast(Ice::InstCast::Sext, v, destType);
877         }
878
879         Value *Nucleus::createFPToSI(Value *v, Type *destType)
880         {
881                 return createCast(Ice::InstCast::Fptosi, v, destType);
882         }
883
884         Value *Nucleus::createSIToFP(Value *v, Type *destType)
885         {
886                 return createCast(Ice::InstCast::Sitofp, v, destType);
887         }
888
889         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
890         {
891                 return createCast(Ice::InstCast::Fptrunc, v, destType);
892         }
893
894         Value *Nucleus::createFPExt(Value *v, Type *destType)
895         {
896                 return createCast(Ice::InstCast::Fpext, v, destType);
897         }
898
899         Value *Nucleus::createBitCast(Value *v, Type *destType)
900         {
901                 return createCast(Ice::InstCast::Bitcast, v, destType);
902         }
903
904         static Value *createIntCompare(Ice::InstIcmp::ICond condition, Value *lhs, Value *rhs)
905         {
906                 assert(lhs->getType() == rhs->getType());
907
908                 auto result = ::function->makeVariable(Ice::isScalarIntegerType(lhs->getType()) ? Ice::IceType_i1 : lhs->getType());
909                 auto cmp = Ice::InstIcmp::create(::function, condition, result, lhs, rhs);
910                 ::basicBlock->appendInst(cmp);
911
912                 return V(result);
913         }
914
915         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
916         {
917                 return createIntCompare(Ice::InstIcmp::Eq, lhs, rhs);
918         }
919
920         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
921         {
922                 return createIntCompare(Ice::InstIcmp::Ne, lhs, rhs);
923         }
924
925         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
926         {
927                 return createIntCompare(Ice::InstIcmp::Ugt, lhs, rhs);
928         }
929
930         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
931         {
932                 return createIntCompare(Ice::InstIcmp::Uge, lhs, rhs);
933         }
934
935         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
936         {
937                 return createIntCompare(Ice::InstIcmp::Ult, lhs, rhs);
938         }
939
940         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
941         {
942                 return createIntCompare(Ice::InstIcmp::Ule, lhs, rhs);
943         }
944
945         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
946         {
947                 return createIntCompare(Ice::InstIcmp::Sgt, lhs, rhs);
948         }
949
950         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
951         {
952                 return createIntCompare(Ice::InstIcmp::Sge, lhs, rhs);
953         }
954
955         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
956         {
957                 return createIntCompare(Ice::InstIcmp::Slt, lhs, rhs);
958         }
959
960         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
961         {
962                 return createIntCompare(Ice::InstIcmp::Sle, lhs, rhs);
963         }
964
965         static Value *createFloatCompare(Ice::InstFcmp::FCond condition, Value *lhs, Value *rhs)
966         {
967                 assert(lhs->getType() == rhs->getType());
968                 assert(Ice::isScalarFloatingType(lhs->getType()) || lhs->getType() == Ice::IceType_v4f32);
969
970                 auto result = ::function->makeVariable(Ice::isScalarFloatingType(lhs->getType()) ? Ice::IceType_i1 : Ice::IceType_v4i32);
971                 auto cmp = Ice::InstFcmp::create(::function, condition, result, lhs, rhs);
972                 ::basicBlock->appendInst(cmp);
973
974                 return V(result);
975         }
976
977         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
978         {
979                 return createFloatCompare(Ice::InstFcmp::Oeq, lhs, rhs);
980         }
981
982         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
983         {
984                 return createFloatCompare(Ice::InstFcmp::Ogt, lhs, rhs);
985         }
986
987         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
988         {
989                 return createFloatCompare(Ice::InstFcmp::Oge, lhs, rhs);
990         }
991
992         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
993         {
994                 return createFloatCompare(Ice::InstFcmp::Olt, lhs, rhs);
995         }
996
997         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
998         {
999                 return createFloatCompare(Ice::InstFcmp::Ole, lhs, rhs);
1000         }
1001
1002         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1003         {
1004                 return createFloatCompare(Ice::InstFcmp::One, lhs, rhs);
1005         }
1006
1007         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1008         {
1009                 return createFloatCompare(Ice::InstFcmp::Ord, lhs, rhs);
1010         }
1011
1012         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1013         {
1014                 return createFloatCompare(Ice::InstFcmp::Uno, lhs, rhs);
1015         }
1016
1017         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1018         {
1019                 return createFloatCompare(Ice::InstFcmp::Ueq, lhs, rhs);
1020         }
1021
1022         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1023         {
1024                 return createFloatCompare(Ice::InstFcmp::Ugt, lhs, rhs);
1025         }
1026
1027         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1028         {
1029                 return createFloatCompare(Ice::InstFcmp::Uge, lhs, rhs);
1030         }
1031
1032         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1033         {
1034                 return createFloatCompare(Ice::InstFcmp::Ult, lhs, rhs);
1035         }
1036
1037         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1038         {
1039                 return createFloatCompare(Ice::InstFcmp::Ule, lhs, rhs);
1040         }
1041
1042         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1043         {
1044                 return createFloatCompare(Ice::InstFcmp::Une, lhs, rhs);
1045         }
1046
1047         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1048         {
1049                 auto result = ::function->makeVariable(T(type));
1050                 auto extract = Ice::InstExtractElement::create(::function, result, vector, ::context->getConstantInt32(index));
1051                 ::basicBlock->appendInst(extract);
1052
1053                 return V(result);
1054         }
1055
1056         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1057         {
1058                 auto result = ::function->makeVariable(vector->getType());
1059                 auto insert = Ice::InstInsertElement::create(::function, result, vector, element, ::context->getConstantInt32(index));
1060                 ::basicBlock->appendInst(insert);
1061
1062                 return V(result);
1063         }
1064
1065         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
1066         {
1067                 assert(V1->getType() == V2->getType());
1068
1069                 int size = Ice::typeNumElements(V1->getType());
1070                 auto result = ::function->makeVariable(V1->getType());
1071                 auto shuffle = Ice::InstShuffleVector::create(::function, result, V1, V2);
1072
1073                 for(int i = 0; i < size; i++)
1074                 {
1075                         shuffle->addIndex(llvm::cast<Ice::ConstantInteger32>(::context->getConstantInt32(select[i])));
1076                 }
1077
1078                 ::basicBlock->appendInst(shuffle);
1079
1080                 return V(result);
1081         }
1082
1083         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
1084         {
1085                 assert(ifTrue->getType() == ifFalse->getType());
1086
1087                 auto result = ::function->makeVariable(ifTrue->getType());
1088                 auto *select = Ice::InstSelect::create(::function, result, C, ifTrue, ifFalse);
1089                 ::basicBlock->appendInst(select);
1090
1091                 return V(result);
1092         }
1093
1094         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1095         {
1096                 auto switchInst = Ice::InstSwitch::create(::function, numCases, control, defaultBranch);
1097                 ::basicBlock->appendInst(switchInst);
1098
1099                 return reinterpret_cast<SwitchCases*>(switchInst);
1100         }
1101
1102         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1103         {
1104                 switchCases->addBranch(label, label, branch);
1105         }
1106
1107         void Nucleus::createUnreachable()
1108         {
1109                 Ice::InstUnreachable *unreachable = Ice::InstUnreachable::create(::function);
1110                 ::basicBlock->appendInst(unreachable);
1111         }
1112
1113         static Value *createSwizzle4(Value *val, unsigned char select)
1114         {
1115                 int swizzle[4] =
1116                 {
1117                         (select >> 0) & 0x03,
1118                         (select >> 2) & 0x03,
1119                         (select >> 4) & 0x03,
1120                         (select >> 6) & 0x03,
1121                 };
1122
1123                 return Nucleus::createShuffleVector(val, val, swizzle);
1124         }
1125
1126         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1127         {
1128                 int64_t mask[4] = {0, 0, 0, 0};
1129
1130                 mask[(select >> 0) & 0x03] = -1;
1131                 mask[(select >> 2) & 0x03] = -1;
1132                 mask[(select >> 4) & 0x03] = -1;
1133                 mask[(select >> 6) & 0x03] = -1;
1134
1135                 Value *condition = Nucleus::createConstantVector(mask, T(Ice::IceType_v4i1));
1136                 Value *result = Nucleus::createSelect(condition, rhs, lhs);
1137
1138                 return result;
1139         }
1140
1141         Type *Nucleus::getPointerType(Type *ElementType)
1142         {
1143                 if(sizeof(void*) == 8)
1144                 {
1145                         return T(Ice::IceType_i64);
1146                 }
1147                 else
1148                 {
1149                         return T(Ice::IceType_i32);
1150                 }
1151         }
1152
1153         Value *Nucleus::createNullValue(Type *Ty)
1154         {
1155                 if(Ice::isVectorType(T(Ty)))
1156                 {
1157                         int64_t c[4] = {0, 0, 0, 0};
1158                         return createConstantVector(c, Ty);
1159                 }
1160                 else
1161                 {
1162                         return V(::context->getConstantZero(T(Ty)));
1163                 }
1164         }
1165
1166         Value *Nucleus::createConstantLong(int64_t i)
1167         {
1168                 return V(::context->getConstantInt64(i));
1169         }
1170
1171         Value *Nucleus::createConstantInt(int i)
1172         {
1173                 return V(::context->getConstantInt32(i));
1174         }
1175
1176         Value *Nucleus::createConstantInt(unsigned int i)
1177         {
1178                 return V(::context->getConstantInt32(i));
1179         }
1180
1181         Value *Nucleus::createConstantBool(bool b)
1182         {
1183                 return V(::context->getConstantInt1(b));
1184         }
1185
1186         Value *Nucleus::createConstantByte(signed char i)
1187         {
1188                 return V(::context->getConstantInt8(i));
1189         }
1190
1191         Value *Nucleus::createConstantByte(unsigned char i)
1192         {
1193                 return V(::context->getConstantInt8(i));
1194         }
1195
1196         Value *Nucleus::createConstantShort(short i)
1197         {
1198                 return V(::context->getConstantInt16(i));
1199         }
1200
1201         Value *Nucleus::createConstantShort(unsigned short i)
1202         {
1203                 return V(::context->getConstantInt16(i));
1204         }
1205
1206         Value *Nucleus::createConstantFloat(float x)
1207         {
1208                 return V(::context->getConstantFloat(x));
1209         }
1210
1211         Value *Nucleus::createNullPointer(Type *Ty)
1212         {
1213                 return createNullValue(T(sizeof(void*) == 8 ? Ice::IceType_i64 : Ice::IceType_i32));
1214         }
1215
1216         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1217         {
1218                 const int vectorSize = 16;
1219                 assert(Ice::typeWidthInBytes(T(type)) == vectorSize);
1220                 const int alignment = vectorSize;
1221                 auto globalPool = ::function->getGlobalPool();
1222
1223                 const int64_t *i = constants;
1224                 const double *f = reinterpret_cast<const double*>(constants);
1225                 Ice::VariableDeclaration::DataInitializer *dataInitializer = nullptr;
1226
1227                 switch((int)reinterpret_cast<intptr_t>(type))
1228                 {
1229                 case Ice::IceType_v4i32:
1230                 case Ice::IceType_v4i1:
1231                         {
1232                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[2], (int)i[3]};
1233                                 static_assert(sizeof(initializer) == vectorSize, "!");
1234                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1235                         }
1236                         break;
1237                 case Ice::IceType_v4f32:
1238                         {
1239                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[2], (float)f[3]};
1240                                 static_assert(sizeof(initializer) == vectorSize, "!");
1241                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1242                         }
1243                         break;
1244                 case Ice::IceType_v8i16:
1245                 case Ice::IceType_v8i1:
1246                         {
1247                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[4], (short)i[5], (short)i[6], (short)i[7]};
1248                                 static_assert(sizeof(initializer) == vectorSize, "!");
1249                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1250                         }
1251                         break;
1252                 case Ice::IceType_v16i8:
1253                 case Ice::IceType_v16i1:
1254                         {
1255                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[8], (char)i[9], (char)i[10], (char)i[11], (char)i[12], (char)i[13], (char)i[14], (char)i[15]};
1256                                 static_assert(sizeof(initializer) == vectorSize, "!");
1257                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1258                         }
1259                         break;
1260                 case Type_v2i32:
1261                         {
1262                                 const int initializer[4] = {(int)i[0], (int)i[1], (int)i[0], (int)i[1]};
1263                                 static_assert(sizeof(initializer) == vectorSize, "!");
1264                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1265                         }
1266                         break;
1267                 case Type_v2f32:
1268                         {
1269                                 const float initializer[4] = {(float)f[0], (float)f[1], (float)f[0], (float)f[1]};
1270                                 static_assert(sizeof(initializer) == vectorSize, "!");
1271                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1272                         }
1273                         break;
1274                 case Type_v4i16:
1275                         {
1276                                 const short initializer[8] = {(short)i[0], (short)i[1], (short)i[2], (short)i[3], (short)i[0], (short)i[1], (short)i[2], (short)i[3]};
1277                                 static_assert(sizeof(initializer) == vectorSize, "!");
1278                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1279                         }
1280                         break;
1281                 case Type_v8i8:
1282                         {
1283                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[4], (char)i[5], (char)i[6], (char)i[7]};
1284                                 static_assert(sizeof(initializer) == vectorSize, "!");
1285                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1286                         }
1287                         break;
1288                 case Type_v4i8:
1289                         {
1290                                 const char initializer[16] = {(char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3], (char)i[0], (char)i[1], (char)i[2], (char)i[3]};
1291                                 static_assert(sizeof(initializer) == vectorSize, "!");
1292                                 dataInitializer = Ice::VariableDeclaration::DataInitializer::create(globalPool, (const char*)initializer, vectorSize);
1293                         }
1294                         break;
1295                 default:
1296                         assert(false && "Unknown constant vector type" && type);
1297                 }
1298
1299                 auto name = Ice::GlobalString::createWithoutString(::context);
1300                 auto *variableDeclaration = Ice::VariableDeclaration::create(globalPool);
1301                 variableDeclaration->setName(name);
1302                 variableDeclaration->setAlignment(alignment);
1303                 variableDeclaration->setIsConstant(true);
1304                 variableDeclaration->addInitializer(dataInitializer);
1305
1306                 ::function->addGlobal(variableDeclaration);
1307
1308                 constexpr int32_t offset = 0;
1309                 Ice::Operand *ptr = ::context->getConstantSym(offset, name);
1310
1311                 Ice::Variable *result = ::function->makeVariable(T(type));
1312                 auto load = Ice::InstLoad::create(::function, result, ptr, alignment);
1313                 ::basicBlock->appendInst(load);
1314
1315                 return V(result);
1316         }
1317
1318         Value *Nucleus::createConstantVector(const double *constants, Type *type)
1319         {
1320                 return createConstantVector((const int64_t*)constants, type);
1321         }
1322
1323         Type *Void::getType()
1324         {
1325                 return T(Ice::IceType_void);
1326         }
1327
1328         Bool::Bool(Argument<Bool> argument)
1329         {
1330                 storeValue(argument.value);
1331         }
1332
1333         Bool::Bool(bool x)
1334         {
1335                 storeValue(Nucleus::createConstantBool(x));
1336         }
1337
1338         Bool::Bool(RValue<Bool> rhs)
1339         {
1340                 storeValue(rhs.value);
1341         }
1342
1343         Bool::Bool(const Bool &rhs)
1344         {
1345                 Value *value = rhs.loadValue();
1346                 storeValue(value);
1347         }
1348
1349         Bool::Bool(const Reference<Bool> &rhs)
1350         {
1351                 Value *value = rhs.loadValue();
1352                 storeValue(value);
1353         }
1354
1355         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1356         {
1357                 storeValue(rhs.value);
1358
1359                 return rhs;
1360         }
1361
1362         RValue<Bool> Bool::operator=(const Bool &rhs)
1363         {
1364                 Value *value = rhs.loadValue();
1365                 storeValue(value);
1366
1367                 return RValue<Bool>(value);
1368         }
1369
1370         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1371         {
1372                 Value *value = rhs.loadValue();
1373                 storeValue(value);
1374
1375                 return RValue<Bool>(value);
1376         }
1377
1378         RValue<Bool> operator!(RValue<Bool> val)
1379         {
1380                 return RValue<Bool>(Nucleus::createNot(val.value));
1381         }
1382
1383         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1384         {
1385                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1386         }
1387
1388         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1389         {
1390                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1391         }
1392
1393         Type *Bool::getType()
1394         {
1395                 return T(Ice::IceType_i1);
1396         }
1397
1398         Byte::Byte(Argument<Byte> argument)
1399         {
1400                 storeValue(argument.value);
1401         }
1402
1403         Byte::Byte(RValue<Int> cast)
1404         {
1405                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1406
1407                 storeValue(integer);
1408         }
1409
1410         Byte::Byte(RValue<UInt> cast)
1411         {
1412                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1413
1414                 storeValue(integer);
1415         }
1416
1417         Byte::Byte(RValue<UShort> cast)
1418         {
1419                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1420
1421                 storeValue(integer);
1422         }
1423
1424         Byte::Byte(int x)
1425         {
1426                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1427         }
1428
1429         Byte::Byte(unsigned char x)
1430         {
1431                 storeValue(Nucleus::createConstantByte(x));
1432         }
1433
1434         Byte::Byte(RValue<Byte> rhs)
1435         {
1436                 storeValue(rhs.value);
1437         }
1438
1439         Byte::Byte(const Byte &rhs)
1440         {
1441                 Value *value = rhs.loadValue();
1442                 storeValue(value);
1443         }
1444
1445         Byte::Byte(const Reference<Byte> &rhs)
1446         {
1447                 Value *value = rhs.loadValue();
1448                 storeValue(value);
1449         }
1450
1451         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1452         {
1453                 storeValue(rhs.value);
1454
1455                 return rhs;
1456         }
1457
1458         RValue<Byte> Byte::operator=(const Byte &rhs)
1459         {
1460                 Value *value = rhs.loadValue();
1461                 storeValue(value);
1462
1463                 return RValue<Byte>(value);
1464         }
1465
1466         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1467         {
1468                 Value *value = rhs.loadValue();
1469                 storeValue(value);
1470
1471                 return RValue<Byte>(value);
1472         }
1473
1474         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1475         {
1476                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1477         }
1478
1479         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1480         {
1481                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1482         }
1483
1484         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1485         {
1486                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1487         }
1488
1489         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1490         {
1491                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1492         }
1493
1494         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1495         {
1496                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1497         }
1498
1499         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1500         {
1501                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1502         }
1503
1504         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1505         {
1506                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1507         }
1508
1509         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1510         {
1511                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1512         }
1513
1514         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1515         {
1516                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1517         }
1518
1519         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1520         {
1521                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1522         }
1523
1524         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1525         {
1526                 return lhs = lhs + rhs;
1527         }
1528
1529         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1530         {
1531                 return lhs = lhs - rhs;
1532         }
1533
1534         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1535         {
1536                 return lhs = lhs * rhs;
1537         }
1538
1539         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1540         {
1541                 return lhs = lhs / rhs;
1542         }
1543
1544         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1545         {
1546                 return lhs = lhs % rhs;
1547         }
1548
1549         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1550         {
1551                 return lhs = lhs & rhs;
1552         }
1553
1554         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1555         {
1556                 return lhs = lhs | rhs;
1557         }
1558
1559         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1560         {
1561                 return lhs = lhs ^ rhs;
1562         }
1563
1564         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1565         {
1566                 return lhs = lhs << rhs;
1567         }
1568
1569         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1570         {
1571                 return lhs = lhs >> rhs;
1572         }
1573
1574         RValue<Byte> operator+(RValue<Byte> val)
1575         {
1576                 return val;
1577         }
1578
1579         RValue<Byte> operator-(RValue<Byte> val)
1580         {
1581                 return RValue<Byte>(Nucleus::createNeg(val.value));
1582         }
1583
1584         RValue<Byte> operator~(RValue<Byte> val)
1585         {
1586                 return RValue<Byte>(Nucleus::createNot(val.value));
1587         }
1588
1589         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1590         {
1591                 RValue<Byte> res = val;
1592                 val += Byte(1);
1593                 return res;
1594         }
1595
1596         const Byte &operator++(Byte &val)   // Pre-increment
1597         {
1598                 val += Byte(1);
1599                 return val;
1600         }
1601
1602         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1603         {
1604                 RValue<Byte> res = val;
1605                 val -= Byte(1);
1606                 return res;
1607         }
1608
1609         const Byte &operator--(Byte &val)   // Pre-decrement
1610         {
1611                 val -= Byte(1);
1612                 return val;
1613         }
1614
1615         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1616         {
1617                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1618         }
1619
1620         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1621         {
1622                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1623         }
1624
1625         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1626         {
1627                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1628         }
1629
1630         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1631         {
1632                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1633         }
1634
1635         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1636         {
1637                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1638         }
1639
1640         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1641         {
1642                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1643         }
1644
1645         Type *Byte::getType()
1646         {
1647                 return T(Ice::IceType_i8);
1648         }
1649
1650         SByte::SByte(Argument<SByte> argument)
1651         {
1652                 storeValue(argument.value);
1653         }
1654
1655         SByte::SByte(RValue<Int> cast)
1656         {
1657                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1658
1659                 storeValue(integer);
1660         }
1661
1662         SByte::SByte(RValue<Short> cast)
1663         {
1664                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1665
1666                 storeValue(integer);
1667         }
1668
1669         SByte::SByte(signed char x)
1670         {
1671                 storeValue(Nucleus::createConstantByte(x));
1672         }
1673
1674         SByte::SByte(RValue<SByte> rhs)
1675         {
1676                 storeValue(rhs.value);
1677         }
1678
1679         SByte::SByte(const SByte &rhs)
1680         {
1681                 Value *value = rhs.loadValue();
1682                 storeValue(value);
1683         }
1684
1685         SByte::SByte(const Reference<SByte> &rhs)
1686         {
1687                 Value *value = rhs.loadValue();
1688                 storeValue(value);
1689         }
1690
1691         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1692         {
1693                 storeValue(rhs.value);
1694
1695                 return rhs;
1696         }
1697
1698         RValue<SByte> SByte::operator=(const SByte &rhs)
1699         {
1700                 Value *value = rhs.loadValue();
1701                 storeValue(value);
1702
1703                 return RValue<SByte>(value);
1704         }
1705
1706         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1707         {
1708                 Value *value = rhs.loadValue();
1709                 storeValue(value);
1710
1711                 return RValue<SByte>(value);
1712         }
1713
1714         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1715         {
1716                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1717         }
1718
1719         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1720         {
1721                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1722         }
1723
1724         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1725         {
1726                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1727         }
1728
1729         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1730         {
1731                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1732         }
1733
1734         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1735         {
1736                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1737         }
1738
1739         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1740         {
1741                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1742         }
1743
1744         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1745         {
1746                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1747         }
1748
1749         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1750         {
1751                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1752         }
1753
1754         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1755         {
1756                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1757         }
1758
1759         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1760         {
1761                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1762         }
1763
1764         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1765         {
1766                 return lhs = lhs + rhs;
1767         }
1768
1769         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1770         {
1771                 return lhs = lhs - rhs;
1772         }
1773
1774         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1775         {
1776                 return lhs = lhs * rhs;
1777         }
1778
1779         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1780         {
1781                 return lhs = lhs / rhs;
1782         }
1783
1784         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1785         {
1786                 return lhs = lhs % rhs;
1787         }
1788
1789         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1790         {
1791                 return lhs = lhs & rhs;
1792         }
1793
1794         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1795         {
1796                 return lhs = lhs | rhs;
1797         }
1798
1799         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1800         {
1801                 return lhs = lhs ^ rhs;
1802         }
1803
1804         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1805         {
1806                 return lhs = lhs << rhs;
1807         }
1808
1809         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1810         {
1811                 return lhs = lhs >> rhs;
1812         }
1813
1814         RValue<SByte> operator+(RValue<SByte> val)
1815         {
1816                 return val;
1817         }
1818
1819         RValue<SByte> operator-(RValue<SByte> val)
1820         {
1821                 return RValue<SByte>(Nucleus::createNeg(val.value));
1822         }
1823
1824         RValue<SByte> operator~(RValue<SByte> val)
1825         {
1826                 return RValue<SByte>(Nucleus::createNot(val.value));
1827         }
1828
1829         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1830         {
1831                 RValue<SByte> res = val;
1832                 val += SByte(1);
1833                 return res;
1834         }
1835
1836         const SByte &operator++(SByte &val)   // Pre-increment
1837         {
1838                 val += SByte(1);
1839                 return val;
1840         }
1841
1842         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
1843         {
1844                 RValue<SByte> res = val;
1845                 val -= SByte(1);
1846                 return res;
1847         }
1848
1849         const SByte &operator--(SByte &val)   // Pre-decrement
1850         {
1851                 val -= SByte(1);
1852                 return val;
1853         }
1854
1855         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1856         {
1857                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1858         }
1859
1860         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1861         {
1862                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1863         }
1864
1865         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1866         {
1867                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1868         }
1869
1870         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1871         {
1872                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1873         }
1874
1875         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1876         {
1877                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1878         }
1879
1880         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1881         {
1882                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1883         }
1884
1885         Type *SByte::getType()
1886         {
1887                 return T(Ice::IceType_i8);
1888         }
1889
1890         Short::Short(Argument<Short> argument)
1891         {
1892                 storeValue(argument.value);
1893         }
1894
1895         Short::Short(RValue<Int> cast)
1896         {
1897                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1898
1899                 storeValue(integer);
1900         }
1901
1902         Short::Short(short x)
1903         {
1904                 storeValue(Nucleus::createConstantShort(x));
1905         }
1906
1907         Short::Short(RValue<Short> rhs)
1908         {
1909                 storeValue(rhs.value);
1910         }
1911
1912         Short::Short(const Short &rhs)
1913         {
1914                 Value *value = rhs.loadValue();
1915                 storeValue(value);
1916         }
1917
1918         Short::Short(const Reference<Short> &rhs)
1919         {
1920                 Value *value = rhs.loadValue();
1921                 storeValue(value);
1922         }
1923
1924         RValue<Short> Short::operator=(RValue<Short> rhs)
1925         {
1926                 storeValue(rhs.value);
1927
1928                 return rhs;
1929         }
1930
1931         RValue<Short> Short::operator=(const Short &rhs)
1932         {
1933                 Value *value = rhs.loadValue();
1934                 storeValue(value);
1935
1936                 return RValue<Short>(value);
1937         }
1938
1939         RValue<Short> Short::operator=(const Reference<Short> &rhs)
1940         {
1941                 Value *value = rhs.loadValue();
1942                 storeValue(value);
1943
1944                 return RValue<Short>(value);
1945         }
1946
1947         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1948         {
1949                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1950         }
1951
1952         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1953         {
1954                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1955         }
1956
1957         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1958         {
1959                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1960         }
1961
1962         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1963         {
1964                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1965         }
1966
1967         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1968         {
1969                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1970         }
1971
1972         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1973         {
1974                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1975         }
1976
1977         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1978         {
1979                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1980         }
1981
1982         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1983         {
1984                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1985         }
1986
1987         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1988         {
1989                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1990         }
1991
1992         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1993         {
1994                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1995         }
1996
1997         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
1998         {
1999                 return lhs = lhs + rhs;
2000         }
2001
2002         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2003         {
2004                 return lhs = lhs - rhs;
2005         }
2006
2007         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2008         {
2009                 return lhs = lhs * rhs;
2010         }
2011
2012         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2013         {
2014                 return lhs = lhs / rhs;
2015         }
2016
2017         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2018         {
2019                 return lhs = lhs % rhs;
2020         }
2021
2022         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2023         {
2024                 return lhs = lhs & rhs;
2025         }
2026
2027         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2028         {
2029                 return lhs = lhs | rhs;
2030         }
2031
2032         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2033         {
2034                 return lhs = lhs ^ rhs;
2035         }
2036
2037         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2038         {
2039                 return lhs = lhs << rhs;
2040         }
2041
2042         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2043         {
2044                 return lhs = lhs >> rhs;
2045         }
2046
2047         RValue<Short> operator+(RValue<Short> val)
2048         {
2049                 return val;
2050         }
2051
2052         RValue<Short> operator-(RValue<Short> val)
2053         {
2054                 return RValue<Short>(Nucleus::createNeg(val.value));
2055         }
2056
2057         RValue<Short> operator~(RValue<Short> val)
2058         {
2059                 return RValue<Short>(Nucleus::createNot(val.value));
2060         }
2061
2062         RValue<Short> operator++(Short &val, int)   // Post-increment
2063         {
2064                 RValue<Short> res = val;
2065                 val += Short(1);
2066                 return res;
2067         }
2068
2069         const Short &operator++(Short &val)   // Pre-increment
2070         {
2071                 val += Short(1);
2072                 return val;
2073         }
2074
2075         RValue<Short> operator--(Short &val, int)   // Post-decrement
2076         {
2077                 RValue<Short> res = val;
2078                 val -= Short(1);
2079                 return res;
2080         }
2081
2082         const Short &operator--(Short &val)   // Pre-decrement
2083         {
2084                 val -= Short(1);
2085                 return val;
2086         }
2087
2088         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2089         {
2090                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2091         }
2092
2093         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2094         {
2095                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2096         }
2097
2098         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2099         {
2100                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2101         }
2102
2103         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2104         {
2105                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2106         }
2107
2108         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2109         {
2110                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2111         }
2112
2113         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2114         {
2115                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2116         }
2117
2118         Type *Short::getType()
2119         {
2120                 return T(Ice::IceType_i16);
2121         }
2122
2123         UShort::UShort(Argument<UShort> argument)
2124         {
2125                 storeValue(argument.value);
2126         }
2127
2128         UShort::UShort(RValue<UInt> cast)
2129         {
2130                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2131
2132                 storeValue(integer);
2133         }
2134
2135         UShort::UShort(RValue<Int> cast)
2136         {
2137                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2138
2139                 storeValue(integer);
2140         }
2141
2142         UShort::UShort(unsigned short x)
2143         {
2144                 storeValue(Nucleus::createConstantShort(x));
2145         }
2146
2147         UShort::UShort(RValue<UShort> rhs)
2148         {
2149                 storeValue(rhs.value);
2150         }
2151
2152         UShort::UShort(const UShort &rhs)
2153         {
2154                 Value *value = rhs.loadValue();
2155                 storeValue(value);
2156         }
2157
2158         UShort::UShort(const Reference<UShort> &rhs)
2159         {
2160                 Value *value = rhs.loadValue();
2161                 storeValue(value);
2162         }
2163
2164         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2165         {
2166                 storeValue(rhs.value);
2167
2168                 return rhs;
2169         }
2170
2171         RValue<UShort> UShort::operator=(const UShort &rhs)
2172         {
2173                 Value *value = rhs.loadValue();
2174                 storeValue(value);
2175
2176                 return RValue<UShort>(value);
2177         }
2178
2179         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2180         {
2181                 Value *value = rhs.loadValue();
2182                 storeValue(value);
2183
2184                 return RValue<UShort>(value);
2185         }
2186
2187         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2188         {
2189                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2190         }
2191
2192         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2193         {
2194                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2195         }
2196
2197         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2198         {
2199                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2200         }
2201
2202         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2203         {
2204                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2205         }
2206
2207         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2208         {
2209                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2210         }
2211
2212         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2213         {
2214                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2215         }
2216
2217         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2218         {
2219                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2220         }
2221
2222         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2223         {
2224                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2225         }
2226
2227         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2228         {
2229                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2230         }
2231
2232         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2233         {
2234                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2235         }
2236
2237         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2238         {
2239                 return lhs = lhs + rhs;
2240         }
2241
2242         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2243         {
2244                 return lhs = lhs - rhs;
2245         }
2246
2247         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2248         {
2249                 return lhs = lhs * rhs;
2250         }
2251
2252         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2253         {
2254                 return lhs = lhs / rhs;
2255         }
2256
2257         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2258         {
2259                 return lhs = lhs % rhs;
2260         }
2261
2262         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2263         {
2264                 return lhs = lhs & rhs;
2265         }
2266
2267         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2268         {
2269                 return lhs = lhs | rhs;
2270         }
2271
2272         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2273         {
2274                 return lhs = lhs ^ rhs;
2275         }
2276
2277         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2278         {
2279                 return lhs = lhs << rhs;
2280         }
2281
2282         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2283         {
2284                 return lhs = lhs >> rhs;
2285         }
2286
2287         RValue<UShort> operator+(RValue<UShort> val)
2288         {
2289                 return val;
2290         }
2291
2292         RValue<UShort> operator-(RValue<UShort> val)
2293         {
2294                 return RValue<UShort>(Nucleus::createNeg(val.value));
2295         }
2296
2297         RValue<UShort> operator~(RValue<UShort> val)
2298         {
2299                 return RValue<UShort>(Nucleus::createNot(val.value));
2300         }
2301
2302         RValue<UShort> operator++(UShort &val, int)   // Post-increment
2303         {
2304                 RValue<UShort> res = val;
2305                 val += UShort(1);
2306                 return res;
2307         }
2308
2309         const UShort &operator++(UShort &val)   // Pre-increment
2310         {
2311                 val += UShort(1);
2312                 return val;
2313         }
2314
2315         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2316         {
2317                 RValue<UShort> res = val;
2318                 val -= UShort(1);
2319                 return res;
2320         }
2321
2322         const UShort &operator--(UShort &val)   // Pre-decrement
2323         {
2324                 val -= UShort(1);
2325                 return val;
2326         }
2327
2328         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2329         {
2330                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2331         }
2332
2333         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2334         {
2335                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2336         }
2337
2338         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2339         {
2340                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2341         }
2342
2343         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2344         {
2345                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2346         }
2347
2348         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2349         {
2350                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2351         }
2352
2353         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2354         {
2355                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2356         }
2357
2358         Type *UShort::getType()
2359         {
2360                 return T(Ice::IceType_i16);
2361         }
2362
2363         Byte4::Byte4(RValue<Byte8> cast)
2364         {
2365                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2366         }
2367
2368         Byte4::Byte4(const Reference<Byte4> &rhs)
2369         {
2370                 Value *value = rhs.loadValue();
2371                 storeValue(value);
2372         }
2373
2374         Type *Byte4::getType()
2375         {
2376                 return T(Type_v4i8);
2377         }
2378
2379         Type *SByte4::getType()
2380         {
2381                 return T(Type_v4i8);
2382         }
2383
2384         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2385         {
2386                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2387                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2388         }
2389
2390         Byte8::Byte8(RValue<Byte8> rhs)
2391         {
2392                 storeValue(rhs.value);
2393         }
2394
2395         Byte8::Byte8(const Byte8 &rhs)
2396         {
2397                 Value *value = rhs.loadValue();
2398                 storeValue(value);
2399         }
2400
2401         Byte8::Byte8(const Reference<Byte8> &rhs)
2402         {
2403                 Value *value = rhs.loadValue();
2404                 storeValue(value);
2405         }
2406
2407         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2408         {
2409                 storeValue(rhs.value);
2410
2411                 return rhs;
2412         }
2413
2414         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2415         {
2416                 Value *value = rhs.loadValue();
2417                 storeValue(value);
2418
2419                 return RValue<Byte8>(value);
2420         }
2421
2422         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2423         {
2424                 Value *value = rhs.loadValue();
2425                 storeValue(value);
2426
2427                 return RValue<Byte8>(value);
2428         }
2429
2430         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2431         {
2432                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2433         }
2434
2435         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2436         {
2437                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2438         }
2439
2440 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2441 //      {
2442 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2443 //      }
2444
2445 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2446 //      {
2447 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2448 //      }
2449
2450 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2451 //      {
2452 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2453 //      }
2454
2455         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2456         {
2457                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2458         }
2459
2460         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2461         {
2462                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2463         }
2464
2465         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2466         {
2467                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2468         }
2469
2470 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2471 //      {
2472 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2473 //      }
2474
2475 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2476 //      {
2477 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
2478 //      }
2479
2480         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2481         {
2482                 return lhs = lhs + rhs;
2483         }
2484
2485         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2486         {
2487                 return lhs = lhs - rhs;
2488         }
2489
2490 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2491 //      {
2492 //              return lhs = lhs * rhs;
2493 //      }
2494
2495 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2496 //      {
2497 //              return lhs = lhs / rhs;
2498 //      }
2499
2500 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2501 //      {
2502 //              return lhs = lhs % rhs;
2503 //      }
2504
2505         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2506         {
2507                 return lhs = lhs & rhs;
2508         }
2509
2510         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2511         {
2512                 return lhs = lhs | rhs;
2513         }
2514
2515         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2516         {
2517                 return lhs = lhs ^ rhs;
2518         }
2519
2520 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2521 //      {
2522 //              return lhs = lhs << rhs;
2523 //      }
2524
2525 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2526 //      {
2527 //              return lhs = lhs >> rhs;
2528 //      }
2529
2530 //      RValue<Byte8> operator+(RValue<Byte8> val)
2531 //      {
2532 //              return val;
2533 //      }
2534
2535 //      RValue<Byte8> operator-(RValue<Byte8> val)
2536 //      {
2537 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2538 //      }
2539
2540         RValue<Byte8> operator~(RValue<Byte8> val)
2541         {
2542                 return RValue<Byte8>(Nucleus::createNot(val.value));
2543         }
2544
2545         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2546         {
2547                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2548                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2549                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2550                 auto paddusb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2551                 paddusb->addArg(x.value);
2552                 paddusb->addArg(y.value);
2553                 ::basicBlock->appendInst(paddusb);
2554
2555                 return RValue<Byte8>(V(result));
2556         }
2557
2558         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2559         {
2560                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2561                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2562                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2563                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2564                 psubusw->addArg(x.value);
2565                 psubusw->addArg(y.value);
2566                 ::basicBlock->appendInst(psubusw);
2567
2568                 return RValue<Byte8>(V(result));
2569         }
2570
2571         RValue<Short4> Unpack(RValue<Byte4> x)
2572         {
2573                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2574                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2575         }
2576
2577         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2578         {
2579                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2580                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2581         }
2582
2583         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2584         {
2585                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2586                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2587                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2588         }
2589
2590         RValue<Int> SignMask(RValue<Byte8> x)
2591         {
2592                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2593                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2594                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2595                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2596                 movmsk->addArg(x.value);
2597                 ::basicBlock->appendInst(movmsk);
2598
2599                 return RValue<Int>(V(result));
2600         }
2601
2602 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2603 //      {
2604 //              return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Ugt, x.value, y.value));
2605 //      }
2606
2607         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2608         {
2609                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2610         }
2611
2612         Type *Byte8::getType()
2613         {
2614                 return T(Type_v8i8);
2615         }
2616
2617         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2618         {
2619                 int64_t constantVector[8] = { x0, x1, x2, x3, x4, x5, x6, x7 };
2620                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2621
2622                 storeValue(Nucleus::createBitCast(vector, getType()));
2623         }
2624
2625         SByte8::SByte8(RValue<SByte8> rhs)
2626         {
2627                 storeValue(rhs.value);
2628         }
2629
2630         SByte8::SByte8(const SByte8 &rhs)
2631         {
2632                 Value *value = rhs.loadValue();
2633                 storeValue(value);
2634         }
2635
2636         SByte8::SByte8(const Reference<SByte8> &rhs)
2637         {
2638                 Value *value = rhs.loadValue();
2639                 storeValue(value);
2640         }
2641
2642         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2643         {
2644                 storeValue(rhs.value);
2645
2646                 return rhs;
2647         }
2648
2649         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2650         {
2651                 Value *value = rhs.loadValue();
2652                 storeValue(value);
2653
2654                 return RValue<SByte8>(value);
2655         }
2656
2657         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2658         {
2659                 Value *value = rhs.loadValue();
2660                 storeValue(value);
2661
2662                 return RValue<SByte8>(value);
2663         }
2664
2665         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2666         {
2667                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2668         }
2669
2670         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2671         {
2672                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2673         }
2674
2675 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2676 //      {
2677 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2678 //      }
2679
2680 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2681 //      {
2682 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2683 //      }
2684
2685 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2686 //      {
2687 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2688 //      }
2689
2690         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2691         {
2692                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2693         }
2694
2695         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2696         {
2697                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2698         }
2699
2700         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2701         {
2702                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2703         }
2704
2705 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2706 //      {
2707 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
2708 //      }
2709
2710 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2711 //      {
2712 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
2713 //      }
2714
2715         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2716         {
2717                 return lhs = lhs + rhs;
2718         }
2719
2720         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2721         {
2722                 return lhs = lhs - rhs;
2723         }
2724
2725 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2726 //      {
2727 //              return lhs = lhs * rhs;
2728 //      }
2729
2730 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2731 //      {
2732 //              return lhs = lhs / rhs;
2733 //      }
2734
2735 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2736 //      {
2737 //              return lhs = lhs % rhs;
2738 //      }
2739
2740         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2741         {
2742                 return lhs = lhs & rhs;
2743         }
2744
2745         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2746         {
2747                 return lhs = lhs | rhs;
2748         }
2749
2750         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2751         {
2752                 return lhs = lhs ^ rhs;
2753         }
2754
2755 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2756 //      {
2757 //              return lhs = lhs << rhs;
2758 //      }
2759
2760 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2761 //      {
2762 //              return lhs = lhs >> rhs;
2763 //      }
2764
2765 //      RValue<SByte8> operator+(RValue<SByte8> val)
2766 //      {
2767 //              return val;
2768 //      }
2769
2770 //      RValue<SByte8> operator-(RValue<SByte8> val)
2771 //      {
2772 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
2773 //      }
2774
2775         RValue<SByte8> operator~(RValue<SByte8> val)
2776         {
2777                 return RValue<SByte8>(Nucleus::createNot(val.value));
2778         }
2779
2780         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2781         {
2782                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2783                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2784                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2785                 auto paddsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2786                 paddsb->addArg(x.value);
2787                 paddsb->addArg(y.value);
2788                 ::basicBlock->appendInst(paddsb);
2789
2790                 return RValue<SByte8>(V(result));
2791         }
2792
2793         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2794         {
2795                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
2796                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2797                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2798                 auto psubsb = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
2799                 psubsb->addArg(x.value);
2800                 psubsb->addArg(y.value);
2801                 ::basicBlock->appendInst(psubsb);
2802
2803                 return RValue<SByte8>(V(result));
2804         }
2805
2806         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2807         {
2808                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2809                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2810         }
2811
2812         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2813         {
2814                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2815                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2816                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2817         }
2818
2819         RValue<Int> SignMask(RValue<SByte8> x)
2820         {
2821                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
2822                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
2823                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
2824                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
2825                 movmsk->addArg(x.value);
2826                 ::basicBlock->appendInst(movmsk);
2827
2828                 return RValue<Int>(V(result));
2829         }
2830
2831         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2832         {
2833                 return RValue<Byte8>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
2834         }
2835
2836         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2837         {
2838                 return RValue<Byte8>(Nucleus::createICmpEQ(x.value, y.value));
2839         }
2840
2841         Type *SByte8::getType()
2842         {
2843                 return T(Type_v8i8);
2844         }
2845
2846         Byte16::Byte16(RValue<Byte16> rhs)
2847         {
2848                 storeValue(rhs.value);
2849         }
2850
2851         Byte16::Byte16(const Byte16 &rhs)
2852         {
2853                 Value *value = rhs.loadValue();
2854                 storeValue(value);
2855         }
2856
2857         Byte16::Byte16(const Reference<Byte16> &rhs)
2858         {
2859                 Value *value = rhs.loadValue();
2860                 storeValue(value);
2861         }
2862
2863         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2864         {
2865                 storeValue(rhs.value);
2866
2867                 return rhs;
2868         }
2869
2870         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2871         {
2872                 Value *value = rhs.loadValue();
2873                 storeValue(value);
2874
2875                 return RValue<Byte16>(value);
2876         }
2877
2878         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2879         {
2880                 Value *value = rhs.loadValue();
2881                 storeValue(value);
2882
2883                 return RValue<Byte16>(value);
2884         }
2885
2886         Type *Byte16::getType()
2887         {
2888                 return T(Ice::IceType_v16i8);
2889         }
2890
2891         Type *SByte16::getType()
2892         {
2893                 return T(Ice::IceType_v16i8);
2894         }
2895
2896         Short2::Short2(RValue<Short4> cast)
2897         {
2898                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2899         }
2900
2901         Type *Short2::getType()
2902         {
2903                 return T(Type_v2i16);
2904         }
2905
2906         UShort2::UShort2(RValue<UShort4> cast)
2907         {
2908                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2909         }
2910
2911         Type *UShort2::getType()
2912         {
2913                 return T(Type_v2i16);
2914         }
2915
2916         Short4::Short4(RValue<Int> cast)
2917         {
2918                 Value *vector = loadValue();
2919                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
2920                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
2921                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
2922
2923                 storeValue(swizzle);
2924         }
2925
2926         Short4::Short4(RValue<Int4> cast)
2927         {
2928                 int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
2929                 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
2930                 Value *packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
2931
2932                 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
2933                 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2934
2935                 storeValue(short4);
2936         }
2937
2938 //      Short4::Short4(RValue<Float> cast)
2939 //      {
2940 //      }
2941
2942         Short4::Short4(RValue<Float4> cast)
2943         {
2944                 assert(false && "UNIMPLEMENTED");
2945         }
2946
2947         Short4::Short4(short xyzw)
2948         {
2949                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2950                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2951         }
2952
2953         Short4::Short4(short x, short y, short z, short w)
2954         {
2955                 int64_t constantVector[4] = {x, y, z, w};
2956                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2957         }
2958
2959         Short4::Short4(RValue<Short4> rhs)
2960         {
2961                 storeValue(rhs.value);
2962         }
2963
2964         Short4::Short4(const Short4 &rhs)
2965         {
2966                 Value *value = rhs.loadValue();
2967                 storeValue(value);
2968         }
2969
2970         Short4::Short4(const Reference<Short4> &rhs)
2971         {
2972                 Value *value = rhs.loadValue();
2973                 storeValue(value);
2974         }
2975
2976         Short4::Short4(RValue<UShort4> rhs)
2977         {
2978                 storeValue(rhs.value);
2979         }
2980
2981         Short4::Short4(const UShort4 &rhs)
2982         {
2983                 storeValue(rhs.loadValue());
2984         }
2985
2986         Short4::Short4(const Reference<UShort4> &rhs)
2987         {
2988                 storeValue(rhs.loadValue());
2989         }
2990
2991         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
2992         {
2993                 storeValue(rhs.value);
2994
2995                 return rhs;
2996         }
2997
2998         RValue<Short4> Short4::operator=(const Short4 &rhs)
2999         {
3000                 Value *value = rhs.loadValue();
3001                 storeValue(value);
3002
3003                 return RValue<Short4>(value);
3004         }
3005
3006         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3007         {
3008                 Value *value = rhs.loadValue();
3009                 storeValue(value);
3010
3011                 return RValue<Short4>(value);
3012         }
3013
3014         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3015         {
3016                 storeValue(rhs.value);
3017
3018                 return RValue<Short4>(rhs);
3019         }
3020
3021         RValue<Short4> Short4::operator=(const UShort4 &rhs)
3022         {
3023                 Value *value = rhs.loadValue();
3024                 storeValue(value);
3025
3026                 return RValue<Short4>(value);
3027         }
3028
3029         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3030         {
3031                 Value *value = rhs.loadValue();
3032                 storeValue(value);
3033
3034                 return RValue<Short4>(value);
3035         }
3036
3037         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3038         {
3039                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3040         }
3041
3042         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3043         {
3044                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3045         }
3046
3047         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3048         {
3049                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3050         }
3051
3052 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3053 //      {
3054 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3055 //      }
3056
3057 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3058 //      {
3059 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3060 //      }
3061
3062         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3063         {
3064                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3065         }
3066
3067         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3068         {
3069                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3070         }
3071
3072         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3073         {
3074                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3075         }
3076
3077         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3078         {
3079                 return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3080         }
3081
3082         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3083         {
3084                 return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3085         }
3086
3087         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3088         {
3089                 return lhs = lhs + rhs;
3090         }
3091
3092         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3093         {
3094                 return lhs = lhs - rhs;
3095         }
3096
3097         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3098         {
3099                 return lhs = lhs * rhs;
3100         }
3101
3102 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3103 //      {
3104 //              return lhs = lhs / rhs;
3105 //      }
3106
3107 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3108 //      {
3109 //              return lhs = lhs % rhs;
3110 //      }
3111
3112         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3113         {
3114                 return lhs = lhs & rhs;
3115         }
3116
3117         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3118         {
3119                 return lhs = lhs | rhs;
3120         }
3121
3122         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3123         {
3124                 return lhs = lhs ^ rhs;
3125         }
3126
3127         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3128         {
3129                 return lhs = lhs << rhs;
3130         }
3131
3132         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3133         {
3134                 return lhs = lhs >> rhs;
3135         }
3136
3137 //      RValue<Short4> operator+(RValue<Short4> val)
3138 //      {
3139 //              return val;
3140 //      }
3141
3142         RValue<Short4> operator-(RValue<Short4> val)
3143         {
3144                 return RValue<Short4>(Nucleus::createNeg(val.value));
3145         }
3146
3147         RValue<Short4> operator~(RValue<Short4> val)
3148         {
3149                 return RValue<Short4>(Nucleus::createNot(val.value));
3150         }
3151
3152         RValue<Short4> RoundShort4(RValue<Float4> cast)
3153         {
3154                 RValue<Int4> int4 = RoundInt(cast);
3155                 return As<Short4>(Pack(int4, int4));
3156         }
3157
3158         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3159         {
3160                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3161                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
3162                 ::basicBlock->appendInst(cmp);
3163
3164                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3165                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3166                 ::basicBlock->appendInst(select);
3167
3168                 return RValue<Short4>(V(result));
3169         }
3170
3171         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3172         {
3173                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3174                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
3175                 ::basicBlock->appendInst(cmp);
3176
3177                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3178                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3179                 ::basicBlock->appendInst(select);
3180
3181                 return RValue<Short4>(V(result));
3182         }
3183
3184         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3185         {
3186                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3187                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3188                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3189                 auto paddsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3190                 paddsw->addArg(x.value);
3191                 paddsw->addArg(y.value);
3192                 ::basicBlock->appendInst(paddsw);
3193
3194                 return RValue<Short4>(V(result));
3195         }
3196
3197         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3198         {
3199                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3200                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3201                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3202                 auto psubsw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3203                 psubsw->addArg(x.value);
3204                 psubsw->addArg(y.value);
3205                 ::basicBlock->appendInst(psubsw);
3206
3207                 return RValue<Short4>(V(result));
3208         }
3209
3210         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3211         {
3212                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3213                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3214                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3215                 auto pmulhw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3216                 pmulhw->addArg(x.value);
3217                 pmulhw->addArg(y.value);
3218                 ::basicBlock->appendInst(pmulhw);
3219
3220                 return RValue<Short4>(V(result));
3221         }
3222
3223         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3224         {
3225                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3226                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyAddPairs, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3227                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3228                 auto pmaddwd = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3229                 pmaddwd->addArg(x.value);
3230                 pmaddwd->addArg(y.value);
3231                 ::basicBlock->appendInst(pmaddwd);
3232
3233                 return RValue<Int2>(V(result));
3234         }
3235
3236         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3237         {
3238                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3239                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3240                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3241                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3242                 pack->addArg(x.value);
3243                 pack->addArg(y.value);
3244                 ::basicBlock->appendInst(pack);
3245
3246                 return As<SByte8>(Swizzle(As<Int4>(V(result)), 0x88));
3247         }
3248
3249         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3250         {
3251                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3252                 return RValue<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3253         }
3254
3255         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3256         {
3257                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3258                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3259                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3260         }
3261
3262         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3263         {
3264                 // Real type is v8i16
3265                 int shuffle[8] =
3266                 {
3267                         (select >> 0) & 0x03,
3268                         (select >> 2) & 0x03,
3269                         (select >> 4) & 0x03,
3270                         (select >> 6) & 0x03,
3271                         (select >> 0) & 0x03,
3272                         (select >> 2) & 0x03,
3273                         (select >> 4) & 0x03,
3274                         (select >> 6) & 0x03,
3275                 };
3276
3277                 return RValue<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3278         }
3279
3280         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3281         {
3282                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3283         }
3284
3285         RValue<Short> Extract(RValue<Short4> val, int i)
3286         {
3287                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3288         }
3289
3290         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3291         {
3292                 return RValue<Short4>(createIntCompare(Ice::InstIcmp::Sgt, x.value, y.value));
3293         }
3294
3295         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3296         {
3297                 return RValue<Short4>(Nucleus::createICmpEQ(x.value, y.value));
3298         }
3299
3300         Type *Short4::getType()
3301         {
3302                 return T(Type_v4i16);
3303         }
3304
3305         UShort4::UShort4(RValue<Int4> cast)
3306         {
3307                 *this = Short4(cast);
3308         }
3309
3310         UShort4::UShort4(RValue<Float4> cast, bool saturate)
3311         {
3312                 if(saturate)
3313                 {
3314                         if(CPUID::SSE4_1)
3315                         {
3316                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3317                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
3318                         }
3319                         else
3320                         {
3321                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3322                         }
3323                 }
3324                 else
3325                 {
3326                         *this = Short4(Int4(cast));
3327                 }
3328         }
3329
3330         UShort4::UShort4(unsigned short xyzw)
3331         {
3332                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3333                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3334         }
3335
3336         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3337         {
3338                 int64_t constantVector[4] = {x, y, z, w};
3339                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3340         }
3341
3342         UShort4::UShort4(RValue<UShort4> rhs)
3343         {
3344                 storeValue(rhs.value);
3345         }
3346
3347         UShort4::UShort4(const UShort4 &rhs)
3348         {
3349                 Value *value = rhs.loadValue();
3350                 storeValue(value);
3351         }
3352
3353         UShort4::UShort4(const Reference<UShort4> &rhs)
3354         {
3355                 Value *value = rhs.loadValue();
3356                 storeValue(value);
3357         }
3358
3359         UShort4::UShort4(RValue<Short4> rhs)
3360         {
3361                 storeValue(rhs.value);
3362         }
3363
3364         UShort4::UShort4(const Short4 &rhs)
3365         {
3366                 Value *value = rhs.loadValue();
3367                 storeValue(value);
3368         }
3369
3370         UShort4::UShort4(const Reference<Short4> &rhs)
3371         {
3372                 Value *value = rhs.loadValue();
3373                 storeValue(value);
3374         }
3375
3376         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3377         {
3378                 storeValue(rhs.value);
3379
3380                 return rhs;
3381         }
3382
3383         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3384         {
3385                 Value *value = rhs.loadValue();
3386                 storeValue(value);
3387
3388                 return RValue<UShort4>(value);
3389         }
3390
3391         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3392         {
3393                 Value *value = rhs.loadValue();
3394                 storeValue(value);
3395
3396                 return RValue<UShort4>(value);
3397         }
3398
3399         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3400         {
3401                 storeValue(rhs.value);
3402
3403                 return RValue<UShort4>(rhs);
3404         }
3405
3406         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3407         {
3408                 Value *value = rhs.loadValue();
3409                 storeValue(value);
3410
3411                 return RValue<UShort4>(value);
3412         }
3413
3414         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3415         {
3416                 Value *value = rhs.loadValue();
3417                 storeValue(value);
3418
3419                 return RValue<UShort4>(value);
3420         }
3421
3422         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3423         {
3424                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3425         }
3426
3427         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3428         {
3429                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3430         }
3431
3432         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3433         {
3434                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3435         }
3436
3437         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3438         {
3439                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3440         }
3441
3442         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3443         {
3444                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3445         }
3446
3447         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3448         {
3449                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3450         }
3451
3452         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3453         {
3454                 return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3455         }
3456
3457         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3458         {
3459                 return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3460         }
3461
3462         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3463         {
3464                 return lhs = lhs << rhs;
3465         }
3466
3467         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3468         {
3469                 return lhs = lhs >> rhs;
3470         }
3471
3472         RValue<UShort4> operator~(RValue<UShort4> val)
3473         {
3474                 return RValue<UShort4>(Nucleus::createNot(val.value));
3475         }
3476
3477         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3478         {
3479                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3480                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
3481                 ::basicBlock->appendInst(cmp);
3482
3483                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3484                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3485                 ::basicBlock->appendInst(select);
3486
3487                 return RValue<UShort4>(V(result));
3488         }
3489
3490         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3491         {
3492                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v8i1);
3493                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
3494                 ::basicBlock->appendInst(cmp);
3495
3496                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3497                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
3498                 ::basicBlock->appendInst(select);
3499
3500                 return RValue<UShort4>(V(result));
3501         }
3502
3503         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3504         {
3505                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3506                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::AddSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3507                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3508                 auto paddusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3509                 paddusw->addArg(x.value);
3510                 paddusw->addArg(y.value);
3511                 ::basicBlock->appendInst(paddusw);
3512
3513                 return RValue<UShort4>(V(result));
3514         }
3515
3516         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3517         {
3518                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3519                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SubtractSaturateUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3520                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3521                 auto psubusw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3522                 psubusw->addArg(x.value);
3523                 psubusw->addArg(y.value);
3524                 ::basicBlock->appendInst(psubusw);
3525
3526                 return RValue<UShort4>(V(result));
3527         }
3528
3529         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3530         {
3531                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
3532                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::MultiplyHighUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3533                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3534                 auto pmulhuw = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3535                 pmulhuw->addArg(x.value);
3536                 pmulhuw->addArg(y.value);
3537                 ::basicBlock->appendInst(pmulhuw);
3538
3539                 return RValue<UShort4>(V(result));
3540         }
3541
3542         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3543         {
3544                 assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
3545         }
3546
3547         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3548         {
3549                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v16i8);
3550                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
3551                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
3552                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
3553                 pack->addArg(x.value);
3554                 pack->addArg(y.value);
3555                 ::basicBlock->appendInst(pack);
3556
3557                 return As<Byte8>(Swizzle(As<Int4>(V(result)), 0x88));
3558         }
3559
3560         Type *UShort4::getType()
3561         {
3562                 return T(Type_v4i16);
3563         }
3564
3565         Short8::Short8(short c)
3566         {
3567                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3568                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3569         }
3570
3571         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3572         {
3573                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3574                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3575         }
3576
3577         Short8::Short8(RValue<Short8> rhs)
3578         {
3579                 storeValue(rhs.value);
3580         }
3581
3582         Short8::Short8(const Reference<Short8> &rhs)
3583         {
3584                 Value *value = rhs.loadValue();
3585                 storeValue(value);
3586         }
3587
3588         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3589         {
3590                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3591                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3592
3593                 storeValue(packed);
3594         }
3595
3596         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3597         {
3598                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3599         }
3600
3601         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3602         {
3603                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3604         }
3605
3606         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3607         {
3608                 return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3609         }
3610
3611         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3612         {
3613                 return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
3614         }
3615
3616         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3617         {
3618                 assert(false && "UNIMPLEMENTED"); return RValue<Int4>(V(nullptr));
3619         }
3620
3621         RValue<Int4> Abs(RValue<Int4> x)
3622         {
3623                 auto negative = x >> 31;
3624                 return (x ^ negative) - negative;
3625         }
3626
3627         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3628         {
3629                 assert(false && "UNIMPLEMENTED"); return RValue<Short8>(V(nullptr));
3630         }
3631
3632         Type *Short8::getType()
3633         {
3634                 return T(Ice::IceType_v8i16);
3635         }
3636
3637         UShort8::UShort8(unsigned short c)
3638         {
3639                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3640                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3641         }
3642
3643         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3644         {
3645                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3646                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3647         }
3648
3649         UShort8::UShort8(RValue<UShort8> rhs)
3650         {
3651                 storeValue(rhs.value);
3652         }
3653
3654         UShort8::UShort8(const Reference<UShort8> &rhs)
3655         {
3656                 Value *value = rhs.loadValue();
3657                 storeValue(value);
3658         }
3659
3660         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3661         {
3662                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3663                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3664
3665                 storeValue(packed);
3666         }
3667
3668         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3669         {
3670                 storeValue(rhs.value);
3671
3672                 return rhs;
3673         }
3674
3675         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3676         {
3677                 Value *value = rhs.loadValue();
3678                 storeValue(value);
3679
3680                 return RValue<UShort8>(value);
3681         }
3682
3683         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3684         {
3685                 Value *value = rhs.loadValue();
3686                 storeValue(value);
3687
3688                 return RValue<UShort8>(value);
3689         }
3690
3691         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3692         {
3693                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3694         }
3695
3696         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3697         {
3698                 return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
3699         }
3700
3701         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3702         {
3703                 return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
3704         }
3705
3706         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3707         {
3708                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3709         }
3710
3711         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3712         {
3713                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3714         }
3715
3716         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3717         {
3718                 return lhs = lhs + rhs;
3719         }
3720
3721         RValue<UShort8> operator~(RValue<UShort8> val)
3722         {
3723                 return RValue<UShort8>(Nucleus::createNot(val.value));
3724         }
3725
3726         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3727         {
3728                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3729         }
3730
3731         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3732         {
3733                 assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3734         }
3735
3736         // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3737 //      RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3738 //      {
3739 //              assert(false && "UNIMPLEMENTED"); return RValue<UShort8>(V(nullptr));
3740 //      }
3741
3742         Type *UShort8::getType()
3743         {
3744                 return T(Ice::IceType_v8i16);
3745         }
3746
3747         Int::Int(Argument<Int> argument)
3748         {
3749                 storeValue(argument.value);
3750         }
3751
3752         Int::Int(RValue<Byte> cast)
3753         {
3754                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3755
3756                 storeValue(integer);
3757         }
3758
3759         Int::Int(RValue<SByte> cast)
3760         {
3761                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3762
3763                 storeValue(integer);
3764         }
3765
3766         Int::Int(RValue<Short> cast)
3767         {
3768                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3769
3770                 storeValue(integer);
3771         }
3772
3773         Int::Int(RValue<UShort> cast)
3774         {
3775                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3776
3777                 storeValue(integer);
3778         }
3779
3780         Int::Int(RValue<Int2> cast)
3781         {
3782                 *this = Extract(cast, 0);
3783         }
3784
3785         Int::Int(RValue<Long> cast)
3786         {
3787                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3788
3789                 storeValue(integer);
3790         }
3791
3792         Int::Int(RValue<Float> cast)
3793         {
3794                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3795
3796                 storeValue(integer);
3797         }
3798
3799         Int::Int(int x)
3800         {
3801                 storeValue(Nucleus::createConstantInt(x));
3802         }
3803
3804         Int::Int(RValue<Int> rhs)
3805         {
3806                 storeValue(rhs.value);
3807         }
3808
3809         Int::Int(RValue<UInt> rhs)
3810         {
3811                 storeValue(rhs.value);
3812         }
3813
3814         Int::Int(const Int &rhs)
3815         {
3816                 Value *value = rhs.loadValue();
3817                 storeValue(value);
3818         }
3819
3820         Int::Int(const Reference<Int> &rhs)
3821         {
3822                 Value *value = rhs.loadValue();
3823                 storeValue(value);
3824         }
3825
3826         Int::Int(const UInt &rhs)
3827         {
3828                 Value *value = rhs.loadValue();
3829                 storeValue(value);
3830         }
3831
3832         Int::Int(const Reference<UInt> &rhs)
3833         {
3834                 Value *value = rhs.loadValue();
3835                 storeValue(value);
3836         }
3837
3838         RValue<Int> Int::operator=(int rhs)
3839         {
3840                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3841         }
3842
3843         RValue<Int> Int::operator=(RValue<Int> rhs)
3844         {
3845                 storeValue(rhs.value);
3846
3847                 return rhs;
3848         }
3849
3850         RValue<Int> Int::operator=(RValue<UInt> rhs)
3851         {
3852                 storeValue(rhs.value);
3853
3854                 return RValue<Int>(rhs);
3855         }
3856
3857         RValue<Int> Int::operator=(const Int &rhs)
3858         {
3859                 Value *value = rhs.loadValue();
3860                 storeValue(value);
3861
3862                 return RValue<Int>(value);
3863         }
3864
3865         RValue<Int> Int::operator=(const Reference<Int> &rhs)
3866         {
3867                 Value *value = rhs.loadValue();
3868                 storeValue(value);
3869
3870                 return RValue<Int>(value);
3871         }
3872
3873         RValue<Int> Int::operator=(const UInt &rhs)
3874         {
3875                 Value *value = rhs.loadValue();
3876                 storeValue(value);
3877
3878                 return RValue<Int>(value);
3879         }
3880
3881         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3882         {
3883                 Value *value = rhs.loadValue();
3884                 storeValue(value);
3885
3886                 return RValue<Int>(value);
3887         }
3888
3889         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3890         {
3891                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3892         }
3893
3894         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3895         {
3896                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3897         }
3898
3899         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3900         {
3901                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3902         }
3903
3904         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3905         {
3906                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3907         }
3908
3909         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3910         {
3911                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3912         }
3913
3914         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3915         {
3916                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3917         }
3918
3919         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3920         {
3921                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3922         }
3923
3924         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3925         {
3926                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3927         }
3928
3929         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3930         {
3931                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3932         }
3933
3934         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3935         {
3936                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3937         }
3938
3939         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
3940         {
3941                 return lhs = lhs + rhs;
3942         }
3943
3944         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
3945         {
3946                 return lhs = lhs - rhs;
3947         }
3948
3949         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
3950         {
3951                 return lhs = lhs * rhs;
3952         }
3953
3954         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
3955         {
3956                 return lhs = lhs / rhs;
3957         }
3958
3959         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
3960         {
3961                 return lhs = lhs % rhs;
3962         }
3963
3964         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
3965         {
3966                 return lhs = lhs & rhs;
3967         }
3968
3969         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
3970         {
3971                 return lhs = lhs | rhs;
3972         }
3973
3974         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
3975         {
3976                 return lhs = lhs ^ rhs;
3977         }
3978
3979         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
3980         {
3981                 return lhs = lhs << rhs;
3982         }
3983
3984         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
3985         {
3986                 return lhs = lhs >> rhs;
3987         }
3988
3989         RValue<Int> operator+(RValue<Int> val)
3990         {
3991                 return val;
3992         }
3993
3994         RValue<Int> operator-(RValue<Int> val)
3995         {
3996                 return RValue<Int>(Nucleus::createNeg(val.value));
3997         }
3998
3999         RValue<Int> operator~(RValue<Int> val)
4000         {
4001                 return RValue<Int>(Nucleus::createNot(val.value));
4002         }
4003
4004         RValue<Int> operator++(Int &val, int)   // Post-increment
4005         {
4006                 RValue<Int> res = val;
4007                 val += 1;
4008                 return res;
4009         }
4010
4011         const Int &operator++(Int &val)   // Pre-increment
4012         {
4013                 val += 1;
4014                 return val;
4015         }
4016
4017         RValue<Int> operator--(Int &val, int)   // Post-decrement
4018         {
4019                 RValue<Int> res = val;
4020                 val -= 1;
4021                 return res;
4022         }
4023
4024         const Int &operator--(Int &val)   // Pre-decrement
4025         {
4026                 val -= 1;
4027                 return val;
4028         }
4029
4030         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4031         {
4032                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4033         }
4034
4035         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4036         {
4037                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4038         }
4039
4040         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4041         {
4042                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4043         }
4044
4045         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4046         {
4047                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4048         }
4049
4050         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4051         {
4052                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4053         }
4054
4055         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4056         {
4057                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4058         }
4059
4060         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4061         {
4062                 return IfThenElse(x > y, x, y);
4063         }
4064
4065         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4066         {
4067                 return IfThenElse(x < y, x, y);
4068         }
4069
4070         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4071         {
4072                 return Min(Max(x, min), max);
4073         }
4074
4075         RValue<Int> RoundInt(RValue<Float> cast)
4076         {
4077                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
4078                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
4079                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
4080                 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
4081                 nearbyint->addArg(cast.value);
4082                 ::basicBlock->appendInst(nearbyint);
4083
4084                 return RValue<Int>(V(result));
4085         }
4086
4087         Type *Int::getType()
4088         {
4089                 return T(Ice::IceType_i32);
4090         }
4091
4092         Long::Long(RValue<Int> cast)
4093         {
4094                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4095
4096                 storeValue(integer);
4097         }
4098
4099         Long::Long(RValue<UInt> cast)
4100         {
4101                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4102
4103                 storeValue(integer);
4104         }
4105
4106         Long::Long(RValue<Long> rhs)
4107         {
4108                 storeValue(rhs.value);
4109         }
4110
4111         RValue<Long> Long::operator=(int64_t rhs)
4112         {
4113                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4114         }
4115
4116         RValue<Long> Long::operator=(RValue<Long> rhs)
4117         {
4118                 storeValue(rhs.value);
4119
4120                 return rhs;
4121         }
4122
4123         RValue<Long> Long::operator=(const Long &rhs)
4124         {
4125                 Value *value = rhs.loadValue();
4126                 storeValue(value);
4127
4128                 return RValue<Long>(value);
4129         }
4130
4131         RValue<Long> Long::operator=(const Reference<Long> &rhs)
4132         {
4133                 Value *value = rhs.loadValue();
4134                 storeValue(value);
4135
4136                 return RValue<Long>(value);
4137         }
4138
4139         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4140         {
4141                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4142         }
4143
4144         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4145         {
4146                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4147         }
4148
4149         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4150         {
4151                 return lhs = lhs + rhs;
4152         }
4153
4154         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4155         {
4156                 return lhs = lhs - rhs;
4157         }
4158
4159         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4160         {
4161                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4162         }
4163
4164         Type *Long::getType()
4165         {
4166                 return T(Ice::IceType_i64);
4167         }
4168
4169         UInt::UInt(Argument<UInt> argument)
4170         {
4171                 storeValue(argument.value);
4172         }
4173
4174         UInt::UInt(RValue<UShort> cast)
4175         {
4176                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4177
4178                 storeValue(integer);
4179         }
4180
4181         UInt::UInt(RValue<Long> cast)
4182         {
4183                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4184
4185                 storeValue(integer);
4186         }
4187
4188         UInt::UInt(RValue<Float> cast)
4189         {
4190                 // Smallest positive value representable in UInt, but not in Int
4191                 const unsigned int ustart = 0x80000000u;
4192                 const float ustartf = float(ustart);
4193
4194                 // If the value is negative, store 0, otherwise store the result of the conversion
4195                 storeValue((~(As<Int>(cast) >> 31) &
4196                 // Check if the value can be represented as an Int
4197                         IfThenElse(cast >= ustartf,
4198                 // If the value is too large, subtract ustart and re-add it after conversion.
4199                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4200                 // Otherwise, just convert normally
4201                                 Int(cast))).value);
4202         }
4203
4204         UInt::UInt(int x)
4205         {
4206                 storeValue(Nucleus::createConstantInt(x));
4207         }
4208
4209         UInt::UInt(unsigned int x)
4210         {
4211                 storeValue(Nucleus::createConstantInt(x));
4212         }
4213
4214         UInt::UInt(RValue<UInt> rhs)
4215         {
4216                 storeValue(rhs.value);
4217         }
4218
4219         UInt::UInt(RValue<Int> rhs)
4220         {
4221                 storeValue(rhs.value);
4222         }
4223
4224         UInt::UInt(const UInt &rhs)
4225         {
4226                 Value *value = rhs.loadValue();
4227                 storeValue(value);
4228         }
4229
4230         UInt::UInt(const Reference<UInt> &rhs)
4231         {
4232                 Value *value = rhs.loadValue();
4233                 storeValue(value);
4234         }
4235
4236         UInt::UInt(const Int &rhs)
4237         {
4238                 Value *value = rhs.loadValue();
4239                 storeValue(value);
4240         }
4241
4242         UInt::UInt(const Reference<Int> &rhs)
4243         {
4244                 Value *value = rhs.loadValue();
4245                 storeValue(value);
4246         }
4247
4248         RValue<UInt> UInt::operator=(unsigned int rhs)
4249         {
4250                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4251         }
4252
4253         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4254         {
4255                 storeValue(rhs.value);
4256
4257                 return rhs;
4258         }
4259
4260         RValue<UInt> UInt::operator=(RValue<Int> rhs)
4261         {
4262                 storeValue(rhs.value);
4263
4264                 return RValue<UInt>(rhs);
4265         }
4266
4267         RValue<UInt> UInt::operator=(const UInt &rhs)
4268         {
4269                 Value *value = rhs.loadValue();
4270                 storeValue(value);
4271
4272                 return RValue<UInt>(value);
4273         }
4274
4275         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4276         {
4277                 Value *value = rhs.loadValue();
4278                 storeValue(value);
4279
4280                 return RValue<UInt>(value);
4281         }
4282
4283         RValue<UInt> UInt::operator=(const Int &rhs)
4284         {
4285                 Value *value = rhs.loadValue();
4286                 storeValue(value);
4287
4288                 return RValue<UInt>(value);
4289         }
4290
4291         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4292         {
4293                 Value *value = rhs.loadValue();
4294                 storeValue(value);
4295
4296                 return RValue<UInt>(value);
4297         }
4298
4299         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4300         {
4301                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4302         }
4303
4304         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4305         {
4306                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4307         }
4308
4309         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4310         {
4311                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4312         }
4313
4314         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4315         {
4316                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4317         }
4318
4319         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4320         {
4321                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4322         }
4323
4324         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4325         {
4326                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4327         }
4328
4329         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4330         {
4331                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4332         }
4333
4334         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4335         {
4336                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4337         }
4338
4339         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4340         {
4341                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4342         }
4343
4344         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4345         {
4346                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4347         }
4348
4349         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4350         {
4351                 return lhs = lhs + rhs;
4352         }
4353
4354         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4355         {
4356                 return lhs = lhs - rhs;
4357         }
4358
4359         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4360         {
4361                 return lhs = lhs * rhs;
4362         }
4363
4364         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4365         {
4366                 return lhs = lhs / rhs;
4367         }
4368
4369         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4370         {
4371                 return lhs = lhs % rhs;
4372         }
4373
4374         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4375         {
4376                 return lhs = lhs & rhs;
4377         }
4378
4379         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4380         {
4381                 return lhs = lhs | rhs;
4382         }
4383
4384         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4385         {
4386                 return lhs = lhs ^ rhs;
4387         }
4388
4389         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4390         {
4391                 return lhs = lhs << rhs;
4392         }
4393
4394         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4395         {
4396                 return lhs = lhs >> rhs;
4397         }
4398
4399         RValue<UInt> operator+(RValue<UInt> val)
4400         {
4401                 return val;
4402         }
4403
4404         RValue<UInt> operator-(RValue<UInt> val)
4405         {
4406                 return RValue<UInt>(Nucleus::createNeg(val.value));
4407         }
4408
4409         RValue<UInt> operator~(RValue<UInt> val)
4410         {
4411                 return RValue<UInt>(Nucleus::createNot(val.value));
4412         }
4413
4414         RValue<UInt> operator++(UInt &val, int)   // Post-increment
4415         {
4416                 RValue<UInt> res = val;
4417                 val += 1;
4418                 return res;
4419         }
4420
4421         const UInt &operator++(UInt &val)   // Pre-increment
4422         {
4423                 val += 1;
4424                 return val;
4425         }
4426
4427         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
4428         {
4429                 RValue<UInt> res = val;
4430                 val -= 1;
4431                 return res;
4432         }
4433
4434         const UInt &operator--(UInt &val)   // Pre-decrement
4435         {
4436                 val -= 1;
4437                 return val;
4438         }
4439
4440         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4441         {
4442                 return IfThenElse(x > y, x, y);
4443         }
4444
4445         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4446         {
4447                 return IfThenElse(x < y, x, y);
4448         }
4449
4450         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4451         {
4452                 return Min(Max(x, min), max);
4453         }
4454
4455         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4456         {
4457                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4458         }
4459
4460         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4461         {
4462                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4463         }
4464
4465         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4466         {
4467                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4468         }
4469
4470         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4471         {
4472                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4473         }
4474
4475         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4476         {
4477                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4478         }
4479
4480         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4481         {
4482                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4483         }
4484
4485 //      RValue<UInt> RoundUInt(RValue<Float> cast)
4486 //      {
4487 //              assert(false && "UNIMPLEMENTED"); return RValue<UInt>(V(nullptr));
4488 //      }
4489
4490         Type *UInt::getType()
4491         {
4492                 return T(Ice::IceType_i32);
4493         }
4494
4495 //      Int2::Int2(RValue<Int> cast)
4496 //      {
4497 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4498 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4499 //
4500 //              Constant *shuffle[2];
4501 //              shuffle[0] = Nucleus::createConstantInt(0);
4502 //              shuffle[1] = Nucleus::createConstantInt(0);
4503 //
4504 //              Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4505 //
4506 //              storeValue(replicate);
4507 //      }
4508
4509         Int2::Int2(RValue<Int4> cast)
4510         {
4511                 storeValue(Nucleus::createBitCast(cast.value, getType()));
4512         }
4513
4514         Int2::Int2(int x, int y)
4515         {
4516                 int64_t constantVector[2] = {x, y};
4517                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4518         }
4519
4520         Int2::Int2(RValue<Int2> rhs)
4521         {
4522                 storeValue(rhs.value);
4523         }
4524
4525         Int2::Int2(const Int2 &rhs)
4526         {
4527                 Value *value = rhs.loadValue();
4528                 storeValue(value);
4529         }
4530
4531         Int2::Int2(const Reference<Int2> &rhs)
4532         {
4533                 Value *value = rhs.loadValue();
4534                 storeValue(value);
4535         }
4536
4537         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4538         {
4539                 int shuffle[4] = {0, 4, 1, 5};
4540                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4541
4542                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4543         }
4544
4545         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4546         {
4547                 storeValue(rhs.value);
4548
4549                 return rhs;
4550         }
4551
4552         RValue<Int2> Int2::operator=(const Int2 &rhs)
4553         {
4554                 Value *value = rhs.loadValue();
4555                 storeValue(value);
4556
4557                 return RValue<Int2>(value);
4558         }
4559
4560         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4561         {
4562                 Value *value = rhs.loadValue();
4563                 storeValue(value);
4564
4565                 return RValue<Int2>(value);
4566         }
4567
4568         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4569         {
4570                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4571         }
4572
4573         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4574         {
4575                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4576         }
4577
4578 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4579 //      {
4580 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4581 //      }
4582
4583 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4584 //      {
4585 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4586 //      }
4587
4588 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4589 //      {
4590 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4591 //      }
4592
4593         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4594         {
4595                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4596         }
4597
4598         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4599         {
4600                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4601         }
4602
4603         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4604         {
4605                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4606         }
4607
4608         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4609         {
4610                 return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4611         }
4612
4613         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4614         {
4615                 return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
4616         }
4617
4618         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4619         {
4620                 return lhs = lhs + rhs;
4621         }
4622
4623         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4624         {
4625                 return lhs = lhs - rhs;
4626         }
4627
4628 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4629 //      {
4630 //              return lhs = lhs * rhs;
4631 //      }
4632
4633 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4634 //      {
4635 //              return lhs = lhs / rhs;
4636 //      }
4637
4638 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4639 //      {
4640 //              return lhs = lhs % rhs;
4641 //      }
4642
4643         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4644         {
4645                 return lhs = lhs & rhs;
4646         }
4647
4648         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4649         {
4650                 return lhs = lhs | rhs;
4651         }
4652
4653         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4654         {
4655                 return lhs = lhs ^ rhs;
4656         }
4657
4658         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4659         {
4660                 return lhs = lhs << rhs;
4661         }
4662
4663         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4664         {
4665                 return lhs = lhs >> rhs;
4666         }
4667
4668 //      RValue<Int2> operator+(RValue<Int2> val)
4669 //      {
4670 //              return val;
4671 //      }
4672
4673 //      RValue<Int2> operator-(RValue<Int2> val)
4674 //      {
4675 //              return RValue<Int2>(Nucleus::createNeg(val.value));
4676 //      }
4677
4678         RValue<Int2> operator~(RValue<Int2> val)
4679         {
4680                 return RValue<Int2>(Nucleus::createNot(val.value));
4681         }
4682
4683         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4684         {
4685                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4686                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4687         }
4688
4689         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4690         {
4691                 int shuffle[16] = {0, 4, 1, 5};   // Real type is v4i32
4692                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4693                 return As<Short4>(Swizzle(lowHigh, 0xEE));
4694         }
4695
4696         RValue<Int> Extract(RValue<Int2> val, int i)
4697         {
4698                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4699         }
4700
4701         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4702         {
4703                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
4704         }
4705
4706         Type *Int2::getType()
4707         {
4708                 return T(Type_v2i32);
4709         }
4710
4711         UInt2::UInt2(unsigned int x, unsigned int y)
4712         {
4713                 int64_t constantVector[2] = {x, y};
4714                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4715         }
4716
4717         UInt2::UInt2(RValue<UInt2> rhs)
4718         {
4719                 storeValue(rhs.value);
4720         }
4721
4722         UInt2::UInt2(const UInt2 &rhs)
4723         {
4724                 Value *value = rhs.loadValue();
4725                 storeValue(value);
4726         }
4727
4728         UInt2::UInt2(const Reference<UInt2> &rhs)
4729         {
4730                 Value *value = rhs.loadValue();
4731                 storeValue(value);
4732         }
4733
4734         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
4735         {
4736                 storeValue(rhs.value);
4737
4738                 return rhs;
4739         }
4740
4741         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4742         {
4743                 Value *value = rhs.loadValue();
4744                 storeValue(value);
4745
4746                 return RValue<UInt2>(value);
4747         }
4748
4749         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4750         {
4751                 Value *value = rhs.loadValue();
4752                 storeValue(value);
4753
4754                 return RValue<UInt2>(value);
4755         }
4756
4757         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4758         {
4759                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
4760         }
4761
4762         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4763         {
4764                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4765         }
4766
4767 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4768 //      {
4769 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4770 //      }
4771
4772 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4773 //      {
4774 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4775 //      }
4776
4777 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4778 //      {
4779 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
4780 //      }
4781
4782         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4783         {
4784                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
4785         }
4786
4787         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4788         {
4789                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
4790         }
4791
4792         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4793         {
4794                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
4795         }
4796
4797         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4798         {
4799                 return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
4800         }
4801
4802         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4803         {
4804                 return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
4805         }
4806
4807         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4808         {
4809                 return lhs = lhs + rhs;
4810         }
4811
4812         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4813         {
4814                 return lhs = lhs - rhs;
4815         }
4816
4817 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4818 //      {
4819 //              return lhs = lhs * rhs;
4820 //      }
4821
4822 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4823 //      {
4824 //              return lhs = lhs / rhs;
4825 //      }
4826
4827 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4828 //      {
4829 //              return lhs = lhs % rhs;
4830 //      }
4831
4832         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4833         {
4834                 return lhs = lhs & rhs;
4835         }
4836
4837         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4838         {
4839                 return lhs = lhs | rhs;
4840         }
4841
4842         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4843         {
4844                 return lhs = lhs ^ rhs;
4845         }
4846
4847         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4848         {
4849                 return lhs = lhs << rhs;
4850         }
4851
4852         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4853         {
4854                 return lhs = lhs >> rhs;
4855         }
4856
4857 //      RValue<UInt2> operator+(RValue<UInt2> val)
4858 //      {
4859 //              return val;
4860 //      }
4861
4862 //      RValue<UInt2> operator-(RValue<UInt2> val)
4863 //      {
4864 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
4865 //      }
4866
4867         RValue<UInt2> operator~(RValue<UInt2> val)
4868         {
4869                 return RValue<UInt2>(Nucleus::createNot(val.value));
4870         }
4871
4872         Type *UInt2::getType()
4873         {
4874                 return T(Type_v2i32);
4875         }
4876
4877         Int4::Int4(RValue<Byte4> cast)
4878         {
4879                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4880                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4881
4882                 Value *e;
4883                 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4884                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4885                 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4886
4887                 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4888                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4889                 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4890
4891                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4892                 storeValue(f);
4893         }
4894
4895         Int4::Int4(RValue<SByte4> cast)
4896         {
4897                 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4898                 Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
4899
4900                 Value *e;
4901                 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4902                 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4903                 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
4904
4905                 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4906                 Value *d = Nucleus::createBitCast(c, Short8::getType());
4907                 e = Nucleus::createShuffleVector(d, d, swizzle2);
4908
4909                 Value *f = Nucleus::createBitCast(e, Int4::getType());
4910                 Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
4911                 storeValue(g);
4912         }
4913
4914         Int4::Int4(RValue<Float4> cast)
4915         {
4916                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
4917
4918                 storeValue(xyzw);
4919         }
4920
4921         Int4::Int4(RValue<Short4> cast)
4922         {
4923                 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4924                 Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
4925                 Value *d = Nucleus::createBitCast(c, Int4::getType());
4926                 Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
4927                 storeValue(e);
4928         }
4929
4930         Int4::Int4(RValue<UShort4> cast)
4931         {
4932                 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4933                 Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
4934                 Value *d = Nucleus::createBitCast(c, Int4::getType());
4935                 storeValue(d);
4936         }
4937
4938         Int4::Int4(int xyzw)
4939         {
4940                 constant(xyzw, xyzw, xyzw, xyzw);
4941         }
4942
4943         Int4::Int4(int x, int yzw)
4944         {
4945                 constant(x, yzw, yzw, yzw);
4946         }
4947
4948         Int4::Int4(int x, int y, int zw)
4949         {
4950                 constant(x, y, zw, zw);
4951         }
4952
4953         Int4::Int4(int x, int y, int z, int w)
4954         {
4955                 constant(x, y, z, w);
4956         }
4957
4958         void Int4::constant(int x, int y, int z, int w)
4959         {
4960                 int64_t constantVector[4] = {x, y, z, w};
4961                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4962         }
4963
4964         Int4::Int4(RValue<Int4> rhs)
4965         {
4966                 storeValue(rhs.value);
4967         }
4968
4969         Int4::Int4(const Int4 &rhs)
4970         {
4971                 Value *value = rhs.loadValue();
4972                 storeValue(value);
4973         }
4974
4975         Int4::Int4(const Reference<Int4> &rhs)
4976         {
4977                 Value *value = rhs.loadValue();
4978                 storeValue(value);
4979         }
4980
4981         Int4::Int4(RValue<UInt4> rhs)
4982         {
4983                 storeValue(rhs.value);
4984         }
4985
4986         Int4::Int4(const UInt4 &rhs)
4987         {
4988                 Value *value = rhs.loadValue();
4989                 storeValue(value);
4990         }
4991
4992         Int4::Int4(const Reference<UInt4> &rhs)
4993         {
4994                 Value *value = rhs.loadValue();
4995                 storeValue(value);
4996         }
4997
4998         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
4999         {
5000                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5001                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5002
5003                 storeValue(packed);
5004         }
5005
5006         Int4::Int4(RValue<Int> rhs)
5007         {
5008                 Value *vector = loadValue();
5009                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5010
5011                 int swizzle[4] = {0, 0, 0, 0};
5012                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5013
5014                 storeValue(replicate);
5015         }
5016
5017         Int4::Int4(const Int &rhs)
5018         {
5019                 *this = RValue<Int>(rhs.loadValue());
5020         }
5021
5022         Int4::Int4(const Reference<Int> &rhs)
5023         {
5024                 *this = RValue<Int>(rhs.loadValue());
5025         }
5026
5027         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5028         {
5029                 storeValue(rhs.value);
5030
5031                 return rhs;
5032         }
5033
5034         RValue<Int4> Int4::operator=(const Int4 &rhs)
5035         {
5036                 Value *value = rhs.loadValue();
5037                 storeValue(value);
5038
5039                 return RValue<Int4>(value);
5040         }
5041
5042         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5043         {
5044                 Value *value = rhs.loadValue();
5045                 storeValue(value);
5046
5047                 return RValue<Int4>(value);
5048         }
5049
5050         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5051         {
5052                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5053         }
5054
5055         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5056         {
5057                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5058         }
5059
5060         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5061         {
5062                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5063         }
5064
5065         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5066         {
5067                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5068         }
5069
5070         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5071         {
5072                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5073         }
5074
5075         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5076         {
5077                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5078         }
5079
5080         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5081         {
5082                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5083         }
5084
5085         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5086         {
5087                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5088         }
5089
5090         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5091         {
5092                 return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5093         }
5094
5095         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5096         {
5097                 return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
5098         }
5099
5100         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5101         {
5102                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5103         }
5104
5105         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5106         {
5107                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5108         }
5109
5110         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5111         {
5112                 return lhs = lhs + rhs;
5113         }
5114
5115         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5116         {
5117                 return lhs = lhs - rhs;
5118         }
5119
5120         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5121         {
5122                 return lhs = lhs * rhs;
5123         }
5124
5125 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5126 //      {
5127 //              return lhs = lhs / rhs;
5128 //      }
5129
5130 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5131 //      {
5132 //              return lhs = lhs % rhs;
5133 //      }
5134
5135         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5136         {
5137                 return lhs = lhs & rhs;
5138         }
5139
5140         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5141         {
5142                 return lhs = lhs | rhs;
5143         }
5144
5145         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5146         {
5147                 return lhs = lhs ^ rhs;
5148         }
5149
5150         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5151         {
5152                 return lhs = lhs << rhs;
5153         }
5154
5155         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5156         {
5157                 return lhs = lhs >> rhs;
5158         }
5159
5160         RValue<Int4> operator+(RValue<Int4> val)
5161         {
5162                 return val;
5163         }
5164
5165         RValue<Int4> operator-(RValue<Int4> val)
5166         {
5167                 return RValue<Int4>(Nucleus::createNeg(val.value));
5168         }
5169
5170         RValue<Int4> operator~(RValue<Int4> val)
5171         {
5172                 return RValue<Int4>(Nucleus::createNot(val.value));
5173         }
5174
5175         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5176         {
5177                 return RValue<Int4>(Nucleus::createICmpEQ(x.value, y.value));
5178         }
5179
5180         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5181         {
5182                 return RValue<Int4>(Nucleus::createICmpSLT(x.value, y.value));
5183         }
5184
5185         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5186         {
5187                 return RValue<Int4>(Nucleus::createICmpSLE(x.value, y.value));
5188         }
5189
5190         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5191         {
5192                 return RValue<Int4>(Nucleus::createICmpNE(x.value, y.value));
5193         }
5194
5195         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5196         {
5197                 return RValue<Int4>(Nucleus::createICmpSGE(x.value, y.value));
5198         }
5199
5200         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5201         {
5202                 return RValue<Int4>(Nucleus::createICmpSGT(x.value, y.value));
5203         }
5204
5205         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5206         {
5207                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5208                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sle, condition, x.value, y.value);
5209                 ::basicBlock->appendInst(cmp);
5210
5211                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5212                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5213                 ::basicBlock->appendInst(select);
5214
5215                 return RValue<Int4>(V(result));
5216         }
5217
5218         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5219         {
5220                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5221                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Sgt, condition, x.value, y.value);
5222                 ::basicBlock->appendInst(cmp);
5223
5224                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5225                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5226                 ::basicBlock->appendInst(select);
5227
5228                 return RValue<Int4>(V(result));
5229         }
5230
5231         RValue<Int4> RoundInt(RValue<Float4> cast)
5232         {
5233                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5234                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Nearbyint, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5235                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5236                 auto nearbyint = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5237                 nearbyint->addArg(cast.value);
5238                 ::basicBlock->appendInst(nearbyint);
5239
5240                 return RValue<Int4>(V(result));
5241         }
5242
5243         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5244         {
5245                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5246                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackSigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5247                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5248                 auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5249                 pack->addArg(x.value);
5250                 pack->addArg(y.value);
5251                 ::basicBlock->appendInst(pack);
5252
5253                 return RValue<Short8>(V(result));
5254         }
5255
5256         RValue<Int> Extract(RValue<Int4> x, int i)
5257         {
5258                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5259         }
5260
5261         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5262         {
5263                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5264         }
5265
5266         RValue<Int> SignMask(RValue<Int4> x)
5267         {
5268                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
5269                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5270                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5271                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5272                 movmsk->addArg(x.value);
5273                 ::basicBlock->appendInst(movmsk);
5274
5275                 return RValue<Int>(V(result));
5276         }
5277
5278         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5279         {
5280                 return RValue<Int4>(createSwizzle4(x.value, select));
5281         }
5282
5283         Type *Int4::getType()
5284         {
5285                 return T(Ice::IceType_v4i32);
5286         }
5287
5288         UInt4::UInt4(RValue<Float4> cast)
5289         {
5290                 // Smallest positive value representable in UInt, but not in Int
5291                 const unsigned int ustart = 0x80000000u;
5292                 const float ustartf = float(ustart);
5293
5294                 // Check if the value can be represented as an Int
5295                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5296                 // If the value is too large, subtract ustart and re-add it after conversion.
5297                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5298                 // Otherwise, just convert normally
5299                           (~uiValue & Int4(cast));
5300                 // If the value is negative, store 0, otherwise store the result of the conversion
5301                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
5302         }
5303
5304         UInt4::UInt4(int xyzw)
5305         {
5306                 constant(xyzw, xyzw, xyzw, xyzw);
5307         }
5308
5309         UInt4::UInt4(int x, int yzw)
5310         {
5311                 constant(x, yzw, yzw, yzw);
5312         }
5313
5314         UInt4::UInt4(int x, int y, int zw)
5315         {
5316                 constant(x, y, zw, zw);
5317         }
5318
5319         UInt4::UInt4(int x, int y, int z, int w)
5320         {
5321                 constant(x, y, z, w);
5322         }
5323
5324         void UInt4::constant(int x, int y, int z, int w)
5325         {
5326                 int64_t constantVector[4] = {x, y, z, w};
5327                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5328         }
5329
5330         UInt4::UInt4(RValue<UInt4> rhs)
5331         {
5332                 storeValue(rhs.value);
5333         }
5334
5335         UInt4::UInt4(const UInt4 &rhs)
5336         {
5337                 Value *value = rhs.loadValue();
5338                 storeValue(value);
5339         }
5340
5341         UInt4::UInt4(const Reference<UInt4> &rhs)
5342         {
5343                 Value *value = rhs.loadValue();
5344                 storeValue(value);
5345         }
5346
5347         UInt4::UInt4(RValue<Int4> rhs)
5348         {
5349                 storeValue(rhs.value);
5350         }
5351
5352         UInt4::UInt4(const Int4 &rhs)
5353         {
5354                 Value *value = rhs.loadValue();
5355                 storeValue(value);
5356         }
5357
5358         UInt4::UInt4(const Reference<Int4> &rhs)
5359         {
5360                 Value *value = rhs.loadValue();
5361                 storeValue(value);
5362         }
5363
5364         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5365         {
5366                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5367                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5368
5369                 storeValue(packed);
5370         }
5371
5372         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5373         {
5374                 storeValue(rhs.value);
5375
5376                 return rhs;
5377         }
5378
5379         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5380         {
5381                 Value *value = rhs.loadValue();
5382                 storeValue(value);
5383
5384                 return RValue<UInt4>(value);
5385         }
5386
5387         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5388         {
5389                 Value *value = rhs.loadValue();
5390                 storeValue(value);
5391
5392                 return RValue<UInt4>(value);
5393         }
5394
5395         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5396         {
5397                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5398         }
5399
5400         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5401         {
5402                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5403         }
5404
5405         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5406         {
5407                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5408         }
5409
5410         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5411         {
5412                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5413         }
5414
5415         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5416         {
5417                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5418         }
5419
5420         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5421         {
5422                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5423         }
5424
5425         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5426         {
5427                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5428         }
5429
5430         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5431         {
5432                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5433         }
5434
5435         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5436         {
5437                 return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
5438         }
5439
5440         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5441         {
5442                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
5443         }
5444
5445         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5446         {
5447                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5448         }
5449
5450         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5451         {
5452                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5453         }
5454
5455         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5456         {
5457                 return lhs = lhs + rhs;
5458         }
5459
5460         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5461         {
5462                 return lhs = lhs - rhs;
5463         }
5464
5465         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5466         {
5467                 return lhs = lhs * rhs;
5468         }
5469
5470 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5471 //      {
5472 //              return lhs = lhs / rhs;
5473 //      }
5474
5475 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5476 //      {
5477 //              return lhs = lhs % rhs;
5478 //      }
5479
5480         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5481         {
5482                 return lhs = lhs & rhs;
5483         }
5484
5485         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5486         {
5487                 return lhs = lhs | rhs;
5488         }
5489
5490         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5491         {
5492                 return lhs = lhs ^ rhs;
5493         }
5494
5495         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5496         {
5497                 return lhs = lhs << rhs;
5498         }
5499
5500         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5501         {
5502                 return lhs = lhs >> rhs;
5503         }
5504
5505         RValue<UInt4> operator+(RValue<UInt4> val)
5506         {
5507                 return val;
5508         }
5509
5510         RValue<UInt4> operator-(RValue<UInt4> val)
5511         {
5512                 return RValue<UInt4>(Nucleus::createNeg(val.value));
5513         }
5514
5515         RValue<UInt4> operator~(RValue<UInt4> val)
5516         {
5517                 return RValue<UInt4>(Nucleus::createNot(val.value));
5518         }
5519
5520         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5521         {
5522                 return RValue<UInt4>(Nucleus::createICmpEQ(x.value, y.value));
5523         }
5524
5525         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5526         {
5527                 return RValue<UInt4>(Nucleus::createICmpULT(x.value, y.value));
5528         }
5529
5530         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5531         {
5532                 return RValue<UInt4>(Nucleus::createICmpULE(x.value, y.value));
5533         }
5534
5535         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5536         {
5537                 return RValue<UInt4>(Nucleus::createICmpNE(x.value, y.value));
5538         }
5539
5540         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5541         {
5542                 return RValue<UInt4>(Nucleus::createICmpUGE(x.value, y.value));
5543         }
5544
5545         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5546         {
5547                 return RValue<UInt4>(Nucleus::createICmpUGT(x.value, y.value));
5548         }
5549
5550         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5551         {
5552                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5553                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ule, condition, x.value, y.value);
5554                 ::basicBlock->appendInst(cmp);
5555
5556                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5557                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5558                 ::basicBlock->appendInst(select);
5559
5560                 return RValue<UInt4>(V(result));
5561         }
5562
5563         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5564         {
5565                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
5566                 auto cmp = Ice::InstIcmp::create(::function, Ice::InstIcmp::Ugt, condition, x.value, y.value);
5567                 ::basicBlock->appendInst(cmp);
5568
5569                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4i32);
5570                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
5571                 ::basicBlock->appendInst(select);
5572
5573                 return RValue<UInt4>(V(result));
5574         }
5575
5576         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5577         {
5578                 if(CPUID::SSE4_1)
5579                 {
5580                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v8i16);
5581                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::VectorPackUnsigned, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5582                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
5583                         auto pack = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
5584                         pack->addArg(x.value);
5585                         pack->addArg(y.value);
5586                         ::basicBlock->appendInst(pack);
5587
5588                         return RValue<UShort8>(V(result));
5589                 }
5590                 else
5591                 {
5592                         RValue<Int4> sx = As<Int4>(x);
5593                         RValue<Int4> bx = (sx & ~(sx >> 31)) - Int4(0x8000);
5594
5595                         RValue<Int4> sy = As<Int4>(y);
5596                         RValue<Int4> by = (sy & ~(sy >> 31)) - Int4(0x8000);
5597
5598                         return As<UShort8>(Pack(bx, by) + Short8(0x8000u));
5599                 }
5600         }
5601
5602         Type *UInt4::getType()
5603         {
5604                 return T(Ice::IceType_v4i32);
5605         }
5606
5607         Float::Float(RValue<Int> cast)
5608         {
5609                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5610
5611                 storeValue(integer);
5612         }
5613
5614         Float::Float(float x)
5615         {
5616                 storeValue(Nucleus::createConstantFloat(x));
5617         }
5618
5619         Float::Float(RValue<Float> rhs)
5620         {
5621                 storeValue(rhs.value);
5622         }
5623
5624         Float::Float(const Float &rhs)
5625         {
5626                 Value *value = rhs.loadValue();
5627                 storeValue(value);
5628         }
5629
5630         Float::Float(const Reference<Float> &rhs)
5631         {
5632                 Value *value = rhs.loadValue();
5633                 storeValue(value);
5634         }
5635
5636         RValue<Float> Float::operator=(RValue<Float> rhs)
5637         {
5638                 storeValue(rhs.value);
5639
5640                 return rhs;
5641         }
5642
5643         RValue<Float> Float::operator=(const Float &rhs)
5644         {
5645                 Value *value = rhs.loadValue();
5646                 storeValue(value);
5647
5648                 return RValue<Float>(value);
5649         }
5650
5651         RValue<Float> Float::operator=(const Reference<Float> &rhs)
5652         {
5653                 Value *value = rhs.loadValue();
5654                 storeValue(value);
5655
5656                 return RValue<Float>(value);
5657         }
5658
5659         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5660         {
5661                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5662         }
5663
5664         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5665         {
5666                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5667         }
5668
5669         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5670         {
5671                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5672         }
5673
5674         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5675         {
5676                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
5677         }
5678
5679         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5680         {
5681                 return lhs = lhs + rhs;
5682         }
5683
5684         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5685         {
5686                 return lhs = lhs - rhs;
5687         }
5688
5689         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5690         {
5691                 return lhs = lhs * rhs;
5692         }
5693
5694         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5695         {
5696                 return lhs = lhs / rhs;
5697         }
5698
5699         RValue<Float> operator+(RValue<Float> val)
5700         {
5701                 return val;
5702         }
5703
5704         RValue<Float> operator-(RValue<Float> val)
5705         {
5706                 return RValue<Float>(Nucleus::createFNeg(val.value));
5707         }
5708
5709         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5710         {
5711                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5712         }
5713
5714         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5715         {
5716                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5717         }
5718
5719         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5720         {
5721                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5722         }
5723
5724         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5725         {
5726                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5727         }
5728
5729         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5730         {
5731                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5732         }
5733
5734         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5735         {
5736                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
5737         }
5738
5739         RValue<Float> Abs(RValue<Float> x)
5740         {
5741                 return IfThenElse(x > 0.0f, x, -x);
5742         }
5743
5744         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5745         {
5746                 return IfThenElse(x > y, x, y);
5747         }
5748
5749         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5750         {
5751                 return IfThenElse(x < y, x, y);
5752         }
5753
5754         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5755         {
5756                 return 1.0f / x;
5757         }
5758
5759         RValue<Float> RcpSqrt_pp(RValue<Float> x)
5760         {
5761                 return Rcp_pp(Sqrt(x));
5762         }
5763
5764         RValue<Float> Sqrt(RValue<Float> x)
5765         {
5766                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_f32);
5767                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
5768                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
5769                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
5770                 sqrt->addArg(x.value);
5771                 ::basicBlock->appendInst(sqrt);
5772
5773                 return RValue<Float>(V(result));
5774         }
5775
5776         RValue<Float> Round(RValue<Float> x)
5777         {
5778                 return Float4(Round(Float4(x))).x;
5779         }
5780
5781         RValue<Float> Trunc(RValue<Float> x)
5782         {
5783                 return Float4(Trunc(Float4(x))).x;
5784         }
5785
5786         RValue<Float> Frac(RValue<Float> x)
5787         {
5788                 return Float4(Frac(Float4(x))).x;
5789         }
5790
5791         RValue<Float> Floor(RValue<Float> x)
5792         {
5793                 return Float4(Floor(Float4(x))).x;
5794         }
5795
5796         RValue<Float> Ceil(RValue<Float> x)
5797         {
5798                 return Float4(Ceil(Float4(x))).x;
5799         }
5800
5801         Type *Float::getType()
5802         {
5803                 return T(Ice::IceType_f32);
5804         }
5805
5806         Float2::Float2(RValue<Float4> cast)
5807         {
5808                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5809         }
5810
5811         Type *Float2::getType()
5812         {
5813                 return T(Type_v2f32);
5814         }
5815
5816         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
5817         {
5818                 Value *a = Int4(cast).loadValue();
5819                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5820
5821                 storeValue(xyzw);
5822         }
5823
5824         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
5825         {
5826                 Value *a = Int4(cast).loadValue();
5827                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5828
5829                 storeValue(xyzw);
5830         }
5831
5832         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
5833         {
5834                 Int4 c(cast);
5835                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5836         }
5837
5838         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
5839         {
5840                 Int4 c(cast);
5841                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5842         }
5843
5844         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
5845         {
5846                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
5847
5848                 storeValue(xyzw);
5849         }
5850
5851         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
5852         {
5853                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
5854                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
5855
5856                 storeValue(result.value);
5857         }
5858
5859         Float4::Float4() : FloatXYZW(this)
5860         {
5861         }
5862
5863         Float4::Float4(float xyzw) : FloatXYZW(this)
5864         {
5865                 constant(xyzw, xyzw, xyzw, xyzw);
5866         }
5867
5868         Float4::Float4(float x, float yzw) : FloatXYZW(this)
5869         {
5870                 constant(x, yzw, yzw, yzw);
5871         }
5872
5873         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
5874         {
5875                 constant(x, y, zw, zw);
5876         }
5877
5878         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
5879         {
5880                 constant(x, y, z, w);
5881         }
5882
5883         void Float4::constant(float x, float y, float z, float w)
5884         {
5885                 double constantVector[4] = {x, y, z, w};
5886                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5887         }
5888
5889         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
5890         {
5891                 storeValue(rhs.value);
5892         }
5893
5894         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
5895         {
5896                 Value *value = rhs.loadValue();
5897                 storeValue(value);
5898         }
5899
5900         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
5901         {
5902                 Value *value = rhs.loadValue();
5903                 storeValue(value);
5904         }
5905
5906         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
5907         {
5908                 Value *vector = loadValue();
5909                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5910
5911                 int swizzle[4] = {0, 0, 0, 0};
5912                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5913
5914                 storeValue(replicate);
5915         }
5916
5917         Float4::Float4(const Float &rhs) : FloatXYZW(this)
5918         {
5919                 *this = RValue<Float>(rhs.loadValue());
5920         }
5921
5922         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
5923         {
5924                 *this = RValue<Float>(rhs.loadValue());
5925         }
5926
5927         RValue<Float4> Float4::operator=(float x)
5928         {
5929                 return *this = Float4(x, x, x, x);
5930         }
5931
5932         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
5933         {
5934                 storeValue(rhs.value);
5935
5936                 return rhs;
5937         }
5938
5939         RValue<Float4> Float4::operator=(const Float4 &rhs)
5940         {
5941                 Value *value = rhs.loadValue();
5942                 storeValue(value);
5943
5944                 return RValue<Float4>(value);
5945         }
5946
5947         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
5948         {
5949                 Value *value = rhs.loadValue();
5950                 storeValue(value);
5951
5952                 return RValue<Float4>(value);
5953         }
5954
5955         RValue<Float4> Float4::operator=(RValue<Float> rhs)
5956         {
5957                 return *this = Float4(rhs);
5958         }
5959
5960         RValue<Float4> Float4::operator=(const Float &rhs)
5961         {
5962                 return *this = Float4(rhs);
5963         }
5964
5965         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
5966         {
5967                 return *this = Float4(rhs);
5968         }
5969
5970         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
5971         {
5972                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
5973         }
5974
5975         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
5976         {
5977                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
5978         }
5979
5980         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
5981         {
5982                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
5983         }
5984
5985         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
5986         {
5987                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
5988         }
5989
5990         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
5991         {
5992                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
5993         }
5994
5995         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
5996         {
5997                 return lhs = lhs + rhs;
5998         }
5999
6000         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6001         {
6002                 return lhs = lhs - rhs;
6003         }
6004
6005         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6006         {
6007                 return lhs = lhs * rhs;
6008         }
6009
6010         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6011         {
6012                 return lhs = lhs / rhs;
6013         }
6014
6015         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6016         {
6017                 return lhs = lhs % rhs;
6018         }
6019
6020         RValue<Float4> operator+(RValue<Float4> val)
6021         {
6022                 return val;
6023         }
6024
6025         RValue<Float4> operator-(RValue<Float4> val)
6026         {
6027                 return RValue<Float4>(Nucleus::createFNeg(val.value));
6028         }
6029
6030         RValue<Float4> Abs(RValue<Float4> x)
6031         {
6032                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6033                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6034                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6035
6036                 return As<Float4>(result);
6037         }
6038
6039         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6040         {
6041                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6042                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ule, condition, x.value, y.value);
6043                 ::basicBlock->appendInst(cmp);
6044
6045                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6046                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6047                 ::basicBlock->appendInst(select);
6048
6049                 return RValue<Float4>(V(result));
6050         }
6051
6052         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6053         {
6054                 Ice::Variable *condition = ::function->makeVariable(Ice::IceType_v4i1);
6055                 auto cmp = Ice::InstFcmp::create(::function, Ice::InstFcmp::Ugt, condition, x.value, y.value);
6056                 ::basicBlock->appendInst(cmp);
6057
6058                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6059                 auto select = Ice::InstSelect::create(::function, result, condition, y.value, x.value);
6060                 ::basicBlock->appendInst(select);
6061
6062                 return RValue<Float4>(V(result));
6063         }
6064
6065         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6066         {
6067                 return Float4(1.0f) / x;
6068         }
6069
6070         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6071         {
6072                 return Rcp_pp(Sqrt(x));
6073         }
6074
6075         RValue<Float4> Sqrt(RValue<Float4> x)
6076         {
6077                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6078                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Sqrt, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6079                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6080                 auto sqrt = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6081                 sqrt->addArg(x.value);
6082                 ::basicBlock->appendInst(sqrt);
6083
6084                 return RValue<Float4>(V(result));
6085         }
6086
6087         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6088         {
6089                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6090         }
6091
6092         RValue<Float> Extract(RValue<Float4> x, int i)
6093         {
6094                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6095         }
6096
6097         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6098         {
6099                 return RValue<Float4>(createSwizzle4(x.value, select));
6100         }
6101
6102         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6103         {
6104                 int shuffle[4] =
6105                 {
6106                         ((imm >> 0) & 0x03) + 0,
6107                         ((imm >> 2) & 0x03) + 0,
6108                         ((imm >> 4) & 0x03) + 4,
6109                         ((imm >> 6) & 0x03) + 4,
6110                 };
6111
6112                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6113         }
6114
6115         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6116         {
6117                 int shuffle[4] = {0, 4, 1, 5};
6118                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6119         }
6120
6121         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6122         {
6123                 int shuffle[4] = {2, 6, 3, 7};
6124                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6125         }
6126
6127         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6128         {
6129                 Value *vector = lhs.loadValue();
6130                 Value *result = createMask4(vector, rhs.value, select);
6131                 lhs.storeValue(result);
6132
6133                 return RValue<Float4>(result);
6134         }
6135
6136         RValue<Int> SignMask(RValue<Float4> x)
6137         {
6138                 Ice::Variable *result = ::function->makeVariable(Ice::IceType_i32);
6139                 const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::SignMask, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6140                 auto target = ::context->getConstantUndef(Ice::IceType_i32);
6141                 auto movmsk = Ice::InstIntrinsicCall::create(::function, 1, result, target, intrinsic);
6142                 movmsk->addArg(x.value);
6143                 ::basicBlock->appendInst(movmsk);
6144
6145                 return RValue<Int>(V(result));
6146         }
6147
6148         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6149         {
6150                 return RValue<Int4>(Nucleus::createFCmpOEQ(x.value, y.value));
6151         }
6152
6153         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6154         {
6155                 return RValue<Int4>(Nucleus::createFCmpOLT(x.value, y.value));
6156         }
6157
6158         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6159         {
6160                 return RValue<Int4>(Nucleus::createFCmpOLE(x.value, y.value));
6161         }
6162
6163         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6164         {
6165                 return RValue<Int4>(Nucleus::createFCmpONE(x.value, y.value));
6166         }
6167
6168         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6169         {
6170                 return RValue<Int4>(Nucleus::createFCmpOGE(x.value, y.value));
6171         }
6172
6173         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6174         {
6175                 return RValue<Int4>(Nucleus::createFCmpOGT(x.value, y.value));
6176         }
6177
6178         RValue<Float4> Round(RValue<Float4> x)
6179         {
6180                 if(CPUID::SSE4_1)
6181                 {
6182                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6183                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6184                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6185                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6186                         round->addArg(x.value);
6187                         round->addArg(::context->getConstantInt32(0));
6188                         ::basicBlock->appendInst(round);
6189
6190                         return RValue<Float4>(V(result));
6191                 }
6192                 else
6193                 {
6194                         return Float4(RoundInt(x));
6195                 }
6196         }
6197
6198         RValue<Float4> Trunc(RValue<Float4> x)
6199         {
6200                 if(CPUID::SSE4_1)
6201                 {
6202                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6203                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6204                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6205                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6206                         round->addArg(x.value);
6207                         round->addArg(::context->getConstantInt32(3));
6208                         ::basicBlock->appendInst(round);
6209
6210                         return RValue<Float4>(V(result));
6211                 }
6212                 else
6213                 {
6214                         return Float4(Int4(x));
6215                 }
6216         }
6217
6218         RValue<Float4> Frac(RValue<Float4> x)
6219         {
6220                 if(CPUID::SSE4_1)
6221                 {
6222                         return x - Floor(x);
6223                 }
6224                 else
6225                 {
6226                         Float4 frc = x - Float4(Int4(x));   // Signed fractional part
6227
6228                         return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
6229                 }
6230         }
6231
6232         RValue<Float4> Floor(RValue<Float4> x)
6233         {
6234                 if(CPUID::SSE4_1)
6235                 {
6236                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6237                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6238                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6239                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6240                         round->addArg(x.value);
6241                         round->addArg(::context->getConstantInt32(1));
6242                         ::basicBlock->appendInst(round);
6243
6244                         return RValue<Float4>(V(result));
6245                 }
6246                 else
6247                 {
6248                         return x - Frac(x);
6249                 }
6250         }
6251
6252         RValue<Float4> Ceil(RValue<Float4> x)
6253         {
6254                 if(CPUID::SSE4_1)
6255                 {
6256                         Ice::Variable *result = ::function->makeVariable(Ice::IceType_v4f32);
6257                         const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::Round, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
6258                         auto target = ::context->getConstantUndef(Ice::IceType_i32);
6259                         auto round = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
6260                         round->addArg(x.value);
6261                         round->addArg(::context->getConstantInt32(2));
6262                         ::basicBlock->appendInst(round);
6263
6264                         return RValue<Float4>(V(result));
6265                 }
6266                 else
6267                 {
6268                         return -Floor(-x);
6269                 }
6270         }
6271
6272         Type *Float4::getType()
6273         {
6274                 return T(Ice::IceType_v4f32);
6275         }
6276
6277         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6278         {
6279                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6280         }
6281
6282         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6283         {
6284                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6285         }
6286
6287         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6288         {
6289                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6290         }
6291
6292         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6293         {
6294                 return lhs = lhs + offset;
6295         }
6296
6297         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6298         {
6299                 return lhs = lhs + offset;
6300         }
6301
6302         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6303         {
6304                 return lhs = lhs + offset;
6305         }
6306
6307         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6308         {
6309                 return lhs + -offset;
6310         }
6311
6312         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6313         {
6314                 return lhs + -offset;
6315         }
6316
6317         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6318         {
6319                 return lhs + -offset;
6320         }
6321
6322         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6323         {
6324                 return lhs = lhs - offset;
6325         }
6326
6327         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6328         {
6329                 return lhs = lhs - offset;
6330         }
6331
6332         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6333         {
6334                 return lhs = lhs - offset;
6335         }
6336
6337         void Return()
6338         {
6339                 Nucleus::createRetVoid();
6340                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6341                 Nucleus::createUnreachable();
6342         }
6343
6344         void Return(RValue<Int> ret)
6345         {
6346                 Nucleus::createRet(ret.value);
6347                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6348                 Nucleus::createUnreachable();
6349         }
6350
6351         bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6352         {
6353                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6354                 Nucleus::setInsertBlock(bodyBB);
6355
6356                 return true;
6357         }
6358
6359         RValue<Long> Ticks()
6360         {
6361                 assert(false && "UNIMPLEMENTED"); return RValue<Long>(V(nullptr));
6362         }
6363 }